libwebsockets
Lightweight C library for HTML5 websockets
lws-tokenize.h
1 /*
2  * libwebsockets - small server side websockets and web server implementation
3  *
4  * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
/*
 * LWS_TOKENIZE_F_* option flags.  Each flag is a distinct bit (bits 0..10),
 * so any combination can be OR-ed together into a single flags value.
 */

/* Do not treat - as a terminal character, so "my-token" is one token */
#define LWS_TOKENIZE_F_MINUS_NONTERM (1 << 0)
/* Separately report aggregate colon-delimited tokens */
#define LWS_TOKENIZE_F_AGG_COLON (1 << 1)
/* Enforce sequencing for a simple token , token , token ... list */
#define LWS_TOKENIZE_F_COMMA_SEP_LIST (1 << 2)
/* Allow more characters in the tokens and fewer delimiters... default is
 * only alphanumeric + underscore in tokens */
#define LWS_TOKENIZE_F_RFC7230_DELIMS (1 << 3)
/* Do not treat . as a terminal character, so "warmcat.com" is one token */
#define LWS_TOKENIZE_F_DOT_NONTERM (1 << 4)
/* If something starts looking like a float, like 1.2, force to be string token.
 * This lets you receive dotted-quads like 192.168.0.1 as string tokens, and
 * avoids illegal float format detection like 1.myserver.com */
#define LWS_TOKENIZE_F_NO_FLOATS (1 << 5)
/* Instead of LWS_TOKZE_INTEGER, report integers as any other string token */
#define LWS_TOKENIZE_F_NO_INTEGERS (1 << 6)
/* # makes the rest of the line a comment */
#define LWS_TOKENIZE_F_HASH_COMMENT (1 << 7)
/* Do not treat / as a terminal character, so "multipart/related" is one token */
#define LWS_TOKENIZE_F_SLASH_NONTERM (1 << 8)
/* Do not treat * as a terminal character, so "myfile*" is one token */
#define LWS_TOKENIZE_F_ASTERISK_NONTERM (1 << 9)
/* Do not treat = as a terminal character, so "x=y" is one token */
#define LWS_TOKENIZE_F_EQUALS_NONTERM (1 << 10)
50 
/*
 * Tokenizer result codes: errors are negative, LWS_TOKZE_ENDED is 0 and
 * every recognized element has a value of 1 or more.
 */
typedef enum {

	/*
	 * A count, not a result.  Its value (5) is numerically the same as
	 * LWS_TOKZE_TOKEN_NAME_EQUALS, so never compare a result against it.
	 */
	LWS_TOKZE_ERRS = 5, /* the number of errors defined */

	LWS_TOKZE_ERR_BROKEN_UTF8 = -5, /* malformed or partial utf8 */
	LWS_TOKZE_ERR_UNTERM_STRING = -4, /* ended while we were in "" */
	LWS_TOKZE_ERR_MALFORMED_FLOAT = -3, /* like 0..1 or 0.1.1 */
	LWS_TOKZE_ERR_NUM_ON_LHS = -2, /* like 123= or 0.1= */
	LWS_TOKZE_ERR_COMMA_LIST = -1, /* like ",tok", or, "tok,," */

	LWS_TOKZE_ENDED = 0, /* no more content */

	/* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */

	LWS_TOKZE_DELIMITER, /* 1: a delimiter appeared */
	LWS_TOKZE_TOKEN, /* 2: a token appeared */
	LWS_TOKZE_INTEGER, /* 3: an integer appeared */
	LWS_TOKZE_FLOAT, /* 4: a float appeared */
	LWS_TOKZE_TOKEN_NAME_EQUALS, /* 5: token [whitespace] = */
	LWS_TOKZE_TOKEN_NAME_COLON, /* 6: token [whitespace] : (only with
				       LWS_TOKENIZE_F_AGG_COLON flag) */
	LWS_TOKZE_QUOTED_STRING, /* 7: "*", where * may have any char */

} lws_tokenize_elem;
75 
/*
 * Helper enums to allow the caller to enforce legal delimiter sequencing, eg,
 * disallow "token,,token", "token,", and ",token".
 *
 * NOTE(review): the per-member meanings are inferred from the names (no
 * content seen yet / a delimiter must come next / content must follow a
 * delimiter) -- confirm against the tokenizer implementation.
 */

enum lws_tokenize_delimiter_tracking {
	LWSTZ_DT_NEED_FIRST_CONTENT, /* 0 */
	LWSTZ_DT_NEED_DELIM, /* 1 */
	LWSTZ_DT_NEED_NEXT_CONTENT, /* 2 */
};
86 
87 typedef struct lws_tokenize {
88  const char *start;
89  const char *token;
90  size_t len;
91  size_t token_len;
93  uint16_t flags;
94  uint8_t delim;
95 
96  int8_t e;
98 
113 LWS_VISIBLE LWS_EXTERN void
114 lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags);
115 
135 LWS_VISIBLE LWS_EXTERN lws_tokenize_elem
136 lws_tokenize(struct lws_tokenize *ts);
137 
148 LWS_VISIBLE LWS_EXTERN int
149 lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max);
150 
151 
/*
 * lws_strexp: flexible string expansion helper api
 *
 * This stateful helper can handle multiple separate input chunks and multiple
 * output buffer loads with arbitrary boundaries between literals and expanded
 * symbols. This allows it to handle fragmented input as well as arbitrarily
 * long symbol expansions that are bigger than the output buffer itself.
 *
 * A user callback is used to convert symbol names to the symbol value.
 *
 * As a result, even a single-byte input buffer and a single-byte output
 * buffer can process a substitution of any length. The state object is
 * around 88 bytes on a 64-bit system and it only uses 8 bytes stack.
 */
166 
167 
/*
 * lws_strexp_expand_cb: user callback type held in lws_strexp_t.cb; it is
 * what converts a symbol name into the symbol's value.
 *
 * priv:    opaque user pointer
 * name:    the symbol name
 * out:     output buffer
 * pos:     pointer to a position value (presumably the write position in
 *          out, updated by the callback)
 * olen:    presumably the size of out
 * exp_ofs: pointer to an offset value (presumably how far into the expansion
 *          has already been emitted, so a long expansion can be resumed)
 *
 * NOTE(review): the exact parameter semantics and the meaning of the int
 * return value are not visible here -- confirm against the implementation.
 */
typedef int (*lws_strexp_expand_cb)(void *priv, const char *name, char *out,
				    size_t *pos, size_t olen, size_t *exp_ofs);

/*
 * State object for the lws_strexp string expansion helper.
 *
 * NOTE(review): the member descriptions below are inferred from the member
 * names and from the parameters of the same names in the lws_strexp_*
 * prototypes -- confirm against the implementation.
 */
typedef struct lws_strexp {
	char name[32]; /* symbol name storage (32 bytes) */
	lws_strexp_expand_cb cb; /* user expansion callback */
	void *priv; /* opaque user pointer */
	char *out; /* current output buffer */
	size_t olen; /* presumably the size of out */
	size_t pos; /* presumably the current position in out */

	size_t exp_ofs; /* presumably the offset into an expansion */

	uint8_t name_pos; /* presumably the fill position in name[] */
	char state; /* internal state */
} lws_strexp_t;
184 
/*
 * LSTRX_* result codes: the non-fatal results are 0 and 1, the fatal ones
 * are negative.
 */
enum {
	LSTRX_DONE, /* 0: it completed OK */
	LSTRX_FILLED_OUT, /* 1: out buf filled and needs resetting */
	LSTRX_FATAL_NAME_TOO_LONG = -1, /* fatal */
	LSTRX_FATAL_NAME_UNKNOWN = -2, /* fatal, going by the FATAL_ prefix */
};
191 
192 
209 LWS_VISIBLE LWS_EXTERN void
210 lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb,
211  char *out, size_t olen);
212 
229 LWS_VISIBLE LWS_EXTERN void
230 lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen);
231 
254 LWS_VISIBLE LWS_EXTERN int
255 lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len,
256  size_t *pused_in, size_t *pused_out);
257 
270 LWS_VISIBLE LWS_EXTERN int
271 lws_strcmp_wildcard(const char *wildcard, size_t wlen, const char *check,
272  size_t clen);
Definition: lws-tokenize.h:171
Definition: lws-tokenize.h:87
const char * token
Definition: lws-tokenize.h:89
size_t len
Definition: lws-tokenize.h:90
size_t token_len
Definition: lws-tokenize.h:91
int8_t e
Definition: lws-tokenize.h:96
uint16_t flags
Definition: lws-tokenize.h:93
const char * start
Definition: lws-tokenize.h:88