libwebsockets
Lightweight C library for HTML5 websockets
lws-tokenize.h
/*
 * libwebsockets - small server side websockets and web server implementation
 *
 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/* Do not treat - as a terminal character, so "my-token" is one token */
#define LWS_TOKENIZE_F_MINUS_NONTERM	(1 << 0)
/* Separately report aggregate colon-delimited tokens */
#define LWS_TOKENIZE_F_AGG_COLON	(1 << 1)
/* Enforce sequencing for a simple token , token , token ... list */
#define LWS_TOKENIZE_F_COMMA_SEP_LIST	(1 << 2)
/* Allow more characters in the tokens and fewer delimiters... the default is
 * only alphanumeric + underscore in tokens */
#define LWS_TOKENIZE_F_RFC7230_DELIMS	(1 << 3)
/* Do not treat . as a terminal character, so "warmcat.com" is one token */
#define LWS_TOKENIZE_F_DOT_NONTERM	(1 << 4)
/* If something starts looking like a float, like 1.2, force it to be a string
 * token.  This lets you receive dotted-quads like 192.168.0.1 as string
 * tokens, and avoids illegal-float detection on names like 1.myserver.com */
#define LWS_TOKENIZE_F_NO_FLOATS	(1 << 5)
/* Instead of LWS_TOKZE_INTEGER, report integers as any other string token */
#define LWS_TOKENIZE_F_NO_INTEGERS	(1 << 6)
/* # makes the rest of the line a comment */
#define LWS_TOKENIZE_F_HASH_COMMENT	(1 << 7)
/* Do not treat / as a terminal character, so "multipart/related" is one token */
#define LWS_TOKENIZE_F_SLASH_NONTERM	(1 << 8)
/* Do not treat * as a terminal character, so "myfile*" is one token */
#define LWS_TOKENIZE_F_ASTERISK_NONTERM	(1 << 9)
/* Do not treat = as a terminal character, so "x=y" is one token */
#define LWS_TOKENIZE_F_EQUALS_NONTERM	(1 << 10)
/* Do not treat : as a terminal character, so ::1 is one token */
#define LWS_TOKENIZE_F_COLON_NONTERM	(1 << 11)

/* We're just tokenizing a chunk; don't treat running out of input as final */
#define LWS_TOKENIZE_F_EXPECT_MORE	(1 << 12)

typedef enum {

	LWS_TOKZE_ERRS			= 7,	/* the number of errors defined */

	LWS_TOKZE_TOO_LONG		= -7,	/* token too long */
	LWS_TOKZE_WANT_READ		= -6,	/* need more input */
	LWS_TOKZE_ERR_BROKEN_UTF8	= -5,	/* malformed or partial utf8 */
	LWS_TOKZE_ERR_UNTERM_STRING	= -4,	/* ended while we were in "" */
	LWS_TOKZE_ERR_MALFORMED_FLOAT	= -3,	/* like 0..1 or 0.1.1 */
	LWS_TOKZE_ERR_NUM_ON_LHS	= -2,	/* like 123= or 0.1= */
	LWS_TOKZE_ERR_COMMA_LIST	= -1,	/* like ",tok", or, "tok,," */

	LWS_TOKZE_ENDED			= 0,	/* no more content */

	/* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */

	LWS_TOKZE_DELIMITER,		/* a delimiter appeared */
	LWS_TOKZE_TOKEN,		/* a token appeared */
	LWS_TOKZE_INTEGER,		/* an integer appeared */
	LWS_TOKZE_FLOAT,		/* a float appeared */
	LWS_TOKZE_TOKEN_NAME_EQUALS,	/* token [whitespace] = */
	LWS_TOKZE_TOKEN_NAME_COLON,	/* token [whitespace] : (only with
					   LWS_TOKENIZE_F_AGG_COLON flag) */
	LWS_TOKZE_QUOTED_STRING,	/* "*", where * may have any char */
} lws_tokenize_elem;

/*
 * helper enums to allow the caller to enforce legal delimiter sequencing, eg
 * disallow "token,,token", "token,", and ",token"
 */

enum lws_tokenize_delimiter_tracking {
	LWSTZ_DT_NEED_FIRST_CONTENT,
	LWSTZ_DT_NEED_DELIM,
	LWSTZ_DT_NEED_NEXT_CONTENT,
};

typedef enum {
	LWS_TOKZS_LEADING_WHITESPACE,
	LWS_TOKZS_QUOTED_STRING,
	LWS_TOKZS_TOKEN,
	LWS_TOKZS_TOKEN_POST_TERMINAL,
} lws_tokenize_state;

typedef struct lws_tokenize {
	char collect[256];	/* token length limit */
	const char *start;	/**< set to the start of the string to tokenize */
	const char *token;	/**< the start of the last token */
	size_t len;		/**< set to the length of the string to tokenize */
	size_t token_len;	/**< the length of the last token */

	lws_tokenize_state state;

	int line;		/**< line number of the current token */

	uint16_t flags;		/**< optional LWS_TOKENIZE_F_ flags, or 0 */

	int8_t e;		/**< convenient for storing the lws_tokenize return */
	uint8_t reset_token;
} lws_tokenize_t;

/*
 * Initialize ts to tokenize the NUL-terminated string at start, using the
 * LWS_TOKENIZE_F_ flags given (or 0)
 */
LWS_VISIBLE LWS_EXTERN void
lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags);

/*
 * Return the next tokenization result, one of the lws_tokenize_elem values,
 * with the token start and length reported in ts->token and ts->token_len
 */
LWS_VISIBLE LWS_EXTERN lws_tokenize_elem
lws_tokenize(struct lws_tokenize *ts);

/*
 * Copy the current token into str as a NUL-terminated string; str must have
 * at least max bytes of space.  Returns 0 if it fit, nonzero otherwise.
 */
LWS_VISIBLE LWS_EXTERN int
lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max);

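/*
 * Illustrative usage sketch (not part of the original header, compiled out
 * with #if 0): tokenizing a comma-separated list.  The input string and the
 * particular flag choice are assumptions made for this example only.
 */
#if 0
#include <libwebsockets.h>
#include <stdio.h>

int main(void)
{
	struct lws_tokenize ts;
	lws_tokenize_elem e;
	char tok[64];

	lws_tokenize_init(&ts, "alpha, beta-2, gamma",
			  LWS_TOKENIZE_F_COMMA_SEP_LIST |
			  LWS_TOKENIZE_F_MINUS_NONTERM);

	do {
		e = lws_tokenize(&ts);

		if (e == LWS_TOKZE_TOKEN || e == LWS_TOKZE_INTEGER) {
			/* copy the token out as a NUL-terminated string */
			if (!lws_tokenize_cstr(&ts, tok, sizeof(tok)))
				printf("token: '%s'\n", tok);
		}
		/* LWS_TOKZE_DELIMITER is reported for the separating commas */
	} while (e > 0); /* positive results mean "more to come" */

	/* 0 is LWS_TOKZE_ENDED, negative values are errors */
	return e == LWS_TOKZE_ENDED ? 0 : 1;
}
#endif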
/*
 * lws_strexp: flexible string expansion helper api
 *
 * This stateful helper can handle multiple separate input chunks and multiple
 * output buffer loads with arbitrary boundaries between literals and expanded
 * symbols.  This allows it to handle fragmented input as well as arbitrarily
 * long symbol expansions that are bigger than the output buffer itself.
 *
 * A user callback is used to convert symbol names to the symbol value.
 *
 * A single-byte input buffer and a single-byte output buffer are enough to
 * process a substitution of any length.  The state object is around 64 bytes
 * on a 64-bit system and it uses only 8 bytes of stack.
 */

/*
 * Callback used to expand the symbol "name" into the output buffer out at
 * offset *pos, limited to olen bytes; *exp_ofs tracks how much of the
 * expansion has already been emitted, so an expansion larger than the output
 * buffer can be resumed.  Returns one of the LSTRX_ codes below.
 */
typedef int (*lws_strexp_expand_cb)(void *priv, const char *name, char *out,
				    size_t *pos, size_t olen, size_t *exp_ofs);

typedef struct lws_strexp {
	char			name[32];
	lws_strexp_expand_cb	cb;
	void			*priv;
	char			*out;
	size_t			olen;
	size_t			pos;

	size_t			exp_ofs;

	uint8_t			name_pos;
	char			state;
} lws_strexp_t;

enum {
	LSTRX_DONE,			/* it completed OK */
	LSTRX_FILLED_OUT,		/* out buf filled and needs resetting */
	LSTRX_FATAL_NAME_TOO_LONG = -1,	/* fatal */
	LSTRX_FATAL_NAME_UNKNOWN  = -2,	/* fatal */
};

/*
 * Prepare exp for expansion, using the callback cb to resolve symbol names,
 * with priv passed through to the callback; output is written to out, which
 * has olen bytes of space
 */
LWS_VISIBLE LWS_EXTERN void
lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb,
		char *out, size_t olen);

/*
 * Give exp a new (or emptied) output buffer out of olen bytes, eg after
 * LSTRX_FILLED_OUT was returned and the previous output was consumed
 */
LWS_VISIBLE LWS_EXTERN void
lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen);

/*
 * Process the next len bytes of input at in, reporting how much input and
 * output were consumed in *pused_in and *pused_out.  Returns LSTRX_DONE,
 * LSTRX_FILLED_OUT, or a fatal LSTRX_ error code.
 */
LWS_VISIBLE LWS_EXTERN int
lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len,
		  size_t *pused_in, size_t *pused_out);

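/*
 * Illustrative usage sketch (not part of the original header, compiled out
 * with #if 0): expanding symbols written as ${NAME} via a user callback.
 * The symbol names, their values and the input text are hypothetical for the
 * example; only the lws_strexp_* calls and the callback signature come from
 * this header.
 */
#if 0
#include <libwebsockets.h>
#include <stdio.h>
#include <string.h>

static int
my_expand_cb(void *priv, const char *name, char *out, size_t *pos,
	     size_t olen, size_t *exp_ofs)
{
	const char *val = NULL;

	(void)priv;

	/* hypothetical symbol table for the example */
	if (!strcmp(name, "USER"))
		val = "andy";
	if (!strcmp(name, "HOME"))
		val = "/home/andy";

	if (!val)
		return LSTRX_FATAL_NAME_UNKNOWN;

	/* emit as much of the expansion as fits, resuming at *exp_ofs */
	while (val[*exp_ofs]) {
		if (*pos >= olen)
			return LSTRX_FILLED_OUT;
		out[(*pos)++] = val[(*exp_ofs)++];
	}

	*exp_ofs = 0; /* expansion fully emitted */

	return LSTRX_DONE;
}

int main(void)
{
	const char *in = "home is ${HOME}, user is ${USER}\n";
	size_t used_in, used_out;
	lws_strexp_t exp;
	char out[64];
	int n;

	lws_strexp_init(&exp, NULL, my_expand_cb, out, sizeof(out));

	n = lws_strexp_expand(&exp, in, strlen(in), &used_in, &used_out);
	if (n != LSTRX_DONE)
		return 1;

	/* out[] now holds the expanded string, used_out bytes long */
	fwrite(out, 1, used_out, stdout);

	return 0;
}
#endif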
/*
 * Compare the clen bytes at check against the wlen-byte wildcard pattern,
 * which may contain '*' globs; neither string needs to be NUL-terminated.
 * Returns 0 on match.
 */
LWS_VISIBLE LWS_EXTERN int
lws_strcmp_wildcard(const char *wildcard, size_t wlen, const char *check,
		    size_t clen);
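/*
 * Illustrative sketch (not part of the original header, compiled out with
 * #if 0): glob matching with lws_strcmp_wildcard().  The pattern and host
 * strings are assumptions for the example, and as with strcmp() a return of
 * 0 is taken to mean a match.
 */
#if 0
#include <libwebsockets.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *pat = "*.example.com", *host = "lws.example.com";

	if (!lws_strcmp_wildcard(pat, strlen(pat), host, strlen(host)))
		printf("%s matches %s\n", host, pat);

	return 0;
}
#endif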