libwebsockets
Lightweight C library for HTML5 websockets
lws-tokenize.h
Go to the documentation of this file.
1 /*
2  * libwebsockets - small server side websockets and web server implementation
3  *
4  * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 /* Do not treat - as a terminal character, so "my-token" is one token */
26 #define LWS_TOKENIZE_F_MINUS_NONTERM (1 << 0)
27 /* Separately report aggregate colon-delimited tokens */
28 #define LWS_TOKENIZE_F_AGG_COLON (1 << 1)
29 /* Enforce sequencing for a simple token , token , token ... list */
30 #define LWS_TOKENIZE_F_COMMA_SEP_LIST (1 << 2)
31 /* Allow more characters in the tokens and fewer delimiters... default is
32  * only alphanumeric + underscore in tokens */
33 #define LWS_TOKENIZE_F_RFC7230_DELIMS (1 << 3)
34 /* Do not treat . as a terminal character, so "warmcat.com" is one token */
35 #define LWS_TOKENIZE_F_DOT_NONTERM (1 << 4)
36 /* If something starts looking like a float, like 1.2, force it to be a string
37  * token.  This lets you receive dotted-quads like 192.168.0.1 as string tokens,
38  * and avoids illegal float format detection on input like 1.myserver.com */
39 #define LWS_TOKENIZE_F_NO_FLOATS (1 << 5)
40 /* Instead of LWS_TOKZE_INTEGER, report integers as any other string token */
41 #define LWS_TOKENIZE_F_NO_INTEGERS (1 << 6)
42 /* # makes the rest of the line a comment */
43 #define LWS_TOKENIZE_F_HASH_COMMENT (1 << 7)
44 /* Do not treat / as a terminal character, so "multipart/related" is one token */
45 #define LWS_TOKENIZE_F_SLASH_NONTERM (1 << 8)
46 /* Do not treat * as a terminal character, so "myfile*" is one token */
47 #define LWS_TOKENIZE_F_ASTERISK_NONTERM (1 << 9)
48 /* Do not treat = as a terminal character, so "x=y" is one token */
49 #define LWS_TOKENIZE_F_EQUALS_NONTERM (1 << 10)
50 /* Do not treat : as a terminal character, so ::1 is one token */
51 #define LWS_TOKENIZE_F_COLON_NONTERM (1 << 11)
52 
53 /* We're just tokenizing a chunk; don't treat running out of input as final */
54 #define LWS_TOKENIZE_F_EXPECT_MORE (1 << 12)
55 
56 typedef enum {
57 
58  LWS_TOKZE_ERRS = 7, /* the number of errors defined */
59 
60  LWS_TOKZE_TOO_LONG = -7, /* token too long */
61  LWS_TOKZE_WANT_READ = -6, /* need more input */
62  LWS_TOKZE_ERR_BROKEN_UTF8 = -5, /* malformed or partial utf8 */
63  LWS_TOKZE_ERR_UNTERM_STRING = -4, /* ended while we were in "" */
64  LWS_TOKZE_ERR_MALFORMED_FLOAT = -3, /* like 0..1 or 0.1.1 */
65  LWS_TOKZE_ERR_NUM_ON_LHS = -2, /* like 123= or 0.1= */
66  LWS_TOKZE_ERR_COMMA_LIST = -1, /* like ",tok", or, "tok,," */
67 
68  LWS_TOKZE_ENDED = 0, /* no more content */
69 
70  /* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */
71 
72  LWS_TOKZE_DELIMITER, /* a delimiter appeared */
73  LWS_TOKZE_TOKEN, /* a token appeared */
74  LWS_TOKZE_INTEGER, /* an integer appeared */
75  LWS_TOKZE_FLOAT, /* a float appeared */
76  LWS_TOKZE_TOKEN_NAME_EQUALS, /* token [whitespace] = */
77  LWS_TOKZE_TOKEN_NAME_COLON, /* token [whitespace] : (only with
78  LWS_TOKENIZE_F_AGG_COLON flag) */
79  LWS_TOKZE_QUOTED_STRING, /* "*", where * may have any char */
80 
82 
83 /*
84  * Helper enums that let the caller enforce legal delimiter sequencing, e.g.
85  * disallow "token,,token", "token," and ",token"
86  */
87 
92 };
93 
94 typedef enum {
100 
101 typedef struct lws_tokenize {
102  char collect[256]; /* token length limit */
103  const char *start;
104  const char *token;
105  size_t len;
106  size_t token_len;
109 
110  int line;
111  int effline;
112 
115 
116  int8_t e;
121 
137 lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags);
138 
160 
172 lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max);
173 
174 
175 /*
176  * lws_strexp: flexible string expansion helper api
177  *
178  * This stateful helper can handle multiple separate input chunks and multiple
179  * output buffer loads with arbitrary boundaries between literals and expanded
180  * symbols. This allows it to handle fragmented input as well as arbitrarily
181  * long symbol expansions that are bigger than the output buffer itself.
182  *
183  * A user callback is used to convert symbol names to the symbol value.
184  *
185  * Consequently, a single-byte input buffer and a single-byte output buffer
186  * can process a substitution of any length. The state object is around
187  * 64 bytes on a 64-bit system and it only uses 8 bytes of stack.
188  */
189 
190 
191 typedef int (*lws_strexp_expand_cb)(void *priv, const char *name, char *out,
192  size_t *pos, size_t olen, size_t *exp_ofs);
193 
194 typedef struct lws_strexp {
195  char name[32];
197  void *priv;
198  char *out;
199  size_t olen;
200  size_t pos;
201 
202  size_t exp_ofs;
203 
205  char state;
207 
208 enum {
209  LSTRX_DONE, /* it completed OK */
210  LSTRX_FILLED_OUT, /* out buf filled and needs resetting */
211  LSTRX_FATAL_NAME_TOO_LONG = -1, /* fatal */
213 };
214 
215 
234  char *out, size_t olen);
235 
253 lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen);
254 
278 lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len,
279  size_t *pused_in, size_t *pused_out);
280 
294 lws_strcmp_wildcard(const char *wildcard, size_t wlen, const char *check,
295  size_t clen);
unsigned short uint16_t
#define LWS_EXTERN
unsigned char uint8_t
#define LWS_VISIBLE
lws_tokenize_delimiter_tracking
Definition: lws-tokenize.h:88
@ LWSTZ_DT_NEED_NEXT_CONTENT
Definition: lws-tokenize.h:91
@ LWSTZ_DT_NEED_DELIM
Definition: lws-tokenize.h:90
@ LWSTZ_DT_NEED_FIRST_CONTENT
Definition: lws-tokenize.h:89
lws_tokenize_state state
Definition: lws-tokenize.h:108
LWS_VISIBLE LWS_EXTERN void lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb, char *out, size_t olen)
char name[32]
Definition: lws-tokenize.h:195
uint8_t reset_token
Definition: lws-tokenize.h:117
const char * token
Definition: lws-tokenize.h:104
struct lws_tokenize lws_tokenize_t
int(* lws_strexp_expand_cb)(void *priv, const char *name, char *out, size_t *pos, size_t olen, size_t *exp_ofs)
Definition: lws-tokenize.h:191
size_t token_len
Definition: lws-tokenize.h:106
LWS_VISIBLE LWS_EXTERN void lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen)
lws_strexp_expand_cb cb
Definition: lws-tokenize.h:196
LWS_VISIBLE LWS_EXTERN lws_tokenize_elem lws_tokenize(struct lws_tokenize *ts)
char * out
Definition: lws-tokenize.h:198
void * priv
Definition: lws-tokenize.h:197
size_t pos
Definition: lws-tokenize.h:200
LWS_VISIBLE LWS_EXTERN int lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len, size_t *pused_in, size_t *pused_out)
uint8_t crlf
Definition: lws-tokenize.h:118
LWS_VISIBLE LWS_EXTERN void lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags)
lws_tokenize_state
Definition: lws-tokenize.h:94
@ LWS_TOKZS_TOKEN
Definition: lws-tokenize.h:97
@ LWS_TOKZS_TOKEN_POST_TERMINAL
Definition: lws-tokenize.h:98
@ LWS_TOKZS_QUOTED_STRING
Definition: lws-tokenize.h:96
@ LWS_TOKZS_LEADING_WHITESPACE
Definition: lws-tokenize.h:95
lws_tokenize_elem
Definition: lws-tokenize.h:56
@ LWS_TOKZE_ERR_NUM_ON_LHS
Definition: lws-tokenize.h:65
@ LWS_TOKZE_WANT_READ
Definition: lws-tokenize.h:61
@ LWS_TOKZE_TOKEN
Definition: lws-tokenize.h:73
@ LWS_TOKZE_QUOTED_STRING
Definition: lws-tokenize.h:79
@ LWS_TOKZE_ERR_MALFORMED_FLOAT
Definition: lws-tokenize.h:64
@ LWS_TOKZE_DELIMITER
Definition: lws-tokenize.h:72
@ LWS_TOKZE_TOKEN_NAME_EQUALS
Definition: lws-tokenize.h:76
@ LWS_TOKZE_TOO_LONG
Definition: lws-tokenize.h:60
@ LWS_TOKZE_FLOAT
Definition: lws-tokenize.h:75
@ LWS_TOKZE_ERR_BROKEN_UTF8
Definition: lws-tokenize.h:62
@ LWS_TOKZE_ERRS
Definition: lws-tokenize.h:58
@ LWS_TOKZE_INTEGER
Definition: lws-tokenize.h:74
@ LWS_TOKZE_ENDED
Definition: lws-tokenize.h:68
@ LWS_TOKZE_TOKEN_NAME_COLON
Definition: lws-tokenize.h:77
@ LWS_TOKZE_ERR_UNTERM_STRING
Definition: lws-tokenize.h:63
@ LWS_TOKZE_ERR_COMMA_LIST
Definition: lws-tokenize.h:66
uint16_t flags
Definition: lws-tokenize.h:113
uint8_t delim
Definition: lws-tokenize.h:114
uint8_t name_pos
Definition: lws-tokenize.h:204
char collect[256]
Definition: lws-tokenize.h:102
const char * start
Definition: lws-tokenize.h:103
size_t exp_ofs
Definition: lws-tokenize.h:202
LWS_VISIBLE LWS_EXTERN int lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max)
struct lws_strexp lws_strexp_t
LWS_VISIBLE LWS_EXTERN int lws_strcmp_wildcard(const char *wildcard, size_t wlen, const char *check, size_t clen)
size_t olen
Definition: lws-tokenize.h:199
@ LSTRX_DONE
Definition: lws-tokenize.h:209
@ LSTRX_FATAL_NAME_TOO_LONG
Definition: lws-tokenize.h:211
@ LSTRX_FATAL_NAME_UNKNOWN
Definition: lws-tokenize.h:212
@ LSTRX_FILLED_OUT
Definition: lws-tokenize.h:210