libwebsockets
Lightweight C library for HTML5 websockets
lws-html.h
Go to the documentation of this file.
1 /*
2  * libwebsockets - small server side websockets and web server implementation
3  *
4  * Copyright (C) 2010 - 2022 Andy Green <andy@warmcat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  * Extremely Lightweight HTML5 Stream Parser, same approach as lecp but for
25  * html5.
26  */
27 
/*
 * Build-time tunables: each value below is only a default, and is left alone
 * if the macro was already defined before this point.
 * NOTE(review): what each limit bounds is not visible in this header; going
 * by the names: element nesting, stack depth, string chunk size - confirm.
 */
28 #if !defined(LHP_MAX_ELEMS_NEST)
29 #define LHP_MAX_ELEMS_NEST 32
30 #endif
31 #if !defined(LHP_MAX_DEPTH)
32 #define LHP_MAX_DEPTH 12
33 #endif
34 #if !defined(LHP_STRING_CHUNK)
35 #define LHP_STRING_CHUNK 254
36 #endif
37 
39 
45 
48 
51 
52  LHPCB_ELEMENT_START = 4, /* reported at end of <> */
54 
56 
58 };
59 
60 /*
61  * CSS v2.1 full property set, taken from
62  *
63  * https://www.w3.org/TR/CSS21/propidx.html
64  */
65 
66 typedef enum lcsp_props {
187 
188  LCSP_PROP__COUNT /* always last */
189 } lcsp_props_t;
190 
191 /*
192  * Indexes for the well-known property values
193  */
194 
195 typedef enum {
321 
322  LCSP_PROPVAL__COUNT /* always last */
323 } lcsp_propvals_t;
324 
325 struct lhp_ctx;
/*
 * Stream parsing callback type, as passed to lws_lhp_construct().
 * NOTE(review): reason is presumably one of the LHPCB_ values - confirm.
 */
326 typedef lws_stateful_ret_t (*lhp_callback)(struct lhp_ctx *ctx, char reason);
327 
328 /* html attribute */
329 
330 typedef struct lhp_atr {
332  size_t name_len; /* 0 if it is elem tag */
334 
335  /* name+NUL then value+NUL follow */
336 } lhp_atr_t;
337 
338 /*
339  * In order to lay out the table, we have to incrementally adjust all foregoing
340  * DLOs as newer cells change the situation. So we have to keep track of all
341  * cell DLOs in a stack of tables until it's all done.
342  */
343 
/* One table row: list linkage, the DLOs in the row, and its height so far */
344 typedef struct {
345  lws_dll2_t list; /* NOTE(review): was "ps->table_cols", same as
			 * lhp_table_col_t; confirm which list owns rows */
346 
347  lws_dll2_owner_t row_dlos; /* lws_dlo_t in row (NOTE(review): comment
				  * previously said "in column" - confirm) */
348 
349  lws_fx_t height; /* currently computed row height */
350 } lhp_table_row_t;
351 
/* One table column: list linkage, the DLOs in the column, and its width so far */
352 typedef struct {
353  lws_dll2_t list; /* ps->table_cols */
354 
355  lws_dll2_owner_t col_dlos; /* lws_dlo_t in column */
356 
357  lws_fx_t width; /* currently computed column width */
358 } lhp_table_col_t;
359 
360 struct lcsp_atr;
361 
/*
 * Side indexes, 0..3.
 * NOTE(review): presumably index the [4] per-side arrays in lhp_pstack
 * (css_pos, css_margin, css_padding) - confirm against the users.
 */
362 #define CCPAS_TOP 0
363 #define CCPAS_RIGHT 1
364 #define CCPAS_BOTTOM 2
365 #define CCPAS_LEFT 3
366 
367 typedef struct lhp_pstack {
369  void *user; /* private to the stack level */
370  lhp_callback cb;
371 
372  /* static: x,y: offset from parent, w,h: surface size of this object */
374 
375  /* dynamic cursor inside drt for progressive child placement */
380 
383 
384  lws_dll2_owner_t atr; /* lhp_atr_t */
385 
387 
389  const struct lcsp_atr *css_color;
390 
391  const struct lcsp_atr *css_position;
392  const struct lcsp_atr *css_display;
393  const struct lcsp_atr *css_width;
394  const struct lcsp_atr *css_height;
395 
396  const struct lcsp_atr *css_border_radius[4];
397 
398  const struct lcsp_atr *css_pos[4];
399  const struct lcsp_atr *css_margin[4];
400  const struct lcsp_atr *css_padding[4];
401 
402  uint16_t tr_idx; /* in table */
403  uint16_t td_idx; /* in current tr */
404 
405  uint8_t is_block:1; /* children use space in our drt */
407 
408  /* user layout owns these after initial values set */
409 
412  int oi[4];
413  int positioned[4];
415  uint8_t runon; /* continues same line */
416 
417 } lhp_pstack_t;
418 
419 typedef enum lcsp_css_units {
421 
422  LCSP_UNIT_NUM, /* u.i */
423 
433 
436 
437  LCSP_UNIT_FREQ_HZ, /* u.i */
438 
439  LCSP_UNIT_RGBA, /* u.rgba */
440 
441  LCSP_UNIT_URL, /* string at end of atr */
442  LCSP_UNIT_STRING, /* string at end of atr */
443  LCSP_UNIT_DATA, /* binary data at end of atr */
444 
445 } lcsp_css_units_t;
446 
447 typedef struct lcsp_atr {
449 
450  int propval; /* lcsp_propvals_t LCSP_PROPVAL_ */
451 
452  size_t value_len; /* bytes of payload following the struct
			  * (string, url or blob) */
453  lcsp_css_units_t unit;
454 
455  union {
456  lws_fx_t i; /* value for units whose comment says u.i */
457  uint32_t rgba; /* for colours */
458  } u;
459 
461 
463 
464  /* .value_len bytes follow (for strings and blobs) */
465 } lcsp_atr_t;
466 
467 /* css definitions like font-weight: */
468 typedef struct lcsp_defs {
470  lws_dll2_owner_t atrs; /* lcsp_atr_t */
471  lcsp_props_t prop; /* lcsp_props_t, LCSP_PROP_* */
472 } lcsp_defs_t;
473 
474 typedef struct lcsp_names {
477 
478  /* name + NUL follow */
479 } lcsp_names_t;
480 
481 typedef struct lcsp_stanza { /* css stanza, with names and defs */
483 
484  lws_dll2_owner_t names; /* lcsp_names_t */
485  lws_dll2_owner_t defs; /* lcsp_defs_t */
486 
487 } lcsp_stanza_t;
488 
489 /*
490  * A list of stanza references can easily have to bring in the same stanza
491  * multiple times, eg, <div><span class=x><div> won't work unless the div
492  * stanzas are listed twice at different places in the list. It means we can't
493  * use dll2 directly since the number of references is open-ended.
494  *
495  * lcsp_stanza_ptr provides indirection that allows multiple listings.
496  */
497 
498 typedef struct lcsp_stanza_ptr {
500 
501  lcsp_stanza_t *stz;
502 } lcsp_stanza_ptr_t;
503 
504 typedef struct lcsp_atr_ptr {
506 
507  lcsp_atr_t *atr;
508 } lcsp_atr_ptr_t;
509 
510 #define LHP_FLAG_DOCUMENT_END (1 << 0)
511 
512 typedef struct lhp_ctx {
513  lws_dll2_owner_t stack; /* lhp_pstack_t */
514 
515  struct lwsac *cssac; /* css allocations all in an ac */
516  struct lwsac *cascadeac; /* active_stanzas ac */
517  struct lwsac *propatrac; /* prop atr query results ac */
518  lws_dll2_owner_t css; /* lcsp_stanza_t (all in ac) */
519 
521 
523  lcsp_css_units_t unit;
524  lcsp_stanza_t *stz; /* current stanza getting properties */
525  lcsp_defs_t *def; /* current property getting values */
526 
527  lws_dll2_owner_t active_stanzas; /* lcsp_stanza_ptr_t allocated
528  * in cascadeac */
529  lws_dll2_owner_t active_atr; /* lcsp_atr_ptr_t allocated in
530  * propatrac */
531 
533 
534  const char *base_url; /* strdup of https://x.com/y.html */
535  sul_cb_t ssevcb; /* callback for ss events */
536  lws_sorted_usec_list_t *ssevsul; /* sul to use to resume rz */
537  sul_cb_t sshtmlevcb; /* callback for more html parse */
538  lws_sorted_usec_list_t *sshtmlevsul; /* sul for more html parse */
539 
540  void *user;
541  void *user1;
542  const char *tag; /* private */
543  size_t tag_len; /* private */
544 
545  int npos;
546  int state; /* private */
547  int state_css_comm; /* private */
548  int nl_temp;
550 
553  int32_t window; /* 0, or ss item flow control limit */
554 
555  union {
556  uint32_t s;
557  struct {
558  uint32_t first:1;
559  uint32_t closing:1;
560  uint32_t void_element:1;
561  uint32_t doctype:1;
562  uint32_t inq:1;
563  uint32_t tag_used:1;
564  uint32_t arg:1;
565  uint32_t default_css:1;
566 #define LHP_CSS_PROPVAL_INT_WHOLE 1
567 #define LHP_CSS_PROPVAL_INT_FRAC 2
568 #define LHP_CSS_PROPVAL_INT_UNIT 3
569  uint32_t integer:2;
570  uint32_t color:2;
571  } f;
572  } u;
573 
574  int prop; /* lcsp_props_t */
575  int propval; /* lcsp_propvals_t */
576  int16_t css_state; /* private */
577  int16_t cssval_state; /* private */
578 
583 
584  /* at end so we can memset members above it in one go */
585 
587 
588 } lhp_ctx_t;
589 
590 /**
591  * lws_lhp_construct() - Construct an lhp context
592  *
593  * \param ctx: the lhp context to prepare
594  * \param cb: the stream parsing callback
595  * \param user: opaque user pointer available from the lhp context
596  * \param ic: struct with arguments for lhp context
597  *
598  * The lhp context is allocated by the caller (the size is known).
599  * Prepares an lhp context to parse html. Returns 0 for OK, or nonzero if OOM.
600  */
601 LWS_VISIBLE LWS_EXTERN int
602 lws_lhp_construct(lhp_ctx_t *ctx, lhp_callback cb, void *user,
603  const lws_surface_info_t *ic);
604 
605 /**
606  * lws_lhp_destruct() - Destroy an lhp context
607  *
608  * \param ctx: the lhp context to destroy
609  *
610  * Destroys an lhp context. The lhp context is allocated by the caller (the
611  * size is known). But there are suballocations that must be destroyed with
612  * this.
613  */
614 LWS_VISIBLE LWS_EXTERN void
615 lws_lhp_destruct(lhp_ctx_t *ctx);
616 
617 /**
618  * lws_lhp_ss_browse() - browse url using SS and parse via lhp to DLOs
619  *
620  * \param cx: the lws_context
621  * \param rs: the user's render state object
622  * \param url: the https://x.com/y.xyz URL to browse
623  * \param render: the user's linewise render callback (called from \p rs.sul)
624  *
625  * High level network fetch via SS and render html via lhp / DLO
626  *
627  * rs->ic must be prepared before calling.
628  *
629  * Returns nonzero if an early, fatal problem, else returns 0 and continues
630  * asynchronously.
631  *
632  * If rs->box is (0,0,0,0) on entry, it is set to represent the whole display
633  * surface. Otherwise if not representing the whole display surface, it
634  * indicates partial mode should be used.
635  */
636 LWS_VISIBLE LWS_EXTERN int
637 lws_lhp_ss_browse(struct lws_context *cx, lws_display_render_state_t *rs,
638  const char *url, sul_cb_t render);
639 
640 /**
641  * lws_lhp_parse() - parses a chunk of input HTML
642  *
643  * \param ctx: the parsing context
644  * \param buf: pointer to the start of the chunk of html
645  * \param len: pointer to the number of bytes of html available at *\p buf
646  *
647  * Parses up to *len bytes at *buf. On exit, *buf and *len are adjusted
648  * according to how much data was used. May return before processing all the
649  * input.
650  *
651  * Returns LWS_SRET_WANT_INPUT if the parsing is stalled on some other async
652  * event (eg, fetch of image to find out the dimensions).
653  *
654  * The lws_lhp_ss_browse() api wraps this.
655  */
656 LWS_VISIBLE LWS_EXTERN lws_stateful_ret_t
657 lws_lhp_parse(lhp_ctx_t *ctx, const uint8_t **buf, size_t *len);
658 
659 /**
660  * lws_css_cascade_get_prop_atr() - create active css atr list for property
661  *
662  * \param ctx: the parsing context
663  * \param prop: the LCSP_PROP_ property to generate the attribute list for
664  *
665  * Returns NULL if no atr or OOM.
666  *
667  * Otherwise produces a list of active CSS property attributes walkable via
668  * ctx->active_atr, and returns the tail one. For simple attributes where the
669  * last definition is the active one, this points to the last definition.
670  */
671 LWS_VISIBLE LWS_EXTERN const lcsp_atr_t *
672 lws_css_cascade_get_prop_atr(lhp_ctx_t *ctx, lcsp_props_t prop);
673 
674 /**
675  * lws_http_rel_to_url() - make absolute url from base and relative
676  *
677  * \param dest: place to store the result
678  * \param len: max length of result including NUL
679  * \param base: a reference url including a file part
680  * \param rel: the absolute or relative url or path to apply to base
681  *
682  * Copy the url form of rel into dest, using base to fill in missing context
683  *
684  * If base is https://x.com/y/z.html
685  *
686  * a.html -> https://x.com/y/a.html
687  * ../b.html -> https://x.com/b.html
688  * /c.html -> https://x.com/c.html
689  * https://y.com/a.html -> https://y.com/a.html
690  */
691 LWS_VISIBLE LWS_EXTERN int
692 lws_http_rel_to_url(char *dest, size_t len, const char *base, const char *rel);
693 
694 LWS_VISIBLE LWS_EXTERN lhp_pstack_t *
695 lws_css_get_parent_block(lhp_ctx_t *ctx, lhp_pstack_t *ps);
696 
697 LWS_VISIBLE LWS_EXTERN const char *
698 lws_css_pstack_name(lhp_pstack_t *ps);
699 
700 LWS_VISIBLE LWS_EXTERN const char *
701 lws_html_get_atr(lhp_pstack_t *ps, const char *aname, size_t aname_len);
702 
703 LWS_VISIBLE LWS_EXTERN const lws_fx_t *
705 
706 LWS_VISIBLE LWS_EXTERN void
707 lws_lhp_tag_dlo_id(lhp_ctx_t *ctx, lhp_pstack_t *ps, lws_dlo_t *dlo);
708 
709 void
710 lhp_set_dlo_padding_margin(lhp_pstack_t *ps, lws_dlo_t *dlo);
711 
712 #define LWS_LHPREF_WIDTH 0
713 #define LWS_LHPREF_HEIGHT 1
714 #define LWS_LHPREF_NONE 2
715 
716 LWS_VISIBLE LWS_EXTERN int
717 lhp_prop_axis(const lcsp_atr_t *a);