libwebsockets
Lightweight C library for HTML5 websockets
lws-metrics.h
Go to the documentation of this file.
1  /*
2  * libwebsockets - small server side websockets and web server implementation
3  *
4  * Copyright (C) 2010 - 2021 Andy Green <andy@warmcat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  * Public apis related to metric collection and reporting
25  */
26 
27 /* lws_metrics public part */
28 
/*
 * Unsigned 64-bit scalar used for metric sums and for individual
 * measurements (see the agg.sum / agg.min / agg.max members of
 * lws_metric_pub_t).
 */
typedef uint64_t u_mt_t;
30 
/*
 * Metric policy / type flags, one bit each.
 *
 * NOTE(review): the listing this was recovered from dropped the enumerator
 * lines for bits 2 and 5 (only their doc comments survived).  The two names
 * below are restored to match those comments and the otherwise contiguous
 * bit numbering -- confirm against the upstream header.
 */
enum {
	LWSMTFL_REPORT_OUTLIERS				= (1 << 0),
	/**< track outliers and report them internally */
	LWSMTFL_REPORT_OOB				= (1 << 1),
	/**< report events as they happen */
	LWSMTFL_REPORT_INACTIVITY_AT_PERIODIC		= (1 << 2),
	/**< explicitly externally report no activity at periodic cb, by
	 * default no events in the period is just not reported */
	LWSMTFL_REPORT_MEAN				= (1 << 3),
	/**< average/min/max is meaningful, else only sum is meaningful */
	LWSMTFL_REPORT_ONLY_GO				= (1 << 4),
	/**< no-go pieces invalid */
	LWSMTFL_REPORT_DUTY_WALLCLOCK_US		= (1 << 5),
	/**< aggregate compares to wallclock us for duty cycle */
	LWSMTFL_REPORT_HIST				= (1 << 6),
	/**< our type is histogram (otherwise, sum / mean aggregation) */
};
48 
49 /*
50  * lws_metrics_tag allows your object to accumulate OpenMetrics-style
51  * descriptive tags before accounting for it with a metrics object at the end.
52  *
53  * Tags should represent low entropy information that is likely to repeat
54  * identically, so, eg, http method name, not eg, latency in us which is
55  * unlikely to be seen the same twice.
56  *
57  * Tags are just a list of name=value pairs, used for qualifying the final
58  * metrics entry with decorations in additional dimensions. For example,
59  * rather than keep individual metrics on methods, scheme, mountpoint, result
60  * code, you can keep metrics on http transactions only, and qualify the
61  * transaction metrics entries with tags that can be queried on the metrics
62  * backend to get the finer-grained information.
63  *
64  * http_srv{code="404",mount="/",method="GET",scheme="http"} 3
65  *
66  * For OpenMetrics the tags are converted to a { list } and appended to the base
67  * metrics name before using with actual metrics objects, the same set of tags
68  * on different transactions resolve to the same qualification string.
69  */
70 
71 typedef struct lws_metrics_tag {
73 
74  const char *name; /* tag, intended to be in .rodata, not copied */
75  /* overallocated value */
76 } lws_metrics_tag_t;
77 
78 LWS_EXTERN LWS_VISIBLE int
79 lws_metrics_tag_add(lws_dll2_owner_t *owner, const char *name, const char *val);
80 
#if defined(LWS_WITH_SYS_METRICS)
/*
 * wsi-specific version that also appends the tag value to the lifecycle tag
 * used for logging the wsi identity
 */
LWS_EXTERN LWS_VISIBLE int
lws_metrics_tag_wsi_add(struct lws *wsi, const char *name, const char *val);
#else
/*
 * Without LWS_WITH_SYS_METRICS the call expands to nothing, so its arguments
 * are not evaluated and there is no result value to test.
 */
#define lws_metrics_tag_wsi_add(_a, _b, _c)
#endif
91 
#if defined(LWS_WITH_SECURE_STREAMS)
/*
 * ss-specific version that also appends the tag value to the lifecycle tag
 * used for logging the ss identity
 */
#if defined(LWS_WITH_SYS_METRICS)
LWS_EXTERN LWS_VISIBLE int
lws_metrics_tag_ss_add(struct lws_ss_handle *ss, const char *name, const char *val);
#else
/*
 * Secure Streams enabled but metrics disabled: the call expands to nothing,
 * so its arguments are not evaluated and there is no result value.
 */
#define lws_metrics_tag_ss_add(_a, _b, _c)
#endif
#endif
104 
105 LWS_EXTERN LWS_VISIBLE void
106 lws_metrics_tags_destroy(lws_dll2_owner_t *owner);
107 
108 LWS_EXTERN LWS_VISIBLE size_t
109 lws_metrics_tags_serialize(lws_dll2_owner_t *owner, char *buf, size_t len);
110 
111 LWS_EXTERN LWS_VISIBLE const char *
112 lws_metrics_tag_get(lws_dll2_owner_t *owner, const char *name);
113 
114 /* histogram bucket */
115 
/*
 * One named histogram bucket.  The bucket name is stored in memory
 * overallocated directly after the struct: as the accessor macros below
 * show, the first byte past the struct holds the name length and the name
 * itself (NUL-terminated) starts at the following byte.
 *
 * NOTE(review): the two members were dropped by the listing this was
 * recovered from.  They are restored as a singly-linked `next` pointer
 * (lws_metric_pub_t keeps a `head` "first bucket in our bucket list") and a
 * 64-bit `count` ("bucket has a 64-bit count") -- confirm names and order
 * against the upstream header.
 */
typedef struct lws_metric_bucket {
	struct lws_metric_bucket	*next;
	uint64_t			count;

	/* name + NUL is overallocated */
} lws_metric_bucket_t;

/* get overallocated name of bucket from bucket pointer */
#define lws_metric_bucket_name_len(_b) (*((uint8_t *)&(_b)[1]))
#define lws_metric_bucket_name(_b) (((const char *)&(_b)[1]) + 1)
126 
127 /*
128  * These represent persistent local event measurements. They may aggregate
129  * a large number of events in between external dumping of summaries of the
130  * period covered, in two different ways
131  *
132  * 1) aggregation by sum or mean, to absorb multiple scalar readings
133  *
134  * - go / no-go ratio counting
135  * - mean averaging for, eg, latencies
136  * - min / max for averaged values
137  * - period the stats covers
138  *
139  * 2) aggregation by histogram, to absorb a range of outcomes that may occur
140  * multiple times
141  *
142  * - add named buckets to histogram
143  * - bucket has a 64-bit count
144  * - bumping a bucket just increments the count if already exists, else adds
145  * a new one with count set to 1
146  *
147  * The same type with a union covers both cases.
148  *
149  * The lws_system ops api that hooks lws_metrics up to a metrics backend is
150  * given a pointer to these according to the related policy, eg, hourly, or
151  * every event passed straight through.
152  */
153 
154 typedef struct lws_metric_pub {
155  const char *name;
156  /**< eg, "n.cn.dns", "vh.myendpoint" */
158  /**< ignored by lws, backend handler completely owns it */
159 
161  /**< us time metric started collecting, reset to us_dumped at dump */
163  /**< 0, or us time last event, reset to 0 at last dump */
165  /**< 0 if never, else us time of last dump to external api */
166 
167  /* scope of data in .u is "since last dump" --> */
168 
169  union {
170  /* aggregation, by sum or mean */
171 
172  struct {
173  u_mt_t sum[2];
174  /**< go, no-go summed for mean or plan sum */
175  u_mt_t min;
176  /**< smallest individual measurement */
177  u_mt_t max;
178  /**< largest individual measurement */
179 
180  uint32_t count[2];
181  /**< go, no-go count of measurements in sum */
182  } agg;
183 
184  /* histogram with dynamic named buckets */
185 
186  struct {
187  lws_metric_bucket_t *head;
188  /**< first bucket in our bucket list */
189 
190  uint64_t total_count;
191  /**< total count in all of our buckets */
192  uint32_t list_size;
193  /**< number of buckets in our bucket list */
194  } hist;
195  } u;
196 
198 
199 } lws_metric_pub_t;
200 
201 LWS_EXTERN LWS_VISIBLE void
202 lws_metrics_hist_bump_priv_tagged(lws_metric_pub_t *mt, lws_dll2_owner_t *tow,
203  lws_dll2_owner_t *tow2);
204 
205 
206 /*
207  * Calipers are a helper struct for implementing "hanging latency" detection,
208  * where setting the start time and finding the end time may happen in more than
209  * one place.
210  *
211  * There are convenience wrappers to eliminate caliper definitions and code
212  * cleanly if WITH_SYS_METRICS is disabled for the build.
213  */
214 
215 struct lws_metric;
216 
217 typedef struct lws_metric_caliper {
218  struct lws_dll2_owner mtags_owner; /**< collect tags here during
219  * caliper lifetime */
220  struct lws_metric *mt; /**< NULL == inactive */
222 } lws_metric_caliper_t;
223 
/*
 * Caliper / histogram convenience wrappers.
 *
 * With LWS_WITH_SYS_METRICS:
 *
 *  - _compose(name):       defines a caliper member / variable `name`
 *                          (the expansion supplies its own semicolon)
 *  - _bind(name, mt):      logs and asserts if the caliper is already bound,
 *                          then binds it to mt and stamps the start time
 *  - _declare(name, mt):   defines and initializes a bound caliper
 *  - _report(name, gng):   if a start time is set, reports the elapsed us via
 *                          lws_metric_event(), then resets the caliper
 *  - _report_hist(name, pwsi): if bound, bumps the histogram using the
 *                          caliper's tags (and pwsi's cal_conn tags when pwsi
 *                          is non-NULL), then resets the caliper
 *  - _cancel(name):        resets the caliper without reporting
 *  - _done(name):          zeroes us_start and mt, destroys collected tags
 *  - hist_bump / hist_bump_priv: forward to lws_metrics_hist_bump_()
 *
 * Without it, every wrapper expands to nothing.
 *
 * The multi-statement wrappers expand to plain brace blocks, and
 * _report_hist expands to a bare `if (...) { ... }`, not to
 * do { } while (0): be careful using them as the unbraced body of an
 * if / else at the call site.
 */
#if defined(LWS_WITH_SYS_METRICS)
#define lws_metrics_caliper_compose(_name) \
		lws_metric_caliper_t _name;
#define lws_metrics_caliper_bind(_name, _mt) \
	{ if (_name.mt) { \
		lwsl_err("caliper: overwrite %s\n", \
				lws_metrics_priv_to_pub(_name.mt)->name); \
		assert(0); } \
	  _name.mt = _mt; _name.us_start = lws_now_usecs(); }
#define lws_metrics_caliper_declare(_name, _mt) \
	lws_metric_caliper_t _name = { .mt = _mt, .us_start = lws_now_usecs() }
#define lws_metrics_caliper_report(_name, _go_nogo) \
	{ if (_name.us_start) { lws_metric_event(_name.mt, _go_nogo, \
			   (u_mt_t)(lws_now_usecs() - \
					   _name.us_start)); \
		  } lws_metrics_caliper_done(_name); }
#define lws_metrics_caliper_report_hist(_name, pwsi) if (_name.mt) { \
		lws_metrics_hist_bump_priv_tagged(lws_metrics_priv_to_pub(_name.mt), \
						  &_name.mtags_owner, \
						  pwsi ? &((pwsi)->cal_conn.mtags_owner) : NULL); \
		lws_metrics_caliper_done(_name); }

#define lws_metrics_caliper_cancel(_name) { lws_metrics_caliper_done(_name); }
#define lws_metrics_hist_bump(_mt, _name) \
		lws_metrics_hist_bump_(_mt, _name)
#define lws_metrics_hist_bump_priv(_mt, _name) \
		lws_metrics_hist_bump_(lws_metrics_priv_to_pub(_mt), _name)
#define lws_metrics_caliper_done(_name) { \
		_name.us_start = 0; _name.mt = NULL; \
		lws_metrics_tags_destroy(&_name.mtags_owner); }
#else
#define lws_metrics_caliper_compose(_name)
#define lws_metrics_caliper_bind(_name, _mt)
#define lws_metrics_caliper_declare(_name, _mp)
#define lws_metrics_caliper_report(_name, _go_nogo)
#define lws_metrics_caliper_report_hist(_name, pwsiconn)
#define lws_metrics_caliper_cancel(_name)
#define lws_metrics_hist_bump(_mt, _name)
#define lws_metrics_hist_bump_priv(_mt, _name)
#define lws_metrics_caliper_done(_name)
#endif
265 
266 /**
267  * lws_metrics_format() - helper to format a metrics object for logging
268  *
269  * \param pub: public part of metrics object
270  * \param buf: output buffer to place string in
271  * \param len: available length of \p buf
272  *
273  * Helper for describing the state of a metrics object as a human-readable
274  * string, accounting for how its flags indicate what it contains. This is not
275  * how you would report metrics, but during development it can be useful to
276  * log them inbetween possibily long report intervals.
277  *
278  * It uses the metric's flags to adapt the format shown appropriately, eg,
279  * as a histogram if LWSMTFL_REPORT_HIST etc
280  */
281 LWS_EXTERN LWS_VISIBLE int
282 lws_metrics_format(lws_metric_pub_t *pub, lws_metric_bucket_t **sub,
283  char *buf, size_t len);
284 
285 /**
286  * lws_metrics_hist_bump() - add or increment histogram bucket
287  *
288  * \param pub: public part of metrics object
289  * \param name: bucket name to increment
290  *
291  * Either increment the count of an existing bucket of the right name in the
292  * metrics object, or add a new bucket of the given name and set its count to 1.
293  *
294  * The metrics object must have been created with flag LWSMTFL_REPORT_HIST
295  *
296  * Normally, you will actually use the preprocessor wrapper
297  * lws_metrics_hist_bump() defined above, since this automatically takes care of
298  * removing itself from the build if WITH_SYS_METRICS is not defined, without
299  * needing any preprocessor conditionals.
300  */
301 LWS_EXTERN LWS_VISIBLE int
302 lws_metrics_hist_bump_(lws_metric_pub_t *pub, const char *name);
303 
304 LWS_VISIBLE LWS_EXTERN int
305 lws_metrics_foreach(struct lws_context *ctx, void *user,
306  int (*cb)(lws_metric_pub_t *pub, void *user));
307 
308 LWS_VISIBLE LWS_EXTERN int
309 lws_metrics_hist_bump_describe_wsi(struct lws *wsi, lws_metric_pub_t *pub,
310  const char *name);
311 
/*
 * Classification of what an event relates to.
 *
 * NOTE(review): the final enumerator line was dropped by the listing this
 * was recovered from; it is restored as LMT_COUNT (the number of
 * classifications) -- confirm against the upstream header.
 */
enum {
	LMT_NORMAL = 0,	/* related to successful events */
	LMT_OUTLIER,	/* related to successful events outside of bounds */

	LMT_FAIL,	/* related to failed events */

	LMT_COUNT,
};
320 
/* Why a metric is being reported */
typedef enum lws_metric_rpt {
	LMR_PERIODIC = 0,	/* we are reporting on a schedule */
	LMR_OUTLIER,		/* we are reporting the last outlier */
} lws_metric_rpt_kind_t;
325 
/*
 * go / no-go result selectors.  NOTE(review): presumably the values passed
 * as the go / no-go argument of metric events and used to index the
 * two-entry agg.sum[] / agg.count[] arrays ("go, no-go") -- confirm.
 */
#define METRES_GO	0
#define METRES_NOGO	1