2 * Copyright Neil Brown ©2015-2023 <neil@brown.name>
3 * May be distributed under terms of GPLv2 - see file:COPYING
5 * Assorted utility functions used by edlib
8 #define _GNU_SOURCE /* for asprintf */
/*
 * buf_init: prepare the buffer 'b' for use.
 * The only step visible in this excerpt allocates b->size bytes for
 * b->b; where b->size and b->len are initialised is not shown.
 * NOTE(review): no check of the malloc() result is visible here -
 * confirm how allocation failure is meant to be handled.
 */
22 void buf_init(struct buf *b safe)
25 b->b = malloc(b->size);
/*
 * buf_resize: resize the storage of 'b' for 'size' bytes of content.
 * One byte is added to the request so the content can be
 * nul-terminated, then b->b is reallocated to b->size bytes.
 * The assignment that updates b->size is not shown in this excerpt.
 * NOTE(review): the realloc() result is stored directly in b->b, so a
 * failed realloc would lose the old pointer; no check is visible here.
 */
29 void buf_resize(struct buf *b safe, int size)
31 size += 1; /* we will nul-terminate */
34 b->b = realloc(b->b, b->size);
/*
 * buf_concat_len: append 'l' bytes starting at 's' to the buffer 'b'.
 * The bytes are copied to offset b->len of b->b.  The update of b->len
 * and any nul-termination happen on lines not shown in this excerpt.
 */
38 void buf_concat_len(struct buf *b safe, const char *s safe, int l)
/* Grow when the new bytes would not leave at least one byte spare
 * (note the >=); presumably the spare byte is for a terminating nul.
 */
41 if (b->len + l >= b->size) {
/* Repeat the growth step (not shown) until the data fits. */
42 while (b->len + l >= b->size)
/* NOTE(review): result assigned straight back to b->b; no failure
 * check is visible in this excerpt.
 */
44 b->b = realloc(b->b, b->size);
46 memcpy(b->b + b->len, s, l);
/*
 * buf_concat: append the string 's' to the buffer 'b'.
 * Delegates to buf_concat_len() with a length 'l' that is computed on
 * a line not shown in this excerpt (presumably strlen(s)).
 */
51 void buf_concat(struct buf *b safe, const char *s safe)
54 buf_concat_len(b, s, l);
/*
 * buf_append: append the wide character 'wch' to the buffer 'b'.
 * put_utf8() writes the encoding into the local array 't' (declared on
 * a line not shown) and the string it returns is passed to buf_concat().
 */
57 void buf_append(struct buf *b safe, wchar_t wch)
61 buf_concat(b, put_utf8(t, wch));
/*
 * buf_append_byte: append the single byte 'c' to the buffer 'b'.
 * Uses buf_concat_len() with an explicit length of 1, so the byte is
 * passed by length rather than as a nul-terminated string.
 */
64 void buf_append_byte(struct buf *b safe, char c)
66 buf_concat_len(b, &c, 1);
70 * performance measurements
/* Start time, in nanoseconds, for each timetype; written by time_start(). */
73 static long long tstart[TIME__COUNT];
/* Per-timetype count and running total.  stat_dump() prints the count
 * and the total divided by the count; the code that updates them is
 * not shown in this excerpt (presumably time_stop()).
 */
74 static int tcount[TIME__COUNT];
75 static long long tsum[TIME__COUNT];
/* Tested by the entry guard of time_start() and time_stop(); starts enabled. */
76 static int stats_enabled = 1;
/* Seconds value of the monotonic clock recorded by time_stop();
 * time_stop() treats 0 as "not yet set".
 */
78 static time_t last_dump = 0;
/* Stream opened by stat_dump() and written to by the dump helpers. */
79 static FILE *dump_file;
/* Forward declarations of helpers defined later in this file. */
81 static void dump_key_hash(void);
82 static void dump_count_hash(void);
83 static void stat_dump(void);
84 static void dump_mem(void);
/* Printable name for each enum timetype value, set with designated
 * initialisers and printed by stat_dump().  Only some of the entries
 * are visible in this excerpt.
 */
86 static const char *tnames[] = {
88 [TIME_WINDOW] = "WINDOW",
91 [TIME_TIMER] = "TIMER",
93 [TIME_REFRESH] = "REFRESH",
/* Nanoseconds per second; used to convert a timespec to nanoseconds. */
97 #define NSEC 1000000000
/*
 * time_start: record the current CLOCK_MONOTONIC time, in nanoseconds,
 * as the start time for 'type' in tstart[].
 */
98 void time_start(enum timetype type)
100 struct timespec start;
/* Guard against out-of-range types and disabled statistics; the
 * guarded statement is not shown (presumably an early return).
 */
101 if (type < 0 || type >= TIME__COUNT || !stats_enabled)
103 clock_gettime(CLOCK_MONOTONIC, &start);
/* NOTE(review): NSEC is an int constant, so the multiplication is done
 * in the type of tv_sec (time_t).  Where time_t is 32 bits this could
 * overflow before widening to long long - confirm target platforms.
 */
104 tstart[type] = start.tv_sec * NSEC + start.tv_nsec;
/*
 * time_stop: finish the measurement started by time_start() for 'type'.
 * Computes the elapsed nanoseconds and then applies time and count
 * thresholds against last_dump.  The statements those tests guard
 * (presumably early returns and a call that dumps statistics) are not
 * shown in this excerpt.
 */
107 void time_stop(enum timetype type)
109 struct timespec stop;
/* Same guard as time_start(); the guarded statement is not shown. */
112 if (type < 0 || type >= TIME__COUNT || !stats_enabled)
116 clock_gettime(CLOCK_MONOTONIC, &stop);
/* Elapsed nanoseconds since the matching time_start(). */
118 nsec = (stop.tv_sec * NSEC + stop.tv_nsec) - tstart[type];
/* With EDLIB_STATS_FAST in the environment the thresholds are 5
 * seconds since last_dump and 10 TIME_REFRESH counts.
 */
123 if (getenv("EDLIB_STATS_FAST")) {
124 if (stop.tv_sec < last_dump + 5 || tcount[TIME_REFRESH] < 10)
/* Larger thresholds: 30 seconds and 100 TIME_REFRESH counts.
 * Presumably this is the else-branch of the test above; the 'else'
 * itself is not visible here.
 */
127 if (stop.tv_sec < last_dump + 30 || tcount[TIME_REFRESH] < 100)
/* last_dump has never been set: record the current time. */
130 if (last_dump == 0) {
131 last_dump = stop.tv_sec;
/* EDLIB_STATS is consulted here; what is done when it is unset is
 * not shown in this excerpt.
 */
134 if (!getenv("EDLIB_STATS")) {
138 last_dump = stop.tv_sec;
/*
 * stat_dump: write the accumulated statistics to a freshly opened
 * dump_file.  A line starting with the current time() value is
 * written, followed by one " name:count:average" field per timetype.
 * Calls to the other dump helpers and the closing of the file are not
 * visible in this excerpt.
 */
142 static void stat_dump(void)
/* Two candidate file names are visible: a per-process name in the
 * current directory and a fixed name under /tmp.  The condition that
 * chooses between them is not shown.
 * NOTE(review): the asprintf() return value is not checked on this
 * line, and the fixed /tmp name is predictable - confirm both are
 * acceptable.
 */
148 asprintf(&fname, ".edlib_stats-%d", getpid());
150 fname = "/tmp/edlib_stats";
151 dump_file = fopen(fname, "w");
157 fprintf(dump_file, "%ld:", (long)time(NULL));
158 for (i = 0; i< TIME__COUNT; i++) {
/* "x ?: 1" is the GNU conditional with omitted middle operand; it
 * substitutes 1 for a zero count so the division cannot be by zero.
 */
159 fprintf(dump_file, " %s:%d:%lld", tnames[i], tcount[i],
160 tsum[i] / (tcount[i]?:1));
166 fprintf(dump_file, "\n");
/*
 * qhash: mix one byte 'key' into the running hash value 'start'.
 * The byte is xor-ed into 'start' and the result multiplied by the
 * constant 0x61C88647U in unsigned arithmetic; the unsigned product is
 * then converted to the int return type.
 */
171 inline static int qhash(char key, unsigned int start)
173 return (start ^ key) * 0x61C88647U;
/*
 * hash_str: hash the string 'key' by folding each byte through qhash().
 * Hashing stops at the first nul byte, or after 'len' bytes when 'len'
 * is non-negative; a negative 'len' means "up to the nul".
 * The initial value of 'h' and the return are not shown in this excerpt.
 */
176 static int hash_str(const char *key safe, int len)
181 for (i = 0; (len < 0 || i < len) && key[i]; i++)
182 h = qhash(key[i], h);
/* Bucket heads of the per-key timing hash table; searched through
 * hash_find() by time_stop_key() and walked by dump_key_hash().
 */
194 static struct khash *khashtab[1024];
/* Stack of keys currently being timed.  time_start_key() stores a
 * 'tstart' and a 'name' in each entry; the full member list and the
 * stack size are not shown in this excerpt.
 */
196 static struct kstack {
/*
 * time_start_key: begin timing the named activity 'key'.
 * Stores the current CLOCK_MONOTONIC time in nanoseconds and the key
 * pointer in the entry kstack[ktos-1].  The code that adjusts 'ktos'
 * and any guards are not shown in this excerpt.
 * The key pointer itself is stored, not a copy of the string.
 */
202 void time_start_key(const char *key safe)
204 struct timespec start;
211 clock_gettime(CLOCK_MONOTONIC, &start);
212 kstack[ktos-1].tstart = start.tv_sec * NSEC + start.tv_nsec;
213 kstack[ktos-1].name = key;
/*
 * hash_find: look up 'key' in the 1024-bucket hash table 'table'.
 * The bucket is selected by the low 10 bits of hash_str(key, -1) and
 * its chain is searched for an entry with the same hash and name.
 * A new entry is allocated and given a copy of the key; presumably this
 * happens only when the search fails, but the test between the search
 * and the allocation is not shown in this excerpt.
 */
216 static struct khash *hash_find(struct khash **table, const char *key safe)
218 struct khash *h, **hp;
221 hash = hash_str(key, -1);
222 hp = &table[hash & 1023];
/* Stop at the end of the chain or at the first entry that matches
 * both the hash and the name.
 */
223 while ( (h = *hp) && (h->hash != hash || strcmp(h->name, key) != 0))
/* NOTE(review): the size adds strlen(key) with no +1.  That is only
 * enough for the strcpy() below if struct khash already reserves at
 * least one byte for 'name' - confirm against the struct definition,
 * which is not shown here.  The malloc() result check is not visible.
 */
226 h = malloc(sizeof(*h) + strlen(key));
230 strcpy(h->name, key);
/*
 * time_stop_key: finish timing the named activity 'key'.
 * The elapsed nanoseconds since the start time stored in kstack[ktos]
 * are added to the 'tsum' of the khashtab entry for 'key'.
 * The adjustment of 'ktos' and other guards are not shown here.
 */
237 void time_stop_key(const char *key safe)
239 struct timespec stop;
/* Pointer comparison, not strcmp(): the caller must pass the same
 * pointer that was given to time_start_key().  The action taken on a
 * mismatch is not shown.
 */
249 if (key != kstack[ktos].name)
251 clock_gettime(CLOCK_MONOTONIC, &stop);
253 h = hash_find(khashtab, key);
255 h->tsum += stop.tv_sec * NSEC + stop.tv_nsec - kstack[ktos].tstart;
/*
 * dump_key_hash: write the per-key timing table to dump_file.
 * Every entry in every bucket of khashtab is printed as a
 * " name:count:average" field, followed by a " khash:" summary field
 * built from 'cnt', 'buckets' and 'max'.  How those three counters are
 * computed is not shown in this excerpt.
 */
258 static void dump_key_hash(void)
265 for (i = 0; i < 1024; i++) {
268 for (h = khashtab[i]; h ; h = h->next) {
/* "x ?: 1" (GNU extension) avoids dividing by a zero count. */
272 fprintf(dump_file, " %s:%d:%lld",
274 h->tsum / (h->tcount?:1));
283 fprintf(dump_file, " khash:%d:%d:%d", cnt, buckets, max);
/* Bucket heads of the hash table used by stat_count(); it has the same
 * shape as khashtab and is searched with the same hash_find().
 */
286 static struct khash *count_tab[1024];
/*
 * stat_count: find the count_tab entry for 'name' using hash_find().
 * What is then done with the entry (presumably incrementing a counter)
 * is not shown in this excerpt.
 */
288 void stat_count(char *name safe)
294 h = hash_find(count_tab, name);
/*
 * dump_count_hash: write the count_tab table to dump_file.
 * Each entry is printed with the format " %s:%d:-" (its arguments are
 * on a line not shown), followed by a " nhash:" summary field built
 * from 'cnt', 'buckets' and 'max'.  How those counters are computed is
 * not shown in this excerpt.
 */
298 static void dump_count_hash(void)
305 for (i = 0; i < 1024; i++) {
308 for (h = count_tab[i]; h ; h = h->next) {
310 fprintf(dump_file, " %s:%d:-",
320 fprintf(dump_file, " nhash:%d:%d:%d", cnt, buckets, max);
/*
 * hash_free: empty the 1024-bucket hash table 'tab'.
 * For each bucket the inner loop repeats until the bucket head is
 * NULL; the statements that unlink and release each entry are not
 * shown in this excerpt.
 */
323 static void hash_free(struct khash **tab safe)
327 for (i = 0; i < 1024; i++) {
330 while ((h = tab[i]) != NULL) {
339 /* stats_enabled is only valid after 30 seconds, so
340 * we need to check EDLIB_STATS directly
342 if (stats_enabled && getenv("EDLIB_STATS"))
344 hash_free(count_tab);
/* List of every memory pool that has been used by do_alloc(); walked
 * by dump_mem().
 */
349 static LIST_HEAD(mem_pools);
/*
 * do_alloc: allocate 'size' bytes with malloc() and account for them
 * in 'pool'.  The pool's allocation count is incremented, its
 * high-water mark max_bytes is raised when bytes exceeds it, and the
 * pool is added to mem_pools if its linkage is not yet on a list.
 * The update of pool->bytes and the return are not shown here.
 */
351 void *safe do_alloc(struct mempool *pool safe, int size, int zero)
353 void *ret = malloc(size);
/* Clears the new memory; presumably guarded by 'zero' on a line not
 * shown.  NOTE(review): no NULL check of 'ret' is visible before this.
 */
356 memset(ret, 0, size);
358 pool->allocations += 1;
359 if (pool->bytes > pool->max_bytes)
360 pool->max_bytes = pool->bytes;
/* An empty linkage means this pool is not yet on mem_pools. */
361 if (list_empty(&pool->linkage))
362 list_add(&pool->linkage, &mem_pools);
/*
 * do_unalloc: account for the release of 'obj' from 'pool'.
 * The visible line decrements the pool's allocation count; the byte
 * accounting and the release of 'obj' itself are not shown in this
 * excerpt.
 */
366 void do_unalloc(struct mempool *pool safe, const void *obj, int size)
370 pool->allocations -= 1;
/*
 * dump_mem: write one line of memory-pool statistics to dump_file.
 * The line starts with "mem:" and has one field per pool on mem_pools
 * in the form " name:bytes(max_bytes):allocations".
 */
375 static void dump_mem(void)
379 fprintf(dump_file, "mem:");
380 list_for_each_entry(p, &mem_pools, linkage)
381 fprintf(dump_file, " %s:%ld(%ld):%ld",
382 p->name, p->bytes, p->max_bytes, p->allocations);
383 fprintf(dump_file, "\n");
386 /* UTF-8 handling....
387 * - return wchar (wint_t) and advance pointer
388 * - append encoding to buf, advance pointer, decrease length
391 * - if it starts '0b0', it is a 7bit code point
392 * - if it starts '0b10' it is a non-initial byte and provides 6 bits.
393 * - if it starts '0b110' it is first of 2 and provides 5 of 11 bits
394 * - if it starts '0b1110' it is first of 3 and provides 4 of 16 bits
395 * - if it starts '0b11110' it is first of 4 and provides 3 of 21 bits.
/*
 * get_utf8: decode one UTF-8 code point starting at *cpp.
 * 'end', when not NULL, marks the end of the available bytes.
 * The return statements and the update of *cpp are not shown in this
 * excerpt; utf8_valid() compares the result against WEOF.
 */
397 wint_t get_utf8(const char **cpp safe, const char *end)
401 const char *cp = *cpp;
/* Nothing left to read when a bound is given and already reached. */
406 if (end && end <= cp)
/* Read the first byte as unsigned so the range tests below work. */
408 c = (unsigned char)*cp++;
/* First bytes below 0xf0 and below 0xf8 are handled by these two
 * branches; the earlier branches of the chain are not shown.
 */
418 } else if (c < 0xf0) {
421 } else if (c < 0xf8) {
/* The bytes still needed ('tail', presumably the number of
 * continuation bytes) must lie before 'end' when a bound is given.
 */
426 if (end && end < cp + tail)
/* Every following byte must have the form 0b10xxxxxx ... */
430 if ((c & 0xc0) != 0x80)
/* ... and contributes its low 6 bits to the result. */
432 ret = (ret << 6) | (c & 0x3f);
/*
 * put_utf8: write the UTF-8 encoding of 'ch' into 'buf'.
 * The length 'l' and the initial 'mask' are chosen from the size of
 * 'ch' on lines not shown in this excerpt; the visible thresholds are
 * 0x800, 0x10000 and 0x200000.  buf_append() passes the returned
 * pointer to buf_concat() as a string.
 */
438 char *safe put_utf8(char *buf safe, wchar_t ch)
446 } else if (ch < 0x800) {
449 } else if (ch < 0x10000) {
452 } else if (ch < 0x200000) {
458 for (i = 0 ; i < l; i++) {
/* Byte i takes the i-th 6-bit group of 'ch', most significant
 * first, limited to the payload bits allowed by 'mask'.
 */
459 buf[i] = (ch >> ((l-1-i)*6)) & mask;
/* Set every bit above the one just over the mask, leaving that one
 * bit clear: e.g. mask 0x3f adds 0x80, mask 0x1f adds 0xc0.
 */
460 buf[i] |= ~(mask+mask+1);
/*
 * utf8_strlen: measure the string 's' in code points rather than bytes.
 * The visible test selects bytes that are not of the continuation form
 * 0b10xxxxxx; presumably each such byte is counted, but the loop and
 * the counter are not shown in this excerpt.
 */
467 int utf8_strlen(const char *s safe)
472 if ((*s & 0xc0) != 0x80)
/*
 * utf8_strnlen: like utf8_strlen() but examines at most 'n' bytes.
 * The loop runs while the current byte is not nul and 'n' is positive,
 * selecting bytes that are not of the continuation form 0b10xxxxxx.
 * The counter and the updates of 's' and 'n' are not shown here.
 */
479 int utf8_strnlen(const char *s safe, int n)
483 while (*s && n > 0) {
484 if ((*s & 0xc0) != 0x80)
/*
 * utf8_valid: examine the string 's' one code point at a time.
 * Code points are read with get_utf8() using no end bound until it
 * returns WEOF.  The checks applied to each code point and the return
 * values are not shown in this excerpt.
 */
492 int utf8_valid(const char *s safe)
496 while ((c = get_utf8(&s, NULL)) != WEOF) {
505 * When walking backwards through a string, we need to round a point
506 * down to the start of a code-point.
507 * When reading a file into allocated chunks of memory, we want each chunk
508 * to hold a whole number of code points.
509 * For both of these needs, we have utf8_round_len which tries to reduce
510 * the given length to a code-point boundary, if possible.
512 * We only adjust the length if we can find a start-of-code-point in
513 * the last 4 bytes. (longest UTF-8 encoding of 21bit unicode is 4 bytes).
514 * A start of codepoint starts with 0b0 or 0b11, not 0b10.
/*
 * utf8_round_len: reduce 'len' to a code-point boundary in 'text'.
 * The loop inspects text[len-i] while i is at most 'len' and at most 4,
 * treating a byte of the form 0b10xxxxxx as being inside a code point.
 * The initial value of 'i', the loop's other branch and the return are
 * not shown in this excerpt.
 */
516 int utf8_round_len(const char *text safe, int len)
518 /* The string at 'text' is *longer* than 'len', or
519 * at least text[len] is defined - it can be nul. If
520 * [len] isn't the start of a new codepoint, and there
521 * is a start marker in the previous 4 bytes,
522 * move back to there.
525 while (i <= len && i <=4)
526 if ((text[len-i] & 0xC0) == 0x80)
527 /* next byte is inside a UTF-8 code point, so
528 * this isn't a good spot to end. Try further
/* Cached answer for debugger_is_present(); a negative value means the
 * test has not been run yet.
 */
536 static int _debugger_present = -1;
/*
 * _sigtrap_handler: SIGTRAP handler installed by debugger_is_present().
 * If it runs, it records 0 in _debugger_present and restores the
 * default SIGTRAP action.
 */
537 static void _sigtrap_handler(int signum)
539 _debugger_present = 0;
540 signal(SIGTRAP, SIG_DFL);
/*
 * debugger_is_present: report the cached value of _debugger_present,
 * running the detection once on the first call.
 * Detection assumes a debugger (value 1) and installs _sigtrap_handler
 * for SIGTRAP; the handler resets the value to 0 if it is invoked.
 * The step that causes the SIGTRAP is not shown in this excerpt
 * (presumably raise(SIGTRAP)).
 */
543 bool debugger_is_present(void)
545 if (_debugger_present < 0) {
546 _debugger_present = 1;
547 signal(SIGTRAP, _sigtrap_handler);
550 return _debugger_present;
/*
 * afind_val: scan attribute text between *cp and 'end', where ':' and
 * ',' act as separators.  The return values and the update of *cp are
 * not shown in this excerpt.
 */
554 const char *afind_val(const char **cp safe, const char *end)
/* Advance to the next ':' or ',' or to 'end'. */
563 while (c < end && *c != ':' && *c != ',')
/* Skip a run of ','.
 * NOTE(review): this loop reads *c before testing c < end, unlike the
 * other loops in this function; if c can equal 'end' here that reads
 * one byte past the range - consider swapping the operands.
 */
570 while (*c == ',' && c < end)
/* Advance to the next ',' or to 'end' ... */
581 while (c < end && *c != ',')
/* ... and then past any run of ','. */
583 while (c < end && *c == ',')
/*
 * aupdate: replace the string held in *cp with a copy of the value at
 * 'v'.  The copy is made with strndup() and covers the bytes from 'v'
 * up to 'end'; the visible scan moves 'end' forward until it reaches a
 * ',' or a character below ' ' (which includes the terminating nul).
 * The initial value of 'end', the freeing of the old string and the
 * return are not shown in this excerpt.
 * NOTE(review): if 'end' points to plain char and char is signed,
 * bytes of 0x80 and above compare below ' ' and would stop the scan
 * early - confirm whether non-ASCII values are expected.
 */
591 char *aupdate(char **cp safe, const char *v)
593 /* duplicate value at v and store in *cp, freeing what is there
598 while (end && *end != ',' && *end >= ' ')
603 *cp = strndup(v, end-v);
/*
 * amatch: compare the attribute text 'a' with the string 'm'.
 * The leading characters that 'a' and 'm' share are stepped over
 * first.  The remaining tests distinguish "m not fully matched" from
 * "a not fully matched", where 'a' counts as finished at ':', ',' or
 * any character below ' ' (which includes nul).  The return statements
 * are not shown in this excerpt.
 */
609 bool amatch(const char *a safe, const char *m safe)
611 while (*a && *a == *m) {
616 /* Didn't match all of m */
/* NOTE(review): where char is signed, a byte of 0x80 or above in 'a'
 * compares below ' ' and is treated as the end of 'a' - confirm.
 */
618 if (*a != ':' && *a != ',' && *a >= ' ')
619 /* Didn't match all of a */
/*
 * aprefix: compare the start of the attribute text 'a' with 'm'.
 * The leading characters that 'a' and 'm' share are stepped over; the
 * test that follows (not shown) handles the case where 'm' was not
 * fully matched.  No test on the rest of 'a' is visible in this
 * excerpt, in contrast to amatch().
 */
624 bool aprefix(const char *a safe, const char *m safe)
626 while (*a && *a == *m) {
631 /* Didn't match all of m */
/*
 * anum: parse a decimal number from the start of the attribute text 'v'.
 * The text is converted with strtol() in base 10.  It is rejected when
 * no characters were consumed, or when the character after the number
 * is neither ',' nor a character below ' ' (which includes nul).
 * The return statements are not shown in this excerpt; the comment
 * below indicates zero is used for a rejected value.
 */
636 long anum(const char *v safe)
639 long ret = strtol(v, &end, 10);
/* NOTE(review): no errno/ERANGE check is visible, so an out-of-range
 * value would be accepted as whatever strtol() returns - confirm.
 */
640 if (end == v || !end ||
641 (*end != ',' && *end >= ' '))
642 /* Not a valid number - use zero */