git.neil.brown.name Git - edlib.git/blob - lib-unicode-names.c
TODO: clean out done items.
[edlib.git] / lib-unicode-names.c
1 /*
2  * Copyright Neil Brown ©2023 <neil@brown.name>
3  * May be distributed under terms of GPLv2 - see file:COPYING
4  *
5  * Parse the Unicode NamesList.txt file to find names for
6  * unicode characters.
7  */
8
9 #include <unistd.h>
10 #include <sys/mman.h>
11 #include <fcntl.h>
12 #include "core.h"
13
14 struct unicode_data {
15         struct command c;
16         char *names;
17         int len;
18 };
19
20 static void report_names(struct unicode_data *ud safe, const char *name safe,
21                          int which,
22                          struct pane *p safe, struct command *c safe)
23 {
24         /* name must be start of a word, as either primary or secondary
25          * name.  Ignore case.
26          * If "which" is zero, return them all, else only return the
27          * nth one where which==n
28          */
29         char *ptn = strconcat(p, "?i:^([0-9A-F]{4,5}    |       = ).*\\b", name);
30         int i;
31
32         if (!ud->names)
33                 return;
34
35         for (i = 0; i < ud->len; ) {
36                 int ch, s;
37                 char *cp, *n, *eol;
38
39                 s = call("text-search", p, 0, NULL, ptn,
40                          ud->len - i, NULL, ud->names + i);
41                 if (s <= 0)
42                         break;
43                 i += s-1;
44                 /* i is now the start of the match */
45                 cp = ud->names + i;
46                 eol = strchr(cp, '\n');
47                 if (!eol)
48                         break;
49                 i = (eol - ud->names) + 1;
50                 if (eol[-1] == '\r')
51                         eol -= 1;
52                 if (*cp == '\t') {
53                         /* secondary name "\t= "*/
54                         n = strndup(cp+3, eol-cp-3);
55                         /* find number */
56                         while (cp > ud->names &&
57                                (cp[-1] != '\n' || cp[0] == '\t'))
58                                 cp -= 1;
59                 } else {
60                         /* primary name "XXXXX?\t" */
61                         if (cp[4] == '\t')
62                                 n = strndup(cp+5, eol-cp-5);
63                         else
64                                 n = strndup(cp+6, eol-cp-6);
65                 }
66                 ch = strtoul(cp, &eol, 16);
67                 if (eol == cp+4 || eol == cp+5) {
68                         if (which == 0)
69                                 comm_call(c, "cb", p, ch, NULL, n);
70                         else {
71                                 which -= 1;
72                                 if (which == 0) {
73                                         comm_call(c, "cb", p, ch, NULL, n);
74                                         i = ud->len;
75                                 }
76                         }
77                 }
78                 free(n);
79         }
80 }
81
82 static void unicode_free(struct command *c safe)
83 {
84         struct unicode_data *ud = container_of(c, struct unicode_data, c);
85
86         if (ud->names)
87                 munmap(ud->names, ud->len);
88 }
89
90 DEF_CMD(unicode_names)
91 {
92         struct unicode_data *ud;
93         if (ci->comm == &unicode_names) {
94                 /* This is the first call - need to allocate storage,
95                  * load the NamesList file, and register a new command.
96                  */
97                 char *p;
98                 int fd;
99
100                 alloc(ud, pane);
101                 ud->c = unicode_names;
102                 ud->c.free = unicode_free;
103                 call_comm("global-set-command", ci->home, &ud->c, 0, NULL,
104                           "Unicode-names");
105                 p = call_ret(str, "xdg-find-edlib-file", ci->focus, 0, NULL,
106                              "NamesList.txt", 0, NULL, "data");
107                 if (!p)
108                         return Efail;
109                 fd = open(p, O_RDONLY);
110                 free(p);
111                 if (fd < 0)
112                         return Efail;
113                 ud->len = lseek(fd, 0, 2);
114                 ud->names = mmap(NULL, ud->len, PROT_READ, MAP_SHARED, fd, 0);
115                 close(fd);
116         } else {
117                 ud = container_of(ci->comm, struct unicode_data, c);
118         }
119         if (!ud->names)
120                 return Efail;
121         if (ci->str && ci->comm2)
122                 report_names(ud, ci->str, ci->num, ci->focus, ci->comm2);
123         return 1;
124 }
125
126 void edlib_init(struct pane *ed safe)
127 {
128         call_comm("global-set-command", ed, &unicode_names,
129                   0, NULL, "Unicode-names");
130 }