2 * Copyright Neil Brown ©2017-2023 <neil@brown.name>
3 * May be distributed under terms of GPLv2 - see file:COPYING
5 * Filter a view on a document to convert utf-8 sequences into
6 * the relevant unicode characters.
/*
 * DOC_NEXT and DOC_PREV name this file's forward and backward stepping
 * functions, utf8_next() and utf8_prev().
 * NOTE(review): the consumer of these macros and of PANE_DATA_VOID is
 * not visible in this listing - presumably a header included after
 * them; confirm.
 */
12 #define DOC_NEXT utf8_next
13 #define DOC_PREV utf8_prev
14 #define PANE_DATA_VOID
/* Key map used by the utf8_handle lookup command; filled in by edlib_init(). */
17 static struct map *utf8_map safe;
18 DEF_LOOKUP_CMD(utf8_handle, utf8_map);
/*
 * utf8_next: step forward in the parent pane's document and produce one
 * character, using get_utf8() to decode a multi-byte sequence.
 * NOTE(review): only some lines of this function are visible in this
 * listing; the comments below describe the visible lines and mark
 * anything that depends on the missing ones as an assumption.
 */
20 static inline wint_t utf8_next(struct pane *home safe, struct mark *mark safe,
21 struct doc_ref *r, bool bytes)
/* 'move' is non-zero when 'r' is the mark's own reference. */
23 int move = r == &mark->ref;
/* All document reads are directed at the parent pane. */
24 struct pane *p = home->parent;
26 struct mark *m = mark;
/*
 * Read the first value in the forward direction, either with doc_move()
 * or with doc_pending().  The condition choosing between them is not
 * visible here - presumably 'move'; confirm.
 */
33 ch = doc_move(p, m, 1);
35 ch = doc_pending(p, m, 1);
/*
 * WEOF, or a value with no bits set above the low seven, needs no
 * decoding.  (The statement guarded by this test is not visible -
 * presumably an early return of 'ch'.)
 */
36 if (ch == WEOF || (ch & 0x7f) == ch)
/*
 * Continue while the following value is not WEOF, has the bit pattern
 * 10xxxxxx (a UTF-8 continuation byte), and 'i' is still below 10.
 */
44 while ((ch = doc_following(p, m)) != WEOF &&
45 (ch & 0xc0) == 0x80 && i < 10) {
/* Decode the bytes between 'b' and 'b+i' into a single value. */
50 ret = get_utf8(&b, b+i);
/*
 * Alternative result: the first buffered byte taken as an unsigned
 * value.  (The condition selecting this is not visible - TODO confirm.)
 */
52 ret = (unsigned char)buf[0];
/*
 * utf8_prev: step backward in the parent pane's document and produce
 * one character, using get_utf8() to decode a multi-byte sequence.
 * NOTE(review): only some lines of this function are visible in this
 * listing; the comments below describe the visible lines and mark
 * anything that depends on the missing ones as an assumption.
 */
58 static inline wint_t utf8_prev(struct pane *home safe, struct mark *mark safe,
59 struct doc_ref *r, bool bytes)
/* 'move' is non-zero when 'r' is the mark's own reference. */
61 int move = r == &mark->ref;
/* All document reads are directed at the parent pane. */
62 struct pane *p = home->parent;
64 struct mark *m = mark;
/*
 * Read the first value in the backward direction, either with
 * doc_move() or with doc_pending().  The condition choosing between
 * them is not visible here - presumably 'move'; confirm.
 */
71 ch = doc_move(p, m, -1);
73 ch = doc_pending(p, m, -1);
/*
 * WEOF, or a value with no bits set above the low seven, needs no
 * decoding.  (The statement guarded by this test is not visible -
 * presumably an early return of 'ch'.)
 */
74 if (ch == WEOF || (ch & 0x7f) == ch)
/*
 * Keep looping until WEOF is seen, a value with both top bits set
 * (11xxxxxx, a UTF-8 lead byte) is seen, or 'i' reaches 0.
 */
82 while (ch != WEOF && (ch & 0xc0) != 0xc0 && i > 0) {
/* Decode from 'b' up to buf+10, the upper limit passed to get_utf8(). */
87 ret = get_utf8(&b, buf+10);
/*
 * Alternative result: the byte at buf[i] taken as an unsigned value.
 * (The condition selecting this is not visible - TODO confirm.)
 */
89 ret = (unsigned char)buf[i];
98 return do_char_byte(ci);
103 return call("doc:char", ci->home->parent, ci->num, ci->mark, ci->str,
104 ci->num2, ci->mark2, ci->str2, ci->x, ci->y);
109 struct command *cb safe;
/*
 * utf8_content_cb: callback that receives one value ('wc') at a time
 * and forwards decoded characters to the command stored in c->cb.
 * Three cases are visible: a value with no bits above the low seven is
 * forwarded at once; a continuation byte (10xxxxxx) is appended to c->b
 * and, once c->have reaches c->expect, the buffer is decoded with
 * get_utf8() and forwarded; anything else is handled as the first byte
 * of a multi-byte sequence.
 * NOTE(review): only some lines of this function are visible in this
 * listing, including where 'wc' is set - confirm against the full file.
 */
116 DEF_CMD(utf8_content_cb)
/* Recover the enclosing struct utf8cb from the embedded command. */
118 struct utf8cb *c = container_of(ci->comm, struct utf8cb, c);
125 if ((wc & ~0x7f) == 0) {
126 /* 7bit char - easy. Pass following string too,
130 c->expect = c->have = 0;
131 ret = comm_call(c->cb, ci->key, c->p, wc, ci->mark, ci->str,
132 ci->num2, NULL, NULL, c->size, 0);
136 if ((wc & 0xc0) == 0x80) {
137 /* Continuation char */
141 c->b[c->have++] = wc;
/* Enough bytes collected: decode them and pass the result on. */
142 if (c->have >= c->expect) {
143 const char *b = c->b;
144 wc = get_utf8(&b, b+c->have);
148 ret = comm_call(c->cb, ci->key, c->p,
149 wc, ci->mark, ci->str,
150 ci->num2, NULL, NULL, c->size, 0);
155 /* First char of multi-byte */
/*
 * utf8_content: handler registered for "doc:content" in edlib_init().
 * It installs utf8_content_cb as the callback of a local struct and
 * passes the request on to the parent pane, so the parent's output is
 * routed through that callback.
 * NOTE(review): only some lines of this function are visible in this
 * listing; the initialisation of the other fields of 'c' is not shown.
 */
170 DEF_CMD(utf8_content)
/*
 * Both a caller-supplied command (comm2) and a mark are required.
 * (The statement taken when either is missing is not visible here.)
 */
174 if (!ci->comm2 || !ci->mark)
177 c.c = utf8_content_cb;
/* Re-issue the same key on the parent pane with our callback attached. */
182 return home_call_comm(ci->home->parent, ci->key, ci->home,
183 &c.c, 1, ci->mark, NULL, 0, ci->mark2);
190 p = pane_register(ci->focus, 0, &utf8_handle.c);
194 return comm_call(ci->comm2, "callback:attach", p);
/*
 * edlib_init: module set-up.  Allocates the key map used by
 * utf8_handle, binds the "doc:char", "doc:byte" and "doc:content" keys
 * to this file's handlers, and registers utf8_attach with 'ed' under
 * two global command names.
 * NOTE(review): some lines of this function are not visible in this
 * listing.
 */
197 void edlib_init(struct pane *ed safe)
200 utf8_map = key_alloc();
/* Per-pane handlers, looked up through utf8_map. */
202 key_add(utf8_map, "doc:char", &utf8_char);
203 key_add(utf8_map, "doc:byte", &utf8_byte);
204 key_add(utf8_map, "doc:content", &utf8_content);
205 /* No doc:content-bytes, that wouldn't make sense */
/* The same attach command is made available under both names. */
207 call_comm("global-set-command", ed, &utf8_attach, 0, NULL, "attach-charset-utf-8");
208 call_comm("global-set-command", ed, &utf8_attach, 0, NULL, "attach-utf8");