2 * Copyright Neil Brown ©2016-2021 <neil@brown.name>
3 * May be distributed under terms of GPLv2 - see file:COPYING
5 * lib-rfc822header: parse rfc822 email headers.
6 * When instantiated, headers in the parent document are parsed and a mark
7 * is moved beyond the headers.
8 * Subsequently the "get-header" command can be used to extract headers.
9 * If a focus/point is given, the header is copied into the target pane
10 * with charset decoding performed and some attributes added to allow
11 * control over the display.
12 * If no point is given, the named header is parsed and added to this
13 * pane as an attribute. Optionally comments are removed.
15 * RFC2047 allows headers to contain words:
16 * =?charset?encoding?text?=
17 * "charset" can be any character set, e.g. "iso-8859-1" "utf-8" "us-ascii" "Windows-1252"
18 * Currently support utf-8 and us-ascii transparently, others if
20 * "encoding" can be Q or B (or q or b)
21 * Q recognizes '=' and treats the next 2 characters as HEX, and '_' implies SPACE
25 #define _GNU_SOURCE /* for asprintf */
33 #define PANE_DATA_TYPE struct header_info
40 #include "core-pane.h"
/*
 * get_hname: read a header name from pane 'p', advancing mark 'm'.
 *
 * Characters are consumed with doc_next() until a ':' is found or a
 * character outside the printable, non-space ASCII range ('!'..'~')
 * is seen.  find_headers() treats a NULL return as "no more headers".
 * NOTE(review): how the name is stored and returned is in lines that
 * are not visible in this excerpt.
 */
42 static char *get_hname(struct pane *p safe, struct mark *m safe)
48 while ((ch = doc_next(p, m)) != ':' &&
49 (ch > ' ' && ch <= '~')) {
/* An empty name, or a name not terminated by ':', is not a header. */
55 if (len == 0 || ch != ':')
/*
 * find_headers: locate each header between 'start' and 'end' in pane 'p'.
 *
 * A mark in view hi->vnum is created for each header and the name
 * returned by get_hname() is stored in that mark's "header" attribute.
 * Scanning stops when 'end' is reached or get_hname() returns NULL.
 * Afterwards a CR and/or LF following the headers is tested for, and
 * 'start' is updated from the scanning mark so the caller can see how
 * far parsing got.
 */
60 static void find_headers(struct pane *p safe, struct mark *start safe,
61 struct mark *end safe)
63 struct header_info *hi = p->data;
64 struct mark *m, *hm safe;
/* 'm' scans forward; 'hm' is a duplicate left where the current header began. */
68 m = vmark_new(p, hi->vnum, p);
71 mark_to_mark(m, start);
72 hm = mark_dup_view(m);
73 while (m->seq < end->seq &&
74 (hname = get_hname(p, m)) != NULL) {
75 attr_set_str(&hm->attrs, "header", hname);
/*
 * Consume the header body.  The visible part of the condition checks
 * whether the following character is a space or tab - presumably to
 * keep going across folded continuation lines.
 */
77 while ((ch = doc_next(p, m)) != WEOF &&
80 (ch = doc_following(p, m)) == ' ' || ch == '\t'))
/* A fresh mark for whatever comes after the header just scanned. */
82 hm = mark_dup_view(m);
84 /* Skip over trailing blank line */
85 if (doc_following(p, m) == '\r')
87 if (doc_following(p, m) == '\n')
89 mark_to_mark(start, m);
/*
 * from_hex: value of a single hexadecimal digit character.
 *
 * The three tests cover '0'-'9', 'a'-'f' and 'A'-'F'.  The value returned
 * for each range, and for any other character, is in lines not visible
 * in this excerpt.  charset_word() combines the result 4 bits at a time.
 */
93 static int from_hex(char c)
95 if (c >= '0' && c <= '9')
97 if (c >= 'a' && c <= 'f')
99 if (c >= 'A' && c <= 'F')
/*
 * is_b64: return non-zero if 'c' may appear in base64 text, i.e. it is
 * a letter, a digit, '+', '/', or the '=' padding character.
 */
104 static int is_b64(char c)
106 return (c >= 'A' && c <= 'Z') ||
107 (c >= 'a' && c <= 'z') ||
108 (c >= '0' && c <= '9') ||
109 c == '+' || c == '/' || c == '=';
/*
 * from_b64: map a base64 character to its numeric value.
 *
 * The visible returns give digits the values 52 upward, upper-case
 * letters the values 0 upward and lower-case letters the values 26
 * upward.  The conditions selecting each return, and the handling of
 * '+', '/' and '=', are in lines not visible in this excerpt.
 */
112 static int from_b64(char c)
114 /* This assumes that 'c' is_b64() */
118 return (c - '0') + 52;
122 return (c - 'A') + 0;
126 return (c - 'a') + 26;
/*
 * charset_word: decode one RFC2047 encoded word read from 'doc' at 'm'.
 *
 * Characters are read while they are printable ASCII and fewer than
 * four '?' have been counted in 'qmarks'.  The leading text is gathered
 * as the charset name, and 'q' or 'b' at the encoding position selects
 * the decoder.  The test "qmarks < 3 && isupper(ch)" presumably folds
 * case first; its action is not visible in this excerpt.
 *  - Q encoding: hex digits are combined 4 bits at a time with
 *    from_hex() and appended as a byte; one branch appends a space.
 *  - B encoding: a character that is not base64, or is '=', is handled
 *    specially (action not visible); other characters are combined
 *    6 bits at a time with from_b64() and bytes are appended as they
 *    become available.
 * If both a charset and decoded text were obtained, the command
 * "charset-to-utf8-<charset>" is called with the decoded text.
 * Finally a second mark is used to look past spaces, tabs, CR and LF;
 * if "=?" follows, that white space is skipped so that adjacent
 * encoded words join up.
 *
 * NOTE(review): the decoded text is kept in the function-static 'last',
 * so the returned string is presumably only valid until the next call.
 * Confirm against the lines not shown here.
 */
129 static char *safe charset_word(struct pane *doc safe, struct mark *m safe)
132 * Search for second '?' and capture charset, detect 'Q' or 'B',
133 * then decode based on that.
134 * Finish on ?= or non-printable
135 * =?charset?encoding?code?=
142 static char *last = NULL;
143 char *charset = NULL;
151 while ((ch = doc_next(doc, m)) != WEOF &&
152 ch > ' ' && ch < 0x7f && qmarks < 4) {
155 charset = buf_final(&buf);
161 if (qmarks < 3 && isupper(ch))
164 /* gathering charset */
165 buf_append(&buf, ch);
168 if (qmarks == 2 && ch == 'q')
170 if (qmarks == 2 && ch == 'b')
176 buf_append(&buf, ch);
180 tmp = (tmp<<4) + from_hex(ch);
183 buf_append_byte(&buf, tmp);
191 buf_append(&buf, ch);
194 buf_append(&buf, ' ');
208 if (!is_b64(ch) || ch == '=')
210 tmp = (tmp << 6) | from_b64(ch);
214 buf_append_byte(&buf, (tmp >> bits) & 255);
220 last = buf_final(&buf);
221 if (charset && last) {
225 asprintf(&cmd, "charset-to-utf8-%s", charset);
227 cvt = call_ret(str, cmd, doc, 0, NULL, last);
233 /* If there is only LWS to the next quoted word,
234 * skip that so words join up
239 while ((ch = doc_next(doc, m2)) == ' ' ||
240 ch == '\t' || ch == '\r' || ch == '\n')
242 if (ch == '=' && doc_following(doc, m2) == '?') {
/*
 * add_addr: record one address of a list-type header in pane 'p'.
 *
 * 'm' and 'pnt' delimit the address text.  'len' and 'hdr' are stored
 * with it ('hdr' is declared on a signature line not visible here).
 *  - The counter attribute "rfc822-addr-cnt" of the pane is read into 'tag'.
 *  - The string "<len>,<tag>,<hdr>" is set at 'm' as the attribute
 *    "render:rfc822header-addr".
 *  - The text between 'm' and 'pnt' is fetched with "doc:get-str".  While
 *    it is longer than 'len' characters a shorter length is computed with
 *    utf8_round_len(); the truncation itself is not visible here.
 *  - The resulting text is saved as the pane attribute "addr-<tag>".
 *  - "rfc822-addr-cnt" is written back; any change to 'tag' in between
 *    happens in lines not shown in this excerpt.
 */
250 static void add_addr(struct pane *p safe, struct mark *m safe,
251 struct mark *pnt safe, int len,
254 char buf[2*sizeof(int)*8/3 + 3 + 20];
260 tag = attr_find_int(p->attrs, "rfc822-addr-cnt");
264 snprintf(buf, sizeof(buf), "%d,%d,%s", len, tag, hdr);
265 call("doc:set-attr", p, 1, m,
266 "render:rfc822header-addr", 0, NULL, buf);
268 addr = call_ret(str,"doc:get-str", p, 0, m, NULL, 0, pnt);
269 while (addr && utf8_strlen(addr) > len) {
270 int l = utf8_round_len(addr, strlen(addr)-1);
273 snprintf(buf, sizeof(buf), "addr-%d", tag);
274 attr_set_str(&p->attrs, buf, addr);
276 attr_set_int(&p->attrs, "rfc822-addr-cnt", tag);
/*
 * copy_header: copy one header from 'doc' into pane 'p' at 'point',
 * decoding RFC2047 words and attaching rendering attributes.
 *
 * Summary of the visible steps:
 *  - 'hstart' is a duplicate of 'point', stepped so that it remains at the
 *    start of the inserted text.
 *  - Characters are read from 'doc' one at a time.  Control characters
 *    other than tab, and leading space/tab at the start of a line, are
 *    tested for.  When 'sol' is set a single space is inserted with
 *    "doc:replace", except at the start of an empty list item.
 *  - "=?" hands over to charset_word(); the text to insert is scanned
 *    for control characters (the action taken is not visible here).
 *  - For "list" type headers, once the colon has been seen, 'istart'
 *    marks where an item begins.  When a ',' is reached with an item
 *    open, add_addr() records the item.  A comma outside an encoded
 *    word is followed by one space, and the attribute
 *    "render:rfc822header-wrap" = "2" is set at the mark 'p2' taken
 *    at that point.
 *  - After the loop a final add_addr() call and a newline insertion are
 *    made.  Then "render:rfc822header" is set at 'hstart' to
 *    strlen(hdr_found)+1, and "render:rfc822header:<hdr_found>" is set
 *    to 'type'; a loop over the attribute name tests for ASCII
 *    upper-case characters so that the name can be lower-cased.
 *
 * 'type' is used here but declared on a signature line that is not
 * visible in this excerpt.
 */
279 static void copy_header(struct pane *doc safe,
280 const char *hdr safe, const char *hdr_found safe,
282 struct mark *start safe, struct mark *end safe,
283 struct pane *p safe, struct mark *point safe)
285 /* Copy the header in 'doc' from 'start' to 'end' into
286 * the document 'p' at 'point'.
288 * NULL : no explicit wrapping
289 * "text": no explicit wrapping
290 * "list": convert commas to wrap points.
291 * 'hdr' is the name of the header - before the ':'.
292 * '\n', '\r' are copied as a single space, and subsequent
293 * spaces are skipped.
302 int is_list = type && strcmp(type, "list") == 0;
303 struct mark *istart = NULL;
304 int ilen = 0, isince = 0;
305 bool seen_colon = False;
308 hstart = mark_dup(point);
309 /* put hstart before point, so it stays here */
310 mark_step(hstart, 0);
311 while ((ch = doc_next(doc, m)) != WEOF &&
316 if (ch < ' ' && ch != '\t') {
320 if (sol && (ch == ' ' || ch == '\t'))
322 if (sol && !(is_list && ilen == 0)) {
323 call("doc:replace", p, 1, NULL, " ", 0, point);
329 if (ch == '=' && doc_following(doc, m) == '?')
330 b = charset_word(doc, m);
333 for (i = 0; b[i]; i++)
334 if (b[i] > 0 && b[i] < ' ')
336 if (is_list && seen_colon && !istart && b[0] != ',' &&
337 (b[0] != ' ' || b[1] != '\0')) {
338 /* This looks like the start of a list item. */
339 istart = mark_dup(point);
340 mark_step(istart, 0);
345 call("doc:replace", p, 1, NULL, b, 0, point);
346 if (ch == ',' && istart) {
347 add_addr(p, istart, point, ilen, hdr);
351 isince += utf8_strlen(b);
354 if (ch == ',' && is_list) {
355 /* This comma is not in a quoted word, so it really marks
356 * part of a list, and so is a wrap-point. Consume any
357 * following spaces and include just one space in
360 struct mark *p2 = mark_dup(point);
362 while ((ch = doc_following(doc, m)) == ' ')
365 call("doc:replace", p, 1, NULL, " ", 0, point);
366 call("doc:set-attr", p, 1, p2,
367 "render:rfc822header-wrap", 0, NULL, "2");
370 istart = mark_dup(point);
371 mark_step(istart, 0);
376 add_addr(p, istart, point, ilen, hdr);
379 call("doc:replace", p, 1, NULL, "\n", 0, point);
380 snprintf(buf, sizeof(buf), "%zd", strlen(hdr_found)+1);
381 call("doc:set-attr", p, 1, hstart, "render:rfc822header", 0, NULL, buf);
382 snprintf(attr, sizeof(attr), "render:rfc822header:%s", hdr_found);
383 /* make header name lowercase */
384 for (a = attr; *a; a++) {
385 if ((unsigned char)(*a) < 128 && isupper(*a))
388 call("doc:set-attr", p, 1, hstart, attr, 0, NULL, type);
/*
 * copy_headers: copy every header named 'hdr' from the header pane 'p'
 * into the pane 'doc' at 'pt'.
 *
 * The marks of view hi->vnum are walked; each carries the header name
 * in its "header" attribute.  A header is copied when its name matches
 * 'hdr' case-insensitively and a following mark 'n' exists to delimit it.
 * 'type', 'resent', 'horig' and 'n' are declared or assigned on lines
 * not visible in this excerpt.
 */
394 static void copy_headers(struct pane *p safe, const char *hdr safe,
396 struct pane *doc safe, struct mark *pt safe,
399 struct header_info *hi = p->data;
402 for (m = vmark_first(p, hi->vnum, p); m ; m = n) {
403 char *h = attr_find(m->attrs, "header");
/*
 * With 'resent' set, names beginning with "resent-" are detected here;
 * presumably the prefix is skipped before the comparison below.
 */
405 while (resent && h &&
406 strncasecmp(h, "resent-", 7) == 0)
/*
 * Note the naming swap: copy_header() calls its source pane 'doc' and
 * its target pane 'p', the reverse of the names used in this function.
 */
409 if (n && horig && h && strcasecmp(h, hdr) == 0)
410 copy_header(p, hdr, horig, type, m, n, doc, pt);
/*
 * extract_header: return the text of the header lying between 'start'
 * and 'end' in pane 'p', as built in a local buffer and finished with
 * buf_final().
 *
 * Visible steps: the first ':' is detected via 'found', control
 * characters other than tab and leading space/tab at the start of a
 * line are tested for, a single space is appended in one branch,
 * "=?" is decoded with charset_word(), and other characters are
 * appended unchanged.
 */
414 static char *extract_header(struct pane *p safe, struct mark *start safe,
415 struct mark *end safe)
418 /* This is used for headers that control parsing, such as
419 * MIME-Version and Content-type.
429 while ((ch = doc_next(p, m)) != WEOF &&
431 if (!found && ch == ':') {
437 if (ch < ' ' && ch != '\t') {
441 if (sol && (ch == ' ' || ch == '\t'))
444 buf_append(&buf, ' ');
447 if (ch == '=' && doc_following(p, m) == '?') {
448 char *b = charset_word(p, m);
451 buf_append(&buf, ch);
453 return buf_final(&buf);
/*
 * load_header: return the extracted text of the first header in 'home'
 * whose name matches 'hdr' case-insensitively.
 *
 * The marks of view hi->vnum are walked; a header is only used when a
 * following mark 'n' exists to delimit it.  The value returned when no
 * header matches is in lines not visible in this excerpt.
 */
456 static char *load_header(struct pane *home safe, const char *hdr safe)
458 struct header_info *hi = home->data;
461 for (m = vmark_first(home, hi->vnum, home); m; m = n) {
462 char *h = attr_find(m->attrs, "header");
464 if (n && h && strcasecmp(h, hdr) == 0)
465 return extract_header(home, m, n);
/*
 * Body of the "get-header" handler (the line opening the definition is
 * not visible in this excerpt).
 *
 * ci->str names the header, ci->str2 gives the type passed on to
 * copy_headers(), and ci->num2 == 1 sets 'resent'.
 * Two paths are visible: copy_headers() copies matching headers into
 * ci->focus at ci->mark, and otherwise an attribute named "rfc822-<hdr>"
 * is set on ci->home with the text returned by load_header().  The loop
 * over the attribute name presumably lower-cases it.  The condition
 * choosing between the two paths is not shown here.
 */
472 const char *hdr = ci->str;
473 const char *type = ci->str2;
474 bool resent = ci->num2 == 1;
482 copy_headers(ci->home, hdr, type, ci->focus, ci->mark, resent);
485 asprintf(&attr, "rfc822-%s", hdr);
488 for (c = attr; *c; c++)
491 t = load_header(ci->home, hdr);
492 attr_set_str(&ci->home->attrs, attr, t);
/*
 * Body of the "list-headers" handler (the line opening the definition
 * is not visible in this excerpt).
 *
 * Both ci->str (the header name) and ci->comm2 (the callback) are
 * required.  Every header whose name matches ci->str case-insensitively
 * is extracted with extract_header() and reported through comm_call()
 * with the key "cb"; how the callback result is used is not shown here.
 */
500 /* Call comm2 for each header matching str */
501 struct header_info *hi = ci->home->data;
504 if (!ci->str || !ci->comm2)
506 for (m = vmark_first(ci->home, hi->vnum, ci->home); m; m = n) {
507 char *h = attr_find(m->attrs, "header");
509 if (n && h && strcasecmp(h, ci->str) == 0) {
510 h = extract_header(ci->home, m, n);
511 if (comm_call(ci->comm2, "cb", ci->focus,
/*
 * Body of the "Notify:clip" handler (the line opening the definition is
 * not visible in this excerpt): passes the range ci->mark .. ci->mark2
 * and this pane's header view (hi->vnum) to marks_clip(), with ci->num
 * reduced to a 0/1 flag.
 */
522 struct header_info *hi = ci->home->data;
524 marks_clip(ci->home, ci->mark, ci->mark2, hi->vnum, ci->home, !!ci->num);
/* File-scope key map holding the commands handled by header panes. */
528 static struct map *header_map safe;
/*
 * header_init_map: allocate header_map and register the three commands
 * "get-header", "list-headers" and "Notify:clip".
 */
530 static void header_init_map(void)
532 header_map = key_alloc();
533 key_add(header_map, "get-header", &header_get);
534 key_add(header_map, "list-headers", &header_list);
535 key_add(header_map, "Notify:clip", &header_clip);
/* header_handle is the pane handler; it is defined in terms of header_map. */
538 DEF_LOOKUP_CMD(header_handle, header_map);
/*
 * header_attach: create a header pane over ci->focus and parse the
 * headers lying between ci->mark and ci->mark2.
 *
 * The pane is registered with header_handle as its handler, a view is
 * obtained from the document with "doc:add-view" (the returned value
 * minus one is stored as hi->vnum), find_headers() is run over the
 * range, and the new pane is handed to ci->comm2 via "callback:attach".
 * Argument checks and the setup of 'hi' are in lines not visible in
 * this excerpt.
 */
539 DEF_CMD(header_attach)
541 struct header_info *hi;
543 struct mark *start = ci->mark;
544 struct mark *end = ci->mark2;
546 p = pane_register(ci->focus, 0, &header_handle.c);
551 hi->vnum = home_call(ci->focus, "doc:add-view", p) - 1;
553 find_headers(p, start, end);
555 return comm_call(ci->comm2, "callback:attach", p);
/*
 * edlib_init: module entry point; registers header_attach as the global
 * command "attach-rfc822header" on the editor pane 'ed'.
 */
558 void edlib_init(struct pane *ed safe)
561 call_comm("global-set-command", ed, &header_attach, 0, NULL, "attach-rfc822header");