2 * Copyright Neil Brown ©2016-2021 <neil@brown.name>
3 * May be distributed under terms of GPLv2 - see file:COPYING
5 * doc-email: Present an email message as its intended content, with
6 * part recognition and decoding etc.
8 * A multipart document is created from sets of three documents.
10 * - The first is the original email, overlayed with 'crop' to select
11 * one section, then overlayed with handlers for the transfer-encoding
12 * and (optionally) charset. There will be one for the headers
13 * and either one for the body, or one for each part of the body
14 * if it is multipart. All nested multiparts have their parts linearized.
15 * - The second is a scratch text document which can contain a transformed
16 * copy of the content when the transformation is too complex for an
17 * overlay layer. This includes HTML and PDF which can be converted
18 * to text, but the conversion is complex, and the headers, which need to be
19 * re-ordered as well as filtered and decoded. For images, this document has
20 * trivial content but specifies a rendering function that displays the image.
21 * - The final section is a 'spacer' which has fixed content and displays
22 * as a summary line for the part (e.g. MIME-type, file name) and
23 * provides active buttons for acting on the content (save, external viewer
26 * The middle part has attributes set on the document which can be
27 * accessed from the spacer using "multipart-prev:"
28 * - email:path identifies the part in the nested multipart structure
29 * e.g. "headers", "body", "body,multipart/mixed:0,multipart/alternate:1"
30 * - email:actions a ':' separated list of buttons. "hide:save:view"
31 * - email:content-type the MIME type of the content. "image/png"
35 #define _GNU_SOURCE /* for asprintf */
/* Parts are added to the multipart document in groups of three
 * (original, transformed, spacer).  Report whether part number p is
 * the first ("original") member of its group.  Negative part numbers
 * never match.
 */
46 static inline bool is_orig(int p)
48 return p >= 0 && p % 3 == 0;
/* Report whether part number p is the second ("transformed") member
 * of its group of three parts.  Negative part numbers never match.
 */
51 static inline bool is_transformed(int p)
53 return p >= 0 && p % 3 == 1;
/* Report whether part number p is the third ("spacer") member of its
 * group of three parts.  Negative part numbers never match.
 */
56 static inline bool is_spacer(int p)
58 return p >= 0 && p % 3 == 2;
61 static inline int to_orig(int p)
69 struct pane *email safe;
70 struct pane *spacer safe;
73 static bool handle_content(struct pane *p safe, char *type, char *xfer,
74 struct mark *start safe, struct mark *end safe,
75 struct pane *mp safe, struct pane *spacer safe,
78 static bool cond_append(struct buf *b safe, char *txt safe, char *tag safe,
79 int offset, int *cp safe)
81 char *tagf = "active-tag:email-";
82 int prelen = 1 + strlen(tagf) + strlen(tag) + 1 + 1;
84 int len = prelen + strlen(txt) + postlen;
85 if (offset != NO_NUMERIC && offset >= 0 && offset <= b->len + len)
96 buf_concat(b, "]</>");
100 static bool is_attr(char *a safe, char *attrs safe)
103 if (strncmp(a, attrs, l) != 0)
105 if (attrs[l] == ':' || attrs[l] == '\0')
110 DEF_CMD(email_spacer)
114 struct mark *m = ci->mark;
115 struct mark *pm = ci->mark2;
125 /* Count the number of chars before the cursor.
126 * This tells us which button to highlight.
130 while (pm->seq > m->seq && !mark_same(pm, m)) {
131 doc_prev(ci->focus, pm);
138 buf_concat(&b, "<fg:red>");
140 attr = pane_mark_attr(ci->home, m, "multipart-prev:email:path");
142 buf_concat(&b, attr);
146 attr = pane_mark_attr(ci->focus, m, "email:visible");
147 if (attr && strcmp(attr, "none") == 0)
149 attr = pane_mark_attr(ci->home, m, "multipart-prev:email:actions");
153 while (ok && attr && *attr) {
154 char *a = strchr(attr, ':');
157 if (is_attr("hide", attr))
158 ok = cond_append(&b, visible ? "HIDE" : "SHOW", attr,
161 ok = cond_append(&b, attr, attr, o, &cp);
163 doc_next(ci->focus, m);
168 /* end of line, only display if we haven't reached
169 * the cursor or offset
171 * if cp < 0, we aren't looking for a cursor, so don't stop.
172 * if cp > 0, we haven't reached cursor yet, so don't stop
173 * if cp == 0, this is cursor pos, so stop.
175 if (ok && cp != 0 && ((o < 0 || o == NO_NUMERIC))) {
177 buf_concat(&b, "</>");
178 attr = pane_mark_attr(ci->focus, m,
179 "multipart-prev:email:content-type");
182 buf_concat(&b, attr);
184 attr = pane_mark_attr(ci->focus, m,
185 "multipart-prev:email:charset");
188 buf_concat(&b, attr);
190 attr = pane_mark_attr(ci->focus, m,
191 "multipart-prev:email:filename");
193 buf_concat(&b, " file=");
194 buf_concat(&b, attr);
196 buf_concat(&b, "\n");
197 while ((wch = doc_next(ci->focus, m)) &&
198 wch != '\n' && wch != WEOF)
202 ret = comm_call(ci->comm2, "callback:render", ci->focus, 0, NULL,
208 static int get_part(struct pane *p safe, struct mark *m safe)
210 char *a = pane_mark_attr(p, m, "multipart:part-num");
225 p = get_part(ci->home, ci->mark);
226 doc_next(ci->focus, ci->mark);
227 asprintf(&c, "<image:comm:doc:multipart-%d-doc:get-bytes>\n", to_orig(p));
228 ret = comm_call(ci->comm2, "callback:render", ci->focus,
234 DEF_CMD(email_select)
236 /* If mark is on a button, press it... */
237 struct mark *m = ci->mark;
238 char *a, *e, *cmd = NULL;
245 p = get_part(ci->home, m);
248 ch = doc_following(ci->home, m);
249 if (ch == WEOF || !isdigit(ch))
252 a = pane_mark_attr(ci->focus, m, "multipart-prev:email:actions");
266 asprintf(&cmd, "email:select:%.*s", (int)(e-a), a);
269 return call(cmd, ci->focus, 0, m);
272 DEF_CMD(email_select_hide)
276 struct mark *m = ci->mark;
281 a = pane_mark_attr(ci->focus, m, "email:visible");
282 if (a && strcmp(a, "none") == 0)
284 call("doc:set-attr", ci->focus, 1, m, "email:visible", 0, NULL,
285 vis ? "none" : "preferred");
289 static struct map *email_view_map safe;
291 DEF_LOOKUP_CMD(email_view_handle, email_view_map);
293 static char tspecials[] = "()<>@,;:\\\"/[]?=";
/* Return non-zero if c is linear white space as used when tokenising
 * headers here: a space, tab, carriage return or newline.
 */
295 static int lws(char c) {
296 return c == ' ' || c == '\t' || c == '\r' || c == '\n';
299 static char *get_822_token(char **hdrp safe, int *len safe)
301 /* A "token" is one of:
303 * - single char from tspecials (except '(' or '"')
304 * - string of non-LWS, and non-tspecials
306 * (comments) are skipped.
307 * Start is returned, hdrp is moved, len is reported.
318 while (*hdr && *hdr != ')')
324 while (*hdr && *hdr != '"')
335 if (strchr(tspecials, *hdr)) {
343 while (*hdr && !lws(*hdr) && !strchr(tspecials, *hdr))
351 static char *get_822_attr(char *shdr safe, char *attr safe)
353 /* If 'hdr' contains "$attr=...", return "..."
354 * with "quotes" stripped. Return value can be used
355 * until the next call, when it will be freed.
360 static char *last = NULL;
367 while ((h = get_822_token(&hdr, &len)) != NULL &&
368 (len != alen || strncasecmp(h, attr, alen) != 0))
370 h = get_822_token(&hdr, &len);
371 if (!h || len != 1 || *h != '=')
373 h = get_822_token(&hdr, &len);
376 last = strndup(h, len);
382 static char *get_822_word(char *hdr safe)
384 /* Get the first word from header, in static
385 * space (freed on next call)
387 static char *last = NULL;
393 h = get_822_token(&hdr, &len);
396 last = strndup(h, len);
/* Compare the token (tok, len) - which is not NUL terminated - with
 * the NUL-terminated string 'match', ignoring case.
 * The token length is first tested against strlen(match) (presumably
 * rejecting a mismatch), and the final result is a case-insensitive
 * comparison of the first 'len' bytes.
 * NOTE(review): how a NULL 'tok' is handled is not visible here - confirm.
 */
400 static bool tok_matches(char *tok, int len, char *match safe)
404 if (len != (int)strlen(match))
406 return strncasecmp(tok, match, len) == 0;
409 static bool handle_text(struct pane *p safe, char *type, char *xfer,
410 struct mark *start safe, struct mark *end safe,
411 struct pane *mp safe, struct pane *spacer safe,
414 struct pane *h, *transformed = NULL;
415 int need_charset = 0;
416 char *charset = NULL;
417 char *major, *minor = NULL;
422 h = call_ret(pane, "attach-crop", p, 0, start, NULL, 0, end);
428 xfer = get_822_token(&xfer, &xlen);
429 if (xfer && xlen == 16 &&
430 strncasecmp(xfer, "quoted-printable", 16) == 0) {
431 struct pane *hx = call_ret(pane,
432 "attach-quoted_printable",
439 if (xfer && xlen == 6 &&
440 strncasecmp(xfer, "base64", 6) == 0) {
441 struct pane *hx = call_ret(pane, "attach-base64", h);
448 if (type && need_charset &&
449 (charset = get_822_attr(type, "charset")) != NULL) {
451 struct pane *hx = NULL;
452 charset = strsave(h, charset);
453 asprintf(&c, "attach-charset-%s", charset);
454 for (cp = c; cp && *cp; cp++)
458 hx = call_ret(pane, c, h);
461 /* windows-1251 is safer than utf-8 as the latter
462 * rejects some byte sequences, and iso-8859-* has
463 * lots of control characters.
465 hx = call_ret(pane, "attach-charset-windows-1251", h);
469 if (type && (fname = get_822_attr(type, "name")))
470 fname = strsave(h, fname);
471 major = get_822_token(&type, &majlen);
473 minor = get_822_token(&type, &minlen);
474 if (minor && tok_matches(minor, minlen, "/"))
475 minor = get_822_token(&type, &minlen);
480 asprintf(&ctype, "%1.*s/%1.*s", majlen, major, minlen, minor);
482 asprintf(&ctype, "%1.*s", majlen, major);
483 if (ctype && strcmp(ctype, "text/html") == 0)
484 transformed = call_ret(pane, "html-to-text", h);
485 if (ctype && strcmp(ctype, "application/pdf") == 0)
486 transformed = call_ret(pane, "pdf-to-text", h);
/* A generic application/octet-stream attachment is only treated as a
 * PDF when its file name actually contains ".pdf" or ".PDF".
 * strstr() returns NULL when the substring is absent, so the tests
 * must be "!= NULL"; with "== NULL" nearly every named attachment
 * would be handed to the pdf-to-text converter.
 */
487 if (ctype && strcmp(ctype, "application/octet-stream") == 0 &&
488 fname && (strstr(fname, ".pdf") != NULL ||
489 strstr(fname, ".PDF") != NULL))
490 transformed = call_ret(pane, "pdf-to-text", h);
491 if (ctype && strncmp(ctype, "image/", 6) == 0) {
493 transformed = call_ret(pane, "doc:from-text", h,
494 0, NULL, NULL, 0, NULL, "\n");
496 m = vmark_new(transformed, MARK_UNGROUPED, NULL);
497 call("doc:set-ref", transformed, 1, m);
498 call("doc:set-attr", transformed, 1, m, "markup:func", 0,
499 NULL, "doc:email:render-image");
504 attr_set_str(&transformed->attrs, "email:is_transformed", "yes");
505 attr_set_str(&transformed->attrs, "email:preferred", "transformed");
507 transformed = call_ret(pane, "doc:from-text", h,
508 0, NULL, NULL, 0, NULL, "\n");
510 attr_set_str(&transformed->attrs, "email:preferred",
517 call("doc:set:autoclose", transformed, 1);
520 for (i = 0; ctype[i]; i++)
521 if (isupper(ctype[i]))
522 ctype[i] = tolower(ctype[i]);
523 attr_set_str(&transformed->attrs, "email:content-type", ctype);
526 attr_set_str(&h->attrs, "email:content-type", "text/plain");
527 attr_set_str(&transformed->attrs, "email:actions", "hide:save");
528 attr_set_str(&transformed->attrs, "email:path", path);
529 attr_set_str(&transformed->attrs, "email:which", "transformed");
531 attr_set_str(&transformed->attrs, "email:charset", charset);
533 attr_set_str(&transformed->attrs, "email:filename", fname);
534 attr_set_str(&h->attrs, "email:which", "orig");
536 home_call(mp, "multipart-add", h);
537 home_call(mp, "multipart-add", transformed);
538 home_call(mp, "multipart-add", spacer);
542 /* Find a multipart boundary between start and end, moving
543 * 'start' to after the boundary, and 'pos' to just before it.
544 * Return 0 if a non-terminal boundary is found
545 * Return 1 if a terminal boundary is found (trailing --)
546 * Return -1 if nothing is found.
548 #define is_lws(c) ({int __c2 = c; __c2 == ' ' || __c2 == '\t' || is_eol(__c2); })
549 static int find_boundary(struct pane *p safe,
550 struct mark *start safe, struct mark *end safe,
556 int len = strlen(boundary);
558 asprintf(&patn, "^--(?%d:%s)(--)?[ \\t\\r]*$", len, boundary);
559 ret = call("text-search", p, 0, start, patn, 0, end);
565 mark_to_mark(pos, start);
566 while (cnt > 0 && doc_prev(p, pos) != WEOF)
568 /* Previous char is CRLF, and must be swallowed */
569 if (doc_prior(p, pos) == '\n')
571 if (doc_prior(p, pos) == '\r')
574 while (is_lws(doc_prior(p, start))) {
578 while (is_lws(doc_following(p, start)))
582 if (ret == 2 + len + 2)
587 static bool handle_multipart(struct pane *p safe, char *type safe,
588 struct mark *start safe, struct mark *end safe,
589 struct pane *mp safe, struct pane *spacer safe,
592 char *boundary = get_822_attr(type, "boundary");
594 struct mark *pos, *part_end;
601 /* FIXME need a way to say "just display the text" */
604 found_end = find_boundary (p, start, end, NULL, boundary);
607 tok = get_822_token(&type, &len);
609 tok = get_822_token(&type, &len);
610 if (tok && tok[0] == '/')
611 tok = get_822_token(&type, &len);
613 boundary = strdup(boundary);
614 pos = mark_dup(start);
615 part_end = mark_dup(pos);
616 while (found_end == 0 &&
617 (found_end = find_boundary(p, pos, end, part_end,
619 struct pane *hdr = call_ret(pane, "attach-rfc822header", p,
626 call("get-header", hdr, 0, NULL, "content-type",
628 call("get-header", hdr, 0, NULL, "content-transfer-encoding",
630 ptype = attr_find(hdr->attrs, "rfc822-content-type");
631 pxfer = attr_find(hdr->attrs,
632 "rfc822-content-transfer-encoding");
637 asprintf(&newpath, "%s%s%1.*s:%d", path, path[0] ? ",":"",
641 handle_content(p, ptype, pxfer, start, part_end, mp, spacer,
644 mark_to_mark(start, pos);
646 mark_to_mark(start, pos);
653 static bool handle_content(struct pane *p safe, char *type, char *xfer,
654 struct mark *start safe, struct mark *end safe,
655 struct pane *mp safe, struct pane *spacer safe,
659 char *major, *minor = NULL;
666 major = get_822_token(&hdr, &mjlen);
668 minor = get_822_token(&hdr, &mnlen);
669 if (minor && minor[0] == '/')
670 minor = get_822_token(&hdr, &mnlen);
673 tok_matches(major, mjlen, "text"))
674 return handle_text(p, type, xfer, start, end,
677 if (tok_matches(major, mjlen, "multipart"))
678 return handle_multipart(p, type, start, end, mp, spacer, path);
680 /* default to plain text until we get a better default */
681 return handle_text(p, type, xfer, start, end, mp, spacer, path);
687 struct email_info *ei;
688 struct mark *start, *end;
691 char *xfer = NULL, *type = NULL;
695 if (ci->str == NULL ||
696 strncmp(ci->str, "email:", 6) != 0)
698 fd = open(ci->str+6, O_RDONLY);
699 p = call_ret(pane, "doc:open", ci->focus, fd, NULL, ci->str + 6, 1);
702 start = vmark_new(p, MARK_UNGROUPED, NULL);
705 end = mark_dup(start);
706 call("doc:set-ref", p, 0, end);
710 h2 = call_ret(pane, "attach-rfc822header", p, 0, start, NULL, 0, end);
713 attr_set_str(&h2->attrs, "email:which", "orig");
714 p = call_ret(pane, "doc:from-text", p, 0, NULL, NULL, 0, NULL,
720 attr_set_str(&p->attrs, "email:which", "spacer");
722 point = vmark_new(p, MARK_POINT, NULL);
723 call("doc:set-ref", p, 1, point);
724 call("doc:set-attr", p, 1, point, "markup:func", 0,
725 NULL, "doc:email:render-spacer");
728 hdrdoc = call_ret(pane, "attach-doc-text", ci->focus);
731 call("doc:set:autoclose", hdrdoc, 1);
732 point = vmark_new(hdrdoc, MARK_POINT, NULL);
736 /* copy some headers to the header temp document */
737 home_call(h2, "get-header", hdrdoc, 0, point, "From");
738 home_call(h2, "get-header", hdrdoc, 0, point, "Date");
739 home_call(h2, "get-header", hdrdoc, 0, point, "Subject", 0, NULL, "text");
740 home_call(h2, "get-header", hdrdoc, 0, point, "To", 0, NULL, "list");
741 home_call(h2, "get-header", hdrdoc, 0, point, "Cc", 0, NULL, "list");
743 /* copy some headers into attributes for later analysis */
744 call("get-header", h2, 0, NULL, "MIME-Version", 0, NULL, "cmd");
745 call("get-header", h2, 0, NULL, "content-type", 0, NULL, "cmd");
746 call("get-header", h2, 0, NULL, "content-transfer-encoding",
748 mime = attr_find(h2->attrs, "rfc822-mime-version");
750 mime = get_822_word(mime);
751 if (mime && strcmp(mime, "1.0") == 0) {
752 type = attr_find(h2->attrs, "rfc822-content-type");
753 xfer = attr_find(h2->attrs, "rfc822-content-transfer-encoding");
756 p = call_ret(pane, "attach-doc-multipart", ci->home);
759 call("doc:set:autoclose", p, 1);
760 attr_set_str(&hdrdoc->attrs, "email:actions", "hide");
761 attr_set_str(&hdrdoc->attrs, "email:which", "transformed");
762 attr_set_str(&hdrdoc->attrs, "email:content-type", "text/rfc822-headers");
763 attr_set_str(&hdrdoc->attrs, "email:path", "headers");
764 attr_set_str(&hdrdoc->attrs, "email:is_transformed", "yes");
765 home_call(p, "multipart-add", h2);
766 home_call(p, "multipart-add", hdrdoc);
767 home_call(p, "multipart-add", ei->spacer);
769 if (!handle_content(ei->email, type, xfer, start, end,
770 p, ei->spacer, "body"))
775 attr_set_str(&p->attrs, "render-default", "text");
776 attr_set_str(&p->attrs, "filename", ci->str+6);
777 attr_set_str(&p->attrs, "doc-type", "email");
778 return comm_call(ci->comm2, "callback:attach", p);
793 DEF_CMD(email_view_free)
795 struct email_view *evi = ci->home->data;
802 static int count_buttons(struct pane *p safe, struct mark *m safe)
805 char *attr = pane_mark_attr(p, m, "multipart-prev:email:actions");
810 attr = strchr(attr, ':');
819 struct pane *p = ci->home;
820 struct email_view *evi = p->data;
827 ret = home_call(p->parent, ci->key, ci->focus,
828 ci->num, ci->mark, evi->invis,
830 n = get_part(p->parent, ci->mark);
831 if (ci->num2 && is_spacer(n)) {
832 /* Moving in a spacer, If after valid buttons,
836 unsigned int buttons;
837 buttons = count_buttons(p, ci->mark);
838 while ((c = doc_following(p->parent, ci->mark)) != WEOF
839 && iswdigit(c) && (c - '0') >= buttons)
840 doc_next(p->parent, ci->mark);
843 ret = home_call(p->parent, ci->key, ci->focus,
844 ci->num, ci->mark, evi->invis, ci->num2);
845 n = get_part(p->parent, ci->mark);
846 if (is_spacer(n) && ci->num2 &&
847 ret != CHAR_RET(WEOF) && iswdigit(ret & 0x1fffff)) {
848 /* Just stepped back over the 9 at the end of a spacer,
849 * Maybe step further if there aren't 10 buttons.
851 unsigned int buttons = count_buttons(p, ci->mark);
852 wint_t c = ret & 0x1fffff;
854 while (c != WEOF && iswdigit(c) && c - '0' >= buttons)
855 c = doc_prev(p->parent, ci->mark);
862 DEF_CMD(email_content)
864 /* Call the multipart doc:content telling it
865 * what is invisible, marking all spacers as invisible
867 struct pane *p = ci->home;
868 struct email_view *evi = p->data;
869 char *invis2 = strsave(p, evi->invis);
872 for (i = 0; invis2 && invis2[i]; i++)
875 return home_call(p->parent, ci->key, p,
876 ci->num, ci->mark, invis2,
877 ci->num2, ci->mark2, ci->str2,
878 ci->x, ci->y, ci->comm2);
881 DEF_CMD(email_set_ref)
883 struct pane *p = ci->home;
884 struct email_view *evi = p->data;
888 home_call(p->parent, ci->key, ci->focus, ci->num, ci->mark, evi->invis);
892 DEF_CMD(email_view_get_attr)
896 struct email_view *evi = ci->home->data;
898 if (!ci->str || !ci->mark)
900 if (strcmp(ci->str, "email:visible") == 0) {
901 p = get_part(ci->home->parent, ci->mark);
902 /* only parts can be invisible, not separators */
904 if (p < 0 || p >= evi->parts)
906 else if (evi->invis[p] != 'i')
908 else if (evi->invis[p+1] != 'i')
913 return comm_call(ci->comm2, "callback", ci->focus, 0, ci->mark,
914 v, 0, NULL, ci->str);
919 DEF_CMD(email_view_set_attr)
922 struct email_view *evi = ci->home->data;
924 if (!ci->str || !ci->mark)
926 if (strcmp(ci->str, "email:visible") == 0) {
927 struct mark *m1, *m2;
930 p = get_part(ci->home->parent, ci->mark);
931 /* only parts can be invisible, not separators */
933 if (p < 0 || p >= evi->parts)
936 m1 = mark_dup(ci->mark);
937 while (get_part(ci->home->parent, m1) > p &&
938 home_call(ci->home->parent, "doc:step-part",
939 ci->focus, -1, m1) > 0)
943 if (w && strcmp(w, "preferred") == 0) {
944 w = pane_mark_attr(ci->focus, m1,
945 "multipart-next:email:preferred");
948 } else if (w && (strcmp(w, "orig") == 0 ||
949 strcmp(w, "transformed") == 0)) {
950 call("doc:set-attr", ci->focus, 1, m1,
951 "multipart-next:email:preferred", 0, NULL, w);
954 evi->invis[p+1] = 'i';
955 if (w && strcmp(w, "orig") == 0)
957 if (w && strcmp(w, "transformed") == 0)
958 evi->invis[p+1] = 'v';
960 /* Tell viewers that visibility has changed */
963 home_call(ci->home->parent, "doc:step-part", ci->focus,
965 home_call(ci->home->parent, "doc:step-part", ci->focus,
967 call("view:changed", ci->focus, 0, m1, NULL, 0, m2);
968 call("Notify:clip", ci->focus, 0, m1, NULL, 0, m2);
977 DEF_CMD(attach_email_view)
980 struct email_view *evi;
984 m = vmark_new(ci->focus, MARK_UNGROUPED, NULL);
987 call("doc:set-ref", ci->focus, 0, m);
988 n = get_part(ci->focus, m);
990 if (n <= 0 || n > 1000 )
995 evi->invis = calloc(n+1, sizeof(char));
996 memset(evi->invis, 'v', n);
997 p = pane_register(ci->focus, 0, &email_view_handle.c, evi);
1002 attr_set_str(&p->attrs, "render-hide-CR", "yes");
1003 return comm_call(ci->comm2, "callback:attach", p);
1006 static void email_init_map(void)
1008 email_view_map = key_alloc();
1009 key_add(email_view_map, "Free", &email_view_free);
1010 key_add(email_view_map, "doc:step", &email_step);
1011 key_add(email_view_map, "doc:content", &email_content);
1012 key_add(email_view_map, "doc:set-ref", &email_set_ref);
1013 key_add(email_view_map, "doc:set-attr", &email_view_set_attr);
1014 key_add(email_view_map, "doc:get-attr", &email_view_get_attr);
1015 key_add(email_view_map, "doc:email:render-spacer", &email_spacer);
1016 key_add(email_view_map, "doc:email:render-image", &email_image);
1017 key_add(email_view_map, "doc:email:select", &email_select);
1018 key_add(email_view_map, "email:select:hide", &email_select_hide);
1021 void edlib_init(struct pane *ed safe)
1024 call_comm("global-set-command", ed, &open_email, 0, NULL,
1026 call_comm("global-set-command", ed, &attach_email_view, 0, NULL,
1027 "attach-email-view");
1029 call("global-load-module", ed, 0, NULL, "lib-html-to-text");
1030 call("global-load-module", ed, 0, NULL, "lib-pdf-to-text");