2 * Copyright Neil Brown ©2015-2023 <neil@brown.name>
3 * May be distributed under terms of GPLv2 - see file:COPYING
6 * "text-search" command searches from given mark until it
7 * finds the given pattern or end of buffer.
8 * If the pattern is found, then 'm' is left at the extremity of
9 * the match in the direction of search: so the start if search backwards
10 * or the end if searching forwards.
11 * The returned value is the length of the match + 1, or an Efail
12 * In the case of an error, the location of ->mark is undefined.
13 * If mark2 is given, don't go beyond there.
15 * "text-match" is similar to text-search forwards, but requires that
16 * the match starts at ->mark. ->mark is moved to the end of the
17 * match if the text does, in fact, match.
18 * If the match fails, Efalse is returned (different to "text-search")
28 struct match_state *st safe;
39 unsigned short *rxl safe;
42 static void state_free(struct command *c safe)
44 struct search_state *ss = container_of(c, struct search_state, c);
47 rxl_free_state(ss->st);
49 mark_free(ss->endmark);
54 static int is_word(wint_t ch)
56 return ch == '_' || iswalnum(ch);
60 * 'search_test' together with 'stuct search_state' encapsulates
61 * a parsed regexp and some matching state. If called as 'consume'
62 * (or anything starting 'c') it processes one char into the match
63 * and returns 1 if it is worth providing more characters.
64 * Other options for ci->key are:
65 * - reinit - state is re-initialised with flags from ->num, end and
66 * endmark from ->mark and ->mark2
67 * - getinfo - extract total, start, len, since-start from match
68 * - getcapture - get "start" or "len" for a capture in ->num
69 * - interp - interpolate \N captures in ->str
73 struct search_state *ss = container_of(ci->comm,
74 struct search_state, c);
76 if (ci->key[0] == 'c') {
78 wint_t wch = ci->num & 0xFFFFF;
80 int maxlen, since_start;
84 if ((unsigned int)ci->num == WEOF) {
88 if (ss->prev_ch == WEOF)
90 if (is_eol(ss->prev_ch) || ss->prev_ch == WEOF ||
93 switch (is_word(ss->prev_ch) * 2 + is_word(wch)) {
94 case 0: /* in space */
95 case 3: /* within word */
98 case 1: /* start of word */
101 case 2: /* end of word */
107 if (ss->prev_point) {
109 ss->prev_point = False;
111 if (ss->point && ci->mark && mark_same(ss->point, ci->mark))
112 /* Need to assert POINT before next char */
113 ss->prev_point = True;
115 found = rxl_advance(ss->st, wch | flags);
116 anchored = rxl_info(ss->st, &maxlen, NULL, NULL, &since_start);
118 if (found >= RXL_MATCH && ss->endmark && ci->mark &&
119 since_start - maxlen <= 1) {
120 mark_to_mark(ss->endmark, ci->mark);
121 if (found == RXL_MATCH_FLAG)
122 doc_prev(ci->home, ss->endmark);
124 if (ss->end && ci->mark &&
125 mark_ordered_not_same(ss->end, ci->mark)) {
126 /* Mark is *after* the char, so if end and mark
127 * are the same, we haven't passed the 'end' yet,
128 * and it is too early to abort. Hence 'not' above
130 if (ss->anchor_at_end) {
131 found = rxl_advance(ss->st, RXL_ANCHOR);
136 if (found == RXL_DONE)
139 if (!anchored && ci->str &&
140 ss->prefix_len && ci->num2 > ss->prefix_len) {
141 /* It is worth searching for the prefix to improve
144 int pstart = rxl_fast_match(ss->prefix, ss->prefix_len,
146 /* This may not be a full match even for the prefix,
147 * but it is a good place to skip to.
148 * If there was no match, pstart is ci->num2,
149 * so we skip the entire chunk.
150 * We reposition to just before the possible match
151 * so that ->end processesing is handled before the
155 pstart = utf8_round_len(ci->str, pstart - 1);
158 int prev = utf8_round_len(ci->str, pstart - 1);
160 ss->prev_ch = get_utf8(&s, NULL);
167 if (strcmp(ci->key, "reinit") == 0) {
168 rxl_free_state(ss->st);
169 ss->st = rxl_prepare(ss->rxl, ci->num & 3);
170 ss->prev_ch = (unsigned int)ci->num2 ?: WEOF;
172 mark_free(ss->endmark);
174 ss->end = mark_dup(ci->mark);
175 ss->anchor_at_end = ci->num & 4;
179 ss->endmark = mark_dup(ci->mark2);
184 if (strcmp(ci->key, "setpoint") == 0 && ci->mark) {
185 mark_free(ss->point);
186 ss->point = mark_dup(ci->mark);
189 if (strcmp(ci->key, "getinfo") == 0 && ci->str) {
190 int len, total, start, since_start;
191 rxl_info(ss->st, &len, &total, &start, &since_start);
192 if (strcmp(ci->str, "len") == 0)
193 return len < 0 ? Efalse : len+1;
194 if (strcmp(ci->str, "total") == 0)
196 if (strcmp(ci->str, "start") == 0)
197 return start < 0 ? Efalse : start + 1;
198 if (strcmp(ci->str, "since-start") == 0)
199 return since_start < 0 ? Efalse : since_start + 1;
202 if (strcmp(ci->key, "getcapture") == 0 && ci->str) {
204 if (rxl_capture(ss->st, ci->num, ci->num2, &start, &len)) {
205 if (strcmp(ci->str, "start") == 0)
207 if (strcmp(ci->str, "len") == 0)
213 if (strcmp(ci->key, "interp") == 0 && ci->str) {
215 ret = rxl_interp(ss->st, ci->str);
216 comm_call(ci->comm2, "cb", ci->focus, 0, NULL, ret);
220 if (strcmp(ci->key, "forward") == 0) {
221 /* Search forward from @mark in @focus for a match, or
222 * until we hit @mark2. Leave @mark at the end of the
223 * match unless ->endmark was set in which case leave that
227 struct mark *m2 = ci->mark2;
228 struct pane *p = ci->focus;
233 if (m2 && ci->mark->seq >= m2->seq)
236 m = mark_dup(ci->mark); /* search cursor */
237 ss->st = rxl_prepare(ss->rxl, ci->num & 1 ? RXLF_ANCHORED : 0);
238 ss->anchor_at_end = False;
239 ss->prev_ch = doc_prior(p, m);
240 ss->prev_point = ss->point ? mark_same(ss->point, m) : False;
241 call_comm("doc:content", p, &ss->c, 0, m, NULL, 0, m2);
242 rxl_info(ss->st, &maxlen, NULL, NULL, NULL);
243 rxl_free_state(ss->st);
247 if (strcmp(ci->key, "reverse") == 0) {
248 /* Search backward from @mark in @focus for a match, or
249 * until we hit @mark2. Leave @mark at the start of the
250 * match. Return length of match, or negative.
252 * rexel only lets us search forwards, and stepping back one
253 * char at a time to match the pattern is too slow.
254 * So we step back a steadily growing number of chars and search
255 * forward as far as the previous location. Once we
256 * find any match, we check if there is a later one that
259 int step_size = 65536;
262 struct mark *m, *start, *end, *endmark;
263 struct mark *m2 = ci->mark2;
264 struct pane *p = ci->focus;
268 m = mark_dup(ci->mark); /* search cursor */
269 start = mark_dup(ci->mark); /* start of the range being searched */
270 end = mark_dup(ci->mark); /* end of the range being searched */
274 ss->endmark = ci->mark;
275 endmark = ss->endmark;
276 ss->anchor_at_end = True;
280 while (!m2 || m2->seq < start->seq) {
281 mark_to_mark(end, start);
282 call("doc:char", p, -step_size, start, NULL, 0, m2);
283 if (mark_same(start, end))
284 /* We have hit the start(m2), don't continue */
287 ss->prev_ch = doc_prior(p, start);
288 ss->st = rxl_prepare(ss->rxl, 0);
289 ss->prev_point = ss->point ? mark_same(ss->point, m) : False;
291 mark_to_mark(m, start);
292 call_comm("doc:content", p, &ss->c, 0, m);
293 rxl_info(ss->st, &maxlen, NULL, NULL, NULL);
294 rxl_free_state(ss->st);
301 if (pane_too_long(p, 2000)) {
302 /* FIXME returning success is wrong if
303 * we timed out But I want to move the
304 * point, and this is easiest. What do
305 * I really want here? Do I just need
306 * to make reverse search faster?
308 mark_to_mark(endmark, start);
313 while (maxlen >= 0) {
314 /* There is a match starting at 'endmark'.
315 * The might be a later match - check for it.
317 call("doc:char", p, -maxlen, ss->endmark);
318 if (mark_ordered_not_same(end, ss->endmark))
321 if (endmark != ss->endmark &&
322 mark_ordered_or_same(ss->endmark, endmark))
323 /* Didn't move forward!! Presumably
324 * buggy doc:step implementation.
328 mark_to_mark(endmark, ss->endmark);
330 mark_to_mark(start, endmark);
331 ss->prev_ch = doc_next(p, start);
332 ss->st = rxl_prepare(ss->rxl, 0);
333 call_comm("doc:content", p, &ss->c, 0, start);
334 rxl_info(ss->st, &maxlen, NULL, NULL, NULL);
335 rxl_free_state(ss->st);
345 static int search_forward(struct pane *p safe,
346 struct mark *m safe, struct mark *m2,
348 unsigned short *rxl safe,
349 struct mark *endmark, bool anchored)
351 /* Search forward from @m in @p for @rxl looking as far as @m2,
352 * and leaving @endmark at the end point, and returning the
353 * length of the match, or -1.
355 struct search_state ss;
358 if (m2 && m->seq >= m2->seq)
361 ss.prefix_len = rxl_prefix(rxl, ss.prefix, sizeof(ss.prefix));
363 ss.endmark = endmark;
366 return comm_call(&ss.c, "forward", p, anchored, m, NULL, 0, m2);
370 static int search_backward(struct pane *p safe,
371 struct mark *m safe, struct mark *m2,
373 unsigned short *rxl safe,
374 struct mark *endmark safe)
376 /* Search backward from @m in @p for a match of @s. The match
377 * must start at or before m, but may finish later. Only search
378 * as far as @m2 (if set), and leave endmark pointing at the
379 * start of the match, if one is found.
380 * Return length of match, or negative.
382 * rexel only lets us search forwards, and stepping back
383 * one char at a time to match the pattern is too slow.
384 * So we step back a steadily growing number of
385 * chars, and search forward as pfar as the previous location.
386 * Once we find any match, we check if there is a later one
387 * that still satisfies.
389 struct search_state ss;
391 if (m2 && m->seq <= m2->seq)
395 ss.st = rxl_prepare(rxl, 0);
396 ss.prefix_len = rxl_prefix(rxl, ss.prefix, sizeof(ss.prefix));
398 ss.endmark = endmark;
400 ss.prev_point = point ? mark_same(point, m) : False;
402 return comm_call(&ss.c, "reverse", p, 0, m, NULL, 0, m2);
407 struct mark *m, *endmark = NULL;
415 rxl = rxl_parse(ci->str, NULL, ci->num);
423 endmark = mark_dup(m);
424 point = call_ret(mark, "doc:point", ci->focus);
427 if (strcmp(ci->key, "text-match") == 0)
428 since_start = search_forward(ci->focus, m, ci->mark2,
429 point, rxl, endmark, True);
431 since_start = search_backward(ci->focus, m, ci->mark2,
432 point, rxl, endmark);
434 since_start = search_forward(ci->focus, m, ci->mark2,
435 point, rxl, endmark, False);
437 if (since_start >= 0)
438 mark_to_mark(m, endmark);
440 if (since_start < 0) {
441 if (strcmp(ci->key, "text-match") == 0)
442 ret = Efalse; /* non-fatal */
446 ret = since_start + 1;
447 } else if (ci->str2) {
448 struct match_state *st = rxl_prepare(
449 rxl, strcmp(ci->key, "text-match") == 0 ? RXLF_ANCHORED : 0);
450 int flags = RXL_SOL|RXL_SOD;
451 const char *t = ci->str2;
452 int thelen = -1, start = 0;
454 wint_t prev_ch = WEOF;
457 wint_t wc = get_utf8(&t, NULL);
458 if (wc >= WERR|| (ci->num2 > 0 && t > ci->str2 + ci->num2)) {
459 rxl_advance(st, RXL_EOL|RXL_EOD);
462 switch (is_word(prev_ch) * 2 + is_word(wc)) {
463 case 0: /* in space */
464 case 3: /* within word */
467 case 1: /* start of word */
470 case 2: /* end of word */
476 if (prev_ch == WEOF || is_eol(prev_ch))
479 r = rxl_advance(st, wc | flags);
481 if (r >= RXL_MATCH) {
482 /* "start" is in chars, not bytes, so we cannot.
483 * use it. Need since_start and then count
486 rxl_info(st, &thelen, NULL, NULL, &since_start);
487 start = t - ci->str2;
488 while (since_start > 0) {
489 start = utf8_round_len(ci->str2, start-1);
493 } while (r != RXL_DONE);
497 else if (strcmp(ci->key, "text-match") == 0)
510 struct search_state *ss;
516 rxl = rxl_parse(ci->str, NULL, ci->num2);
519 ss = calloc(1, sizeof(*ss));
521 ss->prefix_len = rxl_prefix(rxl, ss->prefix, sizeof(ss->prefix));
523 ss->c.free = state_free;
525 comm_call(&ss->c, "reinit", ci->focus,
526 ci->num, ci->mark, NULL, 0, ci->mark2);
527 comm_call(ci->comm2, "cb", ci->focus,
529 0, NULL, NULL, 0,0, &ss->c);
536 const char *text safe;
542 struct texteql *te = container_of(ci->comm, struct texteql, c);
548 have = ci->num & 0xFFFFF;
549 want = get_utf8(&te->text, NULL);
553 i < ci->num2 && ci->str;
555 if (!te->text[i] || te->text[i] != ci->str[i])
560 if (ci->str && i < ci->num2)
570 if (!ci->str || !ci->mark)
576 call_comm("doc:content", ci->focus, &te.c, 0, ci->mark);
577 return te.matched ? 1 : Efalse;
580 void edlib_init(struct pane *ed safe)
582 call_comm("global-set-command", ed, &text_search, 0, NULL,
584 call_comm("global-set-command", ed, &text_search, 0, NULL,
586 call_comm("global-set-command", ed, &make_search, 0, NULL,
588 call_comm("global-set-command", ed, &text_equals, 0, NULL,