2 * Copyright Neil Brown ©2015-2023 <neil@brown.name>
3 * May be distributed under terms of GPLv2 - see file:COPYING
6 * "text-search" command searches from given mark until it
7 * finds the given pattern or end of buffer.
8 * If the pattern is found, then 'm' is left at the extremity of
9 * the match in the direction of search: so the start if searching backwards
10 * or the end if searching forwards.
11 * The returned value is the length of the match + 1, or an Efail.
12 * In the case of an error, the location of ->mark is undefined.
13 * If mark2 is given, don't go beyond there.
15 * "text-match" is similar to text-search forwards, but requires that
16 * the match starts at ->mark. ->mark is moved to the end of the
17 * match if the text does, in fact, match.
18 * If the match fails, Efalse is returned (different to "text-search")
28 struct match_state *st safe;
39 unsigned short *rxl safe;
/*
 * state_free() - free callback installed on a search_state command
 * (make_search assigns it to ss->c.free).
 * Recovers the enclosing struct search_state from @c with container_of(),
 * then releases the match state via rxl_free_state() and the endmark via
 * mark_free().
 * NOTE(review): only part of the body is visible in this view; other
 * members may be released on lines not shown - confirm against the full file.
 */
42 static void state_free(struct command *c safe)
44 struct search_state *ss = container_of(c, struct search_state, c);
47 rxl_free_state(ss->st);
49 mark_free(ss->endmark);
/*
 * is_word() - test whether @ch counts as a word character.
 * Returns non-zero when @ch is an underscore or is accepted by iswalnum(),
 * and zero otherwise.  The callers in this file combine the result for the
 * previous and the current character to classify word boundaries.
 */
54 static int is_word(wint_t ch)
56 return ch == '_' || iswalnum(ch);
60 * 'search_test' together with 'struct search_state' encapsulates
61 * a parsed regexp and some matching state. If called as 'consume'
62 * (or anything starting 'c') it processes one char into the match
63 * and returns 1 if it is worth providing more characters.
64 * Other options for ci->key are:
65 * - reinit - state is re-initialised with flags from ->num, end and
66 * endmark from ->mark and ->mark2
67 * - getinfo - extract total, start, len, since-start from match
68 * - getcapture - get "start" or "len" for a capture in ->num
69 * - interp - interpolate \N captures in ->str
73 struct search_state *ss = container_of(ci->comm,
74 struct search_state, c);
76 if (ci->key[0] == 'c') {
78 wint_t wch = ci->num & 0xFFFFF;
80 int maxlen, since_start;
84 if ((unsigned int)ci->num == WEOF) {
88 if (ss->prev_ch == WEOF)
90 if (is_eol(ss->prev_ch) || ss->prev_ch == WEOF ||
93 switch (is_word(ss->prev_ch) * 2 + is_word(wch)) {
94 case 0: /* in space */
95 case 3: /* within word */
98 case 1: /* start of word */
101 case 2: /* end of word */
107 if (ss->prev_point) {
109 ss->prev_point = False;
111 if (ss->point && ci->mark && mark_same(ss->point, ci->mark))
112 /* Need to assert POINT before next char */
113 ss->prev_point = True;
115 found = rxl_advance(ss->st, wch | flags);
116 anchored = rxl_info(ss->st, &maxlen, NULL, NULL, &since_start);
118 if (found >= RXL_MATCH && ss->endmark && ci->mark &&
119 since_start - maxlen <= 1) {
120 mark_to_mark(ss->endmark, ci->mark);
121 if (found == RXL_MATCH_FLAG)
122 doc_prev(ci->home, ss->endmark);
124 if (ss->end && ci->mark &&
125 mark_ordered_not_same(ss->end, ci->mark)) {
126 /* Mark is *after* the char, so if end and mark
127 * are the same, we haven't passed the 'end' yet,
128 * and it is too early to abort. Hence 'not' above
130 if (ss->anchor_at_end) {
131 found = rxl_advance(ss->st, RXL_ANCHOR);
136 if (found == RXL_DONE)
139 if (!anchored && ci->str &&
140 ss->prefix_len && ci->num2 > ss->prefix_len) {
141 /* It is worth searching for the prefix to improve
144 int pstart = rxl_fast_match(ss->prefix, ss->prefix_len,
146 /* This may not be a full match even for the prefix,
147 * but it is a good place to skip to.
148 * If there was no match, pstart is ci->num2,
149 * so we skip the entire chunk.
150 * We reposition to just before the possible match
151 * so that ->end processing is handled before the
155 pstart = utf8_round_len(ci->str, pstart - 1);
158 int prev = utf8_round_len(ci->str, pstart - 1);
160 ss->prev_ch = get_utf8(&s, NULL);
167 if (strcmp(ci->key, "reinit") == 0) {
168 rxl_free_state(ss->st);
169 ss->st = rxl_prepare(ss->rxl, ci->num & 3);
170 ss->prev_ch = (unsigned int)ci->num2 ?: WEOF;
172 mark_free(ss->endmark);
174 ss->end = mark_dup(ci->mark);
175 ss->anchor_at_end = ci->num & 4;
179 ss->endmark = mark_dup(ci->mark2);
184 if (strcmp(ci->key, "setpoint") == 0 && ci->mark) {
185 mark_free(ss->point);
186 ss->point = mark_dup(ci->mark);
189 if (strcmp(ci->key, "getinfo") == 0 && ci->str) {
190 int len, total, start, since_start;
191 rxl_info(ss->st, &len, &total, &start, &since_start);
192 if (strcmp(ci->str, "len") == 0)
193 return len < 0 ? Efalse : len+1;
194 if (strcmp(ci->str, "total") == 0)
196 if (strcmp(ci->str, "start") == 0)
197 return start < 0 ? Efalse : start + 1;
198 if (strcmp(ci->str, "since-start") == 0)
199 return since_start < 0 ? Efalse : since_start + 1;
202 if (strcmp(ci->key, "getcapture") == 0 && ci->str) {
204 if (rxl_capture(ss->st, ci->num, ci->num2, &start, &len)) {
205 if (strcmp(ci->str, "start") == 0)
207 if (strcmp(ci->str, "len") == 0)
213 if (strcmp(ci->key, "interp") == 0 && ci->str) {
215 ret = rxl_interp(ss->st, ci->str);
216 comm_call(ci->comm2, "cb", ci->focus, 0, NULL, ret);
220 if (strcmp(ci->key, "reverse") == 0) {
221 /* Search backward from @mark in @focus for a match, or
222 * until we hit @mark2. Leave @mark at the start of the
223 * match. Return length of match, or negative.
225 * rexel only lets us search forwards, and stepping back one
226 * char at a time to match the pattern is too slow.
227 * So we step back a steadily growing number of chars and search
228 * forward as far as the previous location. Once we
229 * find any match, we check if there is a later one that
232 int step_size = 65536;
235 struct mark *m, *start, *end, *endmark;
236 struct mark *m2 = ci->mark2;
237 struct pane *p = ci->focus;
241 m = mark_dup(ci->mark); /* search cursor */
242 start = mark_dup(ci->mark); /* start of the range being searched */
243 end = mark_dup(ci->mark); /* end of the range being searched */
247 ss->endmark = ci->mark;
248 endmark = ss->endmark;
249 ss->anchor_at_end = True;
253 while (!m2 || m2->seq < start->seq) {
254 mark_to_mark(end, start);
255 call("doc:char", p, -step_size, start, NULL, 0, m2);
256 if (mark_same(start, end))
257 /* We have hit the start(m2), don't continue */
260 ss->prev_ch = doc_prior(p, start);
261 ss->st = rxl_prepare(ss->rxl, 0);
262 ss->prev_point = ss->point ? mark_same(ss->point, m) : False;
264 mark_to_mark(m, start);
265 call_comm("doc:content", p, &ss->c, 0, m);
266 rxl_info(ss->st, &maxlen, NULL, NULL, NULL);
267 rxl_free_state(ss->st);
274 if (pane_too_long(p, 2000)) {
275 /* FIXME returning success is wrong if
276 * we timed out. But I want to move the
277 * point, and this is easiest. What do
278 * I really want here? Do I just need
279 * to make reverse search faster?
281 mark_to_mark(endmark, start);
286 while (maxlen >= 0) {
287 /* There is a match starting at 'endmark'.
288 * There might be a later match - check for it.
290 call("doc:char", p, -maxlen, ss->endmark);
291 if (mark_ordered_not_same(end, ss->endmark))
294 if (endmark != ss->endmark &&
295 mark_ordered_or_same(ss->endmark, endmark))
296 /* Didn't move forward!! Presumably
297 * buggy doc:step implementation.
301 mark_to_mark(endmark, ss->endmark);
303 mark_to_mark(start, endmark);
304 ss->prev_ch = doc_next(p, start);
305 ss->st = rxl_prepare(ss->rxl, 0);
306 call_comm("doc:content", p, &ss->c, 0, start);
307 rxl_info(ss->st, &maxlen, NULL, NULL, NULL);
308 rxl_free_state(ss->st);
/*
 * search_forward() - run a single forward search using a stack-allocated
 * struct search_state.
 *
 * Visible steps:
 *  - bail out early when @m2 is given and @m is not before it (the value
 *    returned on that path is on a line not visible in this view);
 *  - build the match state with rxl_prepare(), passing RXLF_ANCHORED when
 *    @anchored is set;
 *  - fetch the prefix reported by rxl_prefix() into ss.prefix;
 *  - record @endmark, whether the point mark coincides with @m, and the
 *    character before @m (doc_prior) as the previous character;
 *  - feed document content to the ss.c callback through "doc:content",
 *    passing @m and @m2;
 *  - read the match length with rxl_info() and free the match state.
 * NOTE(review): the point parameter, the setup of ss.c and the final return
 * are on lines not visible here - confirm against the full file.
 */
318 static int search_forward(struct pane *p safe,
319 struct mark *m safe, struct mark *m2,
321 unsigned short *rxl safe,
322 struct mark *endmark, bool anchored)
324 /* Search forward from @m in @p for @rxl looking as far as @m2,
325 * and leaving @endmark at the end point, and returning the
326 * length of the match, or -1.
328 struct search_state ss;
331 if (m2 && m->seq >= m2->seq)
333 ss.st = rxl_prepare(rxl, anchored ? RXLF_ANCHORED : 0);
334 ss.prefix_len = rxl_prefix(rxl, ss.prefix, sizeof(ss.prefix));
336 ss.endmark = endmark;
338 ss.prev_point = point ? mark_same(point, m) : False;
340 ss.prev_ch = doc_prior(p, m);
341 ss.anchor_at_end = False;
342 call_comm("doc:content", p, &ss.c, 0, m, NULL, 0, m2);
343 rxl_info(ss.st, &maxlen, NULL, NULL, NULL);
344 rxl_free_state(ss.st);
/*
 * search_backward() - set up a stack-allocated struct search_state and
 * delegate the backward search to its command with the "reverse" key.
 *
 * Visible steps:
 *  - bail out early when @m2 is given and @m is not after it (the value
 *    returned on that path is on a line not visible in this view);
 *  - build an unanchored match state with rxl_prepare(rxl, 0);
 *  - fetch the prefix reported by rxl_prefix() into ss.prefix;
 *  - record @endmark and whether the point mark coincides with @m;
 *  - return whatever comm_call(&ss.c, "reverse", ...) returns, passing @m
 *    and @m2 through.
 * NOTE(review): the point parameter and the setup of ss.c are on lines not
 * visible here - confirm against the full file.
 */
348 static int search_backward(struct pane *p safe,
349 struct mark *m safe, struct mark *m2,
351 unsigned short *rxl safe,
352 struct mark *endmark safe)
354 /* Search backward from @m in @p for a match of @s. The match
355 * must start at or before m, but may finish later. Only search
356 * as far as @m2 (if set), and leave endmark pointing at the
357 * start of the match, if one is found.
358 * Return length of match, or negative.
360 * rexel only lets us search forwards, and stepping back
361 * one char at a time to match the pattern is too slow.
362 * So we step back a steadily growing number of
363 * chars, and search forward as far as the previous location.
364 * Once we find any match, we check if there is a later one
365 * that still satisfies.
367 struct search_state ss;
369 if (m2 && m->seq <= m2->seq)
373 ss.st = rxl_prepare(rxl, 0);
374 ss.prefix_len = rxl_prefix(rxl, ss.prefix, sizeof(ss.prefix));
376 ss.endmark = endmark;
378 ss.prev_point = point ? mark_same(point, m) : False;
380 return comm_call(&ss.c, "reverse", p, 0, m, NULL, 0, m2);
385 struct mark *m, *endmark = NULL;
393 rxl = rxl_parse(ci->str, NULL, ci->num);
401 endmark = mark_dup(m);
402 point = call_ret(mark, "doc:point", ci->focus);
405 if (strcmp(ci->key, "text-match") == 0)
406 since_start = search_forward(ci->focus, m, ci->mark2,
407 point, rxl, endmark, True);
409 since_start = search_backward(ci->focus, m, ci->mark2,
410 point, rxl, endmark);
412 since_start = search_forward(ci->focus, m, ci->mark2,
413 point, rxl, endmark, False);
415 if (since_start >= 0)
416 mark_to_mark(m, endmark);
418 if (since_start < 0) {
419 if (strcmp(ci->key, "text-match") == 0)
420 ret = Efalse; /* non-fatal */
424 ret = since_start + 1;
425 } else if (ci->str2) {
426 struct match_state *st = rxl_prepare(
427 rxl, strcmp(ci->key, "text-match") == 0 ? RXLF_ANCHORED : 0);
428 int flags = RXL_SOL|RXL_SOD;
429 const char *t = ci->str2;
430 int thelen = -1, start = 0;
432 wint_t prev_ch = WEOF;
435 wint_t wc = get_utf8(&t, NULL);
436 if (wc >= WERR|| (ci->num2 > 0 && t > ci->str2 + ci->num2)) {
437 rxl_advance(st, RXL_EOL|RXL_EOD);
440 switch (is_word(prev_ch) * 2 + is_word(wc)) {
441 case 0: /* in space */
442 case 3: /* within word */
445 case 1: /* start of word */
448 case 2: /* end of word */
454 if (prev_ch == WEOF || is_eol(prev_ch))
457 r = rxl_advance(st, wc | flags);
459 if (r >= RXL_MATCH) {
460 /* "start" is in chars, not bytes, so we cannot
461 * use it. Need since_start and then count
464 rxl_info(st, &thelen, NULL, NULL, &since_start);
465 start = t - ci->str2;
466 while (since_start > 0) {
467 start = utf8_round_len(ci->str2, start-1);
471 } while (r != RXL_DONE);
475 else if (strcmp(ci->key, "text-match") == 0)
488 struct search_state *ss;
494 rxl = rxl_parse(ci->str, NULL, ci->num2);
497 ss = calloc(1, sizeof(*ss));
499 ss->prefix_len = rxl_prefix(rxl, ss->prefix, sizeof(ss->prefix));
501 ss->c.free = state_free;
503 comm_call(&ss->c, "reinit", ci->focus,
504 ci->num, ci->mark, NULL, 0, ci->mark2);
505 comm_call(ci->comm2, "cb", ci->focus,
507 0, NULL, NULL, 0,0, &ss->c);
514 const char *text safe;
520 struct texteql *te = container_of(ci->comm, struct texteql, c);
526 have = ci->num & 0xFFFFF;
527 want = get_utf8(&te->text, NULL);
531 i < ci->num2 && ci->str;
533 if (!te->text[i] || te->text[i] != ci->str[i])
538 if (ci->str && i < ci->num2)
548 if (!ci->str || !ci->mark)
554 call_comm("doc:content", ci->focus, &te.c, 0, ci->mark);
555 return te.matched ? 1 : Efalse;
/*
 * edlib_init() - module entry point: register the commands of this file
 * with the editor pane @ed via "global-set-command".
 * text_search is registered twice, followed by make_search and text_equals.
 * NOTE(review): the names each command is registered under are on
 * continuation lines not visible in this view; presumably the two
 * text_search registrations are "text-search" and "text-match" as described
 * in the file header - confirm against the full file.
 */
558 void edlib_init(struct pane *ed safe)
560 call_comm("global-set-command", ed, &text_search, 0, NULL,
562 call_comm("global-set-command", ed, &text_search, 0, NULL,
564 call_comm("global-set-command", ed, &make_search, 0, NULL,
566 call_comm("global-set-command", ed, &text_equals, 0, NULL,