2 * Copyright Neil Brown ©2015-2023 <neil@brown.name>
3 * May be distributed under terms of GPLv2 - see file:COPYING
6 * "text-search" command searches from given mark until it
7 * finds the given pattern or end of buffer.
8 * If the pattern is found, then 'm' is left at the extremity of
9 * the match in the direction of search: so the start if searching backwards
10 * or the end if searching forwards.
11 * The returned value is the length of the match + 1, or an Efail error.
12 * In the case of an error, the location of ->mark is undefined.
13 * If mark2 is given, don't go beyond there.
15 * "text-match" is similar to text-search forwards, but requires that
16 * the match starts at ->mark. ->mark is moved to the end of the
17 * match if the text does, in fact, match.
18 * If the match fails, Efalse is returned (different to "text-search")
28 struct match_state *st safe;
39 unsigned short *rxl safe;
/* Release what is held by the search_state that embeds command @c.
 * The match state (->st) is released with rxl_free_state() and
 * ->endmark with mark_free().
 * NOTE(review): confirm that every other owned field of the state is
 * released here as well.
 */
42 static void state_free(struct command *c safe)
/* @c is the 'c' member of a struct search_state; recover the container. */
44 struct search_state *ss = container_of(c, struct search_state, c);
47 rxl_free_state(ss->st);
49 mark_free(ss->endmark);
/* Report whether @ch counts as a word character for the word-boundary
 * tests in this file: an underscore, or anything iswalnum() accepts.
 * Returns 1 for a word character and 0 otherwise.
 */
54 static int is_word(wint_t ch)
56 return ch == '_' || iswalnum(ch);
60 * 'search_test' together with 'struct search_state' encapsulates
61 * a parsed regexp and some matching state. If called as 'consume'
62 * (or anything starting 'c') it processes one char into the match
63 * and returns 1 if it is worth providing more characters.
64 * Other options for ci->key are:
65 * - reinit - state is re-initialised with flags from ->num, end and
66 * endmark from ->mark and ->mark2
67 * - getinfo - extract total, start, len, since-start from match
68 * - getcapture - get "start" or "len" for a capture in ->num
69 * - interp - interpolate \N captures in ->str
/* Recover the search_state that embeds the command being invoked. */
73 struct search_state *ss = container_of(ci->comm,
74 struct search_state, c);
/* Any key beginning with 'c' (e.g. "consume") feeds one char to the matcher. */
76 if (ci->key[0] == 'c') {
/* The char is carried in the low 20 bits of ->num. */
78 wint_t wch = ci->num & 0xFFFFF;
80 int maxlen, since_start;
/* NOTE(review): ->num == WEOF presumably marks end of input - confirm. */
84 if ((unsigned int)ci->num == WEOF) {
88 if (ss->prev_ch == WEOF)
90 if (is_eol(ss->prev_ch) || ss->prev_ch == WEOF ||
/* Word-boundary classification: is_word(previous) * 2 + is_word(current)
 * gives 0..3, labelled on the cases below.
 */
93 switch (is_word(ss->prev_ch) * 2 + is_word(wch)) {
94 case 0: /* in space */
95 case 3: /* within word */
98 case 1: /* start of word */
101 case 2: /* end of word */
/* prev_point is set further down while handling the preceding char;
 * it is cleared here once it has been seen.
 */
107 if (ss->prev_point) {
109 ss->prev_point = False;
/* Remember when the current mark coincides with the recorded point. */
111 if (ss->point && ci->mark && mark_same(ss->point, ci->mark))
112 /* Need to assert POINT before next char */
113 ss->prev_point = True;
/* Advance the matcher by this char combined with the collected flags,
 * then read back maxlen and since_start.
 */
115 found = rxl_advance(ss->st, wch | flags);
116 anchored = rxl_info(ss->st, &maxlen, NULL, NULL, &since_start);
/* On a match, copy the current mark to ->endmark; for RXL_MATCH_FLAG
 * ->endmark is then moved with doc_prev().
 * NOTE(review): the since_start - maxlen <= 1 guard presumably limits
 * this to matches that end at the current char - confirm.
 */
118 if (found >= RXL_MATCH && ss->endmark && ci->mark &&
119 since_start - maxlen <= 1) {
120 mark_to_mark(ss->endmark, ci->mark);
121 if (found == RXL_MATCH_FLAG)
122 doc_prev(ci->home, ss->endmark);
124 if (ss->end && ci->mark &&
125 mark_ordered_not_same(ss->end, ci->mark)) {
126 /* Mark is *after* the char, so if end and mark
127 * are the same, we haven't passed the 'end' yet,
128 * and it is too early to abort. Hence 'not' above
130 if (ss->anchor_at_end) {
131 found = rxl_advance(ss->st, RXL_ANCHOR);
136 if (found == RXL_DONE)
139 if (!anchored && ci->str &&
140 ss->prefix_len && ci->num2 > ss->prefix_len) {
141 /* It is worth searching for the prefix to improve
144 int pstart = rxl_fast_match(ss->prefix, ss->prefix_len,
146 /* This may not be a full match even for the prefix,
147 * but it is a good place to skip to.
148 * If there was no match, pstart is ci->num2,
149 * so we skip the entire chunk.
150 * We reposition to just before the possible match
151 * so that ->end processesing is handled before the
155 pstart = utf8_round_len(ci->str, pstart - 1);
158 int prev = utf8_round_len(ci->str, pstart - 1);
160 ss->prev_ch = get_utf8(&s, NULL);
167 if (strcmp(ci->key, "reinit") == 0) {
168 rxl_free_state(ss->st);
169 ss->st = rxl_prepare(ss->rxl, ci->num & 3);
170 ss->prev_ch = (unsigned int)ci->num2 ?: WEOF;
172 mark_free(ss->endmark);
174 ss->end = mark_dup(ci->mark);
175 ss->anchor_at_end = ci->num & 4;
179 ss->endmark = mark_dup(ci->mark2);
184 if (strcmp(ci->key, "setpoint") == 0 && ci->mark) {
185 mark_free(ss->point);
186 ss->point = mark_dup(ci->mark);
189 if (strcmp(ci->key, "getinfo") == 0 && ci->str) {
190 int len, total, start, since_start;
191 rxl_info(ss->st, &len, &total, &start, &since_start);
192 if (strcmp(ci->str, "len") == 0)
193 return len < 0 ? Efalse : len+1;
194 if (strcmp(ci->str, "total") == 0)
196 if (strcmp(ci->str, "start") == 0)
197 return start < 0 ? Efalse : start + 1;
198 if (strcmp(ci->str, "since-start") == 0)
199 return since_start < 0 ? Efalse : since_start + 1;
202 if (strcmp(ci->key, "getcapture") == 0 && ci->str) {
204 if (rxl_capture(ss->st, ci->num, ci->num2, &start, &len)) {
205 if (strcmp(ci->str, "start") == 0)
207 if (strcmp(ci->str, "len") == 0)
213 if (strcmp(ci->key, "interp") == 0 && ci->str) {
215 ret = rxl_interp(ss->st, ci->str);
216 comm_call(ci->comm2, "cb", ci->focus, 0, NULL, ret);
/* Run a single forward search over the document content of @p.
 * @p:        pane whose content is streamed with "doc:content"
 * @m:        mark where the search starts
 * @m2:       optional limit; the case where @m is at or after @m2
 *            (compared by ->seq) is tested before any state is set up
 * @rxl:      compiled pattern, given to rxl_prepare() and rxl_prefix()
 * @endmark:  stored in the local search state as ->endmark
 * @anchored: when true the matcher is prepared with RXLF_ANCHORED
 * A local search_state is filled in, the content between @m and @m2 is
 * fed to its command, and the match length is read back with rxl_info()
 * before the match state is released.
 */
223 static int search_forward(struct pane *p safe,
224 struct mark *m safe, struct mark *m2,
226 unsigned short *rxl safe,
227 struct mark *endmark, bool anchored)
229 /* Search forward from @m in @p for @rxl looking as far as @m2,
230 * and leaving @endmark at the end point, and returning the
231 * length of the match, or -1.
233 struct search_state ss;
236 if (m2 && m->seq >= m2->seq)
238 ss.st = rxl_prepare(rxl, anchored ? RXLF_ANCHORED : 0);
239 ss.prefix_len = rxl_prefix(rxl, ss.prefix, sizeof(ss.prefix));
241 ss.endmark = endmark;
243 ss.prev_point = point ? mark_same(point, m) : False;
245 ss.prev_ch = doc_prior(p, m);
246 ss.anchor_at_end = False;
247 call_comm("doc:content", p, &ss.c, 0, m, NULL, 0, m2);
248 rxl_info(ss.st, &maxlen, NULL, NULL, NULL);
249 rxl_free_state(ss.st);
/* Backward search assembled from repeated forward scans; the strategy
 * is described in the comment inside the function.
 * @p:       pane whose document is searched
 * @m:       starting mark; it is also repositioned while scanning
 * @m2:      optional mark bounding how far back to look
 * @rxl:     compiled pattern
 * @endmark: receives the start of the match
 * The first window steps back step_size (65536) chars.
 */
253 static int search_backward(struct pane *p safe,
254 struct mark *m safe, struct mark *m2,
256 unsigned short *rxl safe,
257 struct mark *endmark safe)
259 /* Search backward from @m in @p for a match of @rxl. The match
260 * must start at or before m, but may finish later. Only search
261 * as far as @m2 (if set), and leave endmark pointing at the
262 * start of the match, if one is found.
263 * Return length of match, or negative.
265 * rexel only lets us search forwards, and stepping back
266 * one char at a time to match the pattern is too slow.
267 * So we step back a steadily growing number of
268 * chars, and search forward as far as the previous location.
269 * Once we find any match, we check if there is a later one
270 * that still satisfies.
272 struct search_state ss;
273 int step_size = 65536;
276 struct mark *start = mark_dup(m); /* Start of the range to search */
277 struct mark *end = mark_dup(m);
280 ss.endmark = endmark;
283 ss.prefix_len = rxl_prefix(rxl, ss.prefix, sizeof(ss.prefix));
284 ss.anchor_at_end = True;
/* Keep stepping back while @start is still after @m2, or without limit
 * when no @m2 was given.
 */
288 while (!m2 || m2->seq < start->seq) {
/* The previous window start becomes the end of the new window. */
289 mark_to_mark(end, start);
/* Step @start back by step_size chars; @m2 is handed to "doc:char" too. */
290 call("doc:char", p, -step_size, start, NULL, 0, m2);
291 if (mark_same(start, end))
292 /* We have hit the start, don't continue */
295 ss.prev_ch = doc_prior(p, start);
/* Fresh matcher for this window, prepared without RXLF_ANCHORED. */
296 ss.st = rxl_prepare(rxl, 0);
297 ss.prev_point = point ? mark_same(point, m) : False;
/* Scan forward from @start, reusing @m as the moving position. */
299 mark_to_mark(m, start);
300 call_comm("doc:content", p, &ss.c, 0, m);
301 rxl_info(ss.st, &maxlen, NULL, NULL, NULL);
302 rxl_free_state(ss.st);
/* NOTE(review): 2000 is presumably a time budget in ms - confirm units. */
309 if (pane_too_long(p, 2000)) {
310 /* FIXME returning success is wrong if we timed out
311 * But I want to move the point, and this is easiest.
312 * What do I really want here?
313 * Do I just need to make reverse search faster?
315 mark_to_mark(endmark, start);
320 while (maxlen >= 0) {
321 /* There is a match starting at 'endmark'.
322 * The might be a later match - check for it.
324 call("doc:char", p, -maxlen, ss.endmark);
325 if (mark_ordered_not_same(end, ss.endmark))
328 if (endmark != ss.endmark &&
329 mark_ordered_or_same(ss.endmark, endmark))
330 /* Didn't move forward!! Presumably
331 * buggy doc:step implementation.
335 mark_to_mark(endmark, ss.endmark);
337 mark_to_mark(start, endmark);
338 ss.prev_ch = doc_next(p, start);
339 ss.st = rxl_prepare(rxl, 0);
340 call_comm("doc:content", p, &ss.c, 0, start);
341 rxl_info(ss.st, &maxlen, NULL, NULL, NULL);
342 rxl_free_state(ss.st);
/* Handler body shared by the "text-search" and "text-match" keys: the
 * key is compared against "text-match" wherever the two differ.
 */
351 struct mark *m, *endmark = NULL;
/* Compile the pattern in ->str; ->num is passed as the last argument. */
359 rxl = rxl_parse(ci->str, NULL, ci->num);
/* endmark starts as a copy of m and is handed to the search helpers. */
367 endmark = mark_dup(m);
368 point = call_ret(mark, "doc:point", ci->focus);
/* "text-match" runs a forward search with the anchored flag set. */
371 if (strcmp(ci->key, "text-match") == 0)
372 since_start = search_forward(ci->focus, m, ci->mark2,
373 point, rxl, endmark, True);
375 since_start = search_backward(ci->focus, m, ci->mark2,
376 point, rxl, endmark);
378 since_start = search_forward(ci->focus, m, ci->mark2,
379 point, rxl, endmark, False);
/* On success move m to where the helper left endmark. */
381 if (since_start >= 0)
382 mark_to_mark(m, endmark);
/* A failed "text-match" yields Efalse instead of an error. */
384 if (since_start < 0) {
385 if (strcmp(ci->key, "text-match") == 0)
386 ret = Efalse; /* non-fatal */
/* Success is reported as the helper's result plus one. */
390 ret = since_start + 1;
/* Match the pattern against the string in ->str2, decoding it one
 * UTF-8 character at a time.
 */
391 } else if (ci->str2) {
392 struct match_state *st = rxl_prepare(
393 rxl, strcmp(ci->key, "text-match") == 0 ? RXLF_ANCHORED : 0);
/* The first char is flagged as start-of-line and start-of-data. */
394 int flags = RXL_SOL|RXL_SOD;
395 const char *t = ci->str2;
396 int thelen = -1, start = 0;
398 wint_t prev_ch = WEOF;
401 wint_t wc = get_utf8(&t, NULL);
/* Stop when decoding yields wc >= WERR, or once t has moved more than
 * ->num2 bytes into ->str2 (only when ->num2 > 0); the matcher is then
 * given the end-of-line and end-of-data flags.
 */
402 if (wc >= WERR|| (ci->num2 > 0 && t > ci->str2 + ci->num2)) {
403 rxl_advance(st, RXL_EOL|RXL_EOD);
/* Word-boundary classification: is_word(previous) * 2 + is_word(current). */
406 switch (is_word(prev_ch) * 2 + is_word(wc)) {
407 case 0: /* in space */
408 case 3: /* within word */
411 case 1: /* start of word */
414 case 2: /* end of word */
420 if (prev_ch == WEOF || is_eol(prev_ch))
423 r = rxl_advance(st, wc | flags);
425 if (r >= RXL_MATCH) {
426 /* "start" is in chars, not bytes, so we cannot.
427 * use it. Need since_start and then count
430 rxl_info(st, &thelen, NULL, NULL, &since_start);
431 start = t - ci->str2;
432 while (since_start > 0) {
433 start = utf8_round_len(ci->str2, start-1);
437 } while (r != RXL_DONE);
441 else if (strcmp(ci->key, "text-match") == 0)
/* Build a heap-allocated search_state for the pattern in ->str and hand
 * its embedded command to the caller through the "cb" call on ->comm2.
 */
454 struct search_state *ss;
/* Compile the pattern; ->num2 is passed as the last argument. */
460 rxl = rxl_parse(ci->str, NULL, ci->num2);
/* Zero-initialised state.
 * NOTE(review): confirm the calloc() result is checked before use.
 */
463 ss = calloc(1, sizeof(*ss));
465 ss->prefix_len = rxl_prefix(rxl, ss->prefix, sizeof(ss->prefix));
/* state_free is installed as the command's free hook. */
467 ss->c.free = state_free;
/* Initialise through the state's own "reinit" key, forwarding ->num,
 * ->mark and ->mark2.
 */
469 comm_call(&ss->c, "reinit", ci->focus,
470 ci->num, ci->mark, NULL, 0, ci->mark2);
/* Deliver the new command to the caller's callback. */
471 comm_call(ci->comm2, "cb", ci->focus,
473 0, NULL, NULL, 0,0, &ss->c);
480 const char *text safe;
486 struct texteql *te = container_of(ci->comm, struct texteql, c);
492 have = ci->num & 0xFFFFF;
493 want = get_utf8(&te->text, NULL);
497 i < ci->num2 && ci->str;
499 if (!te->text[i] || te->text[i] != ci->str[i])
504 if (ci->str && i < ci->num2)
/* Both ->str and ->mark are required. */
514 if (!ci->str || !ci->mark)
/* Stream document content from ->mark through the comparison callback te.c. */
520 call_comm("doc:content", ci->focus, &te.c, 0, ci->mark);
/* 1 when the callback recorded a match, otherwise Efalse. */
521 return te.matched ? 1 : Efalse;
/* Module entry point: register this file's commands on @ed using
 * "global-set-command". text_search is registered twice, followed by
 * make_search and text_equals.
 * NOTE(review): the two text_search registrations presumably bind the
 * "text-search" and "text-match" keys - confirm against the key arguments.
 */
524 void edlib_init(struct pane *ed safe)
526 call_comm("global-set-command", ed, &text_search, 0, NULL,
528 call_comm("global-set-command", ed, &text_search, 0, NULL,
530 call_comm("global-set-command", ed, &make_search, 0, NULL,
532 call_comm("global-set-command", ed, &text_equals, 0, NULL,