]> git.neil.brown.name Git - wiggle.git/blob - merge2.c
Encourage more context into conflict reports.
[wiggle.git] / merge2.c
1 /*
2  * wiggle - apply rejected patches
3  *
4  * Copyright (C) 2005 Neil Brown <neilb@cse.unsw.edu.au>
5  * Copyright (C) 2010 Neil Brown <neilb@suse.de>
6  *
7  *
8  *    This program is free software; you can redistribute it and/or modify
9  *    it under the terms of the GNU General Public License as published by
10  *    the Free Software Foundation; either version 2 of the License, or
11  *    (at your option) any later version.
12  *
13  *    This program is distributed in the hope that it will be useful,
14  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *    GNU General Public License for more details.
17  *
18  *    You should have received a copy of the GNU General Public License
19  *    along with this program; if not, write to the Free Software Foundation, Inc.,
20  *    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  *    Author: Neil Brown
23  *    Email: <neilb@suse.de>
24  */
25
26 #include "wiggle.h"
27 #include <stdlib.h>
28
29 /*
30  * Second attempt at merging....
31  *
32  * We want to create a mergelist which identifies 'orig' and 'after'
33  * sections (from a and c) and conflicts (which are ranges of a,b,c which
34  * all don't match).
35  * It is also helpful to differentiate 'orig' sections that aren't
36  * matched in 'b' with orig sections that are.
37  * To help with highlighting, it will be useful to know where
38  * the conflicts match the csl lists.
39  *
40  * This can all be achieved with a list of (a,b,c,c1,c1) 5-tuples.
41  * If two consecutive differ in more than one of a,b,c, it is a
42  * conflict.
43  * If only 'a' differ, it is un-matched original.
44  * If only 'b' differ, it is matched, unchanged original
45  * If only 'c' differ, it is 1
46  */
47
48 static inline int min(int a, int b)
49 {
50         return a < b ? a : b;
51 }
52 static inline void assert(int a)
53 {
54         if (!a)
55                 abort();
56 }
57
58 static int check_alreadyapplied(struct file af, struct file cf,
59                                 struct merge *m)
60 {
61         int i;
62         if (m->al != m->cl)
63                 return 0;
64         for (i = 0; i < m->al; i++) {
65                 if (af.list[m->a+i].len != cf.list[m->c+i].len)
66                         return 0;
67                 if (strncmp(af.list[m->a+i].start,
68                             cf.list[m->c+i].start,
69                             af.list[m->a+i].len) != 0)
70                         return 0;
71         }
72         if (do_trace) {
73                 printf("already applied %d,%d,%d - %d,%d,%d\n",
74                        m->a, m->b, m->c, m->al, m->bl, m->cl);
75                 printf(" %.10s - %.10s\n", af.list[m->a].start,
76                        cf.list[m->c].start);
77         }
78         m->type = AlreadyApplied;
79         return 1;
80 }
81
82 /* A 'cut-point' is a location in the merger where it is reasonable
83  * the change the mode of display - between displaying the merger
84  * and displaying the separate streams.
85  * A 'conflict' can only be displayed as separate stream so when
86  * one is found, we need to find a preceeding and trailing cut-point
87  * and enlarge the conflict to that range.
88  * A suitable location is one where all three streams are at a line-end.
89  */
90 static int is_cutpoint(struct merge m,
91                        struct file af, struct file bf, struct file cf)
92 {
93         return ((m.a == 0 || ends_line(af.list[m.a-1])) &&
94                 (m.b == 0 || ends_line(bf.list[m.b-1])) &&
95                 (m.c == 0 || ends_line(cf.list[m.c-1])));
96 }
97
98 static int isolate_conflicts(struct file af, struct file bf, struct file cf,
99                              struct csl *csl1, struct csl *csl2, int words,
100                              struct merge *m, int show_wiggles)
101 {
102         /* A conflict indicates that something is definitely wrong
103          * and so we need to be a bit suspicious of nearby apparent matches.
104          * To display a conflict effectively we expands it's effect to
105          * include any Extraneous, Unmatched, Changed or AlreadyApplied text.
106          * Also, unless 'words', we need to include any partial lines
107          * in the Unchanged text that forms the border of a conflict.
108          *
109          * A Changed text may also border a conflict, but it can
110          * only border one conflict (where as an Unchanged can border
111          * a preceeding and a following conflict).
112          * The 'new' section of a Changed text appears in the
113          * conflict as does any part of the original before
114          * a newline.
115          *
116          * If 'show_wiggles' is set we treat wiggles like conflicts.
117          * A 'wiggle' is implied by any Extraneous text being ignored,
118          * or any line that has both Changed and Unmatched content.
119          * (Unmatched content where nothing is changed is common and not
120          *  really a 'wiggle').
121          *
122          * A hunk header is never considered part of a conflict.  It
123          * thereby can serve as a separator between conflicts.
124          *
125          * We need to ensure there is adequate context for the conflict.
126          * So ensure there are at least 3 newlines in Extraneous or
127          * Unchanged on both sides of a Conflict - but don't go so far
128          * as including a hunk header.
129          * If there are 3, and they are all in 'Unchanged' sections, then
130          * that much context is not really needed - reduce it a bit.
131          */
132         int i, j, k;
133         int cnt = 0;
134         int changed = 0;
135         int unmatched = 0;
136
137         for (i = 0; m[i].type != End; i++) {
138                 if (m[i].type == Changed)
139                         changed = 1;
140                 if (m[i].type == Unmatched)
141                         unmatched = 1;
142                 if (m[i].type == Conflict ||
143                     (show_wiggles && ((changed && unmatched)
144                                         || m[i].type == Extraneous))) {
145                         /* We have a conflict (or wiggle) here.
146                          * First search backwards for an Unchanged marking
147                          * things as in_conflict.  Then find the
148                          * cut-point in the Unchanged.  If there isn't one,
149                          * keep looking.
150                          *
151                          * Then search forward doing the same thing.
152                          */
153                         int newlines = 0;
154                         cnt++;
155                         m[i].in_conflict = 1;
156                         j = i;
157                         while (--j >= 0) {
158                                 if (m[j].type == Extraneous &&
159                                     bf.list[m[j].b].start[0] == '\0')
160                                         /* hunk header - not conflict any more */
161                                         break;
162                                 if (!m[j].in_conflict) {
163                                         m[j].in_conflict = 1;
164                                         m[j].lo = 0;
165                                 } else if (m[j].type == Changed) {
166                                         /* This can no longer form a border */
167                                         m[j].hi = -1;
168                                         /* We merge these conflicts and stop searching */
169                                         cnt--;
170                                         break;
171                                 }
172                                 if (m[j].type == Extraneous) {
173                                         for (k = m[j].bl; k > 0; k--)
174                                                 if (ends_line(bf.list[m[j].b+k-1]))
175                                                         newlines++;
176                                 }
177
178                                 if (m[j].type == Unchanged || m[j].type == Changed) {
179                                         /* If we find enough newlines in this section,
180                                          * then we only really need 1, but would rather
181                                          * it wasn't the first one.  'firstk' allows us
182                                          * to track which newline we actually use
183                                          */
184                                         int firstk = m[j].al;
185                                         if (words) {
186                                                 m[j].hi = m[j].al;
187                                                 break;
188                                         }
189                                         /* need to find the last line-break, which
190                                          * might be after the last newline, if there
191                                          * is one, or might be at the start
192                                          */
193                                         for (k = m[j].al; k > 0; k--)
194                                                 if (ends_line(af.list[m[j].a+k-1])) {
195                                                         if (firstk == m[j].al)
196                                                                 firstk = k;
197                                                         newlines++;
198                                                         if (newlines >= 3) {
199                                                                 k = firstk;
200                                                                 break;
201                                                         }
202                                                 }
203                                         if (k > 0)
204                                                 m[j].hi = k;
205                                         else if (is_cutpoint(m[j], af,bf,cf))
206                                                 m[j].hi = 0;
207                                         else
208                                                 /* no start-of-line found... */
209                                                 m[j].hi = -1;
210                                         if (m[j].hi > 0 && m[j].type == Changed) {
211                                                 /* this can only work if start is
212                                                  * also a line break */
213                                                 if (is_cutpoint(m[j], af,bf,cf))
214                                                         /* ok */;
215                                                 else
216                                                         m[j].hi = -1;
217                                         }
218                                         if (m[j].hi >= 0)
219                                                 break;
220                                 }
221                         }
222
223                         /* now the forward search */
224                         newlines = 0;
225                         for (j = i+1; m[j].type != End; j++) {
226                                 if (m[j].type == Extraneous &&
227                                     bf.list[m[j].b].start[0] == '\0')
228                                         /* hunk header - not conflict any more */
229                                         break;
230                                 m[j].in_conflict = 1;
231                                 if (m[j].type == Extraneous) {
232                                         for (k = 0; k < m[j].bl; k++)
233                                                 if (ends_line(bf.list[m[j].b+k]))
234                                                         newlines++;
235                                 }
236                                 if (m[j].type == Unchanged || m[j].type == Changed) {
237                                         m[j].hi = m[j].al;
238                                         if (words) {
239                                                 m[j].lo = 0;
240                                                 break;
241                                         }
242                                         /* need to find a line-break, which might be at
243                                          * the very beginning, or might be after the
244                                          * first newline - if there is one
245                                          */
246                                         if (is_cutpoint(m[j], af,bf,cf))
247                                                 m[j].lo = 0;
248                                         else {
249                                                 /* If we find enough newlines in this section,
250                                                  * then we only really need 1, but would rather
251                                                  * it wasn't the first one.  'firstk' allows us
252                                                  * to track which newline we actually use
253                                                  */
254                                                 int firstk = 0;
255                                                 for (k = 0 ; k < m[j].al ; k++)
256                                                         if (ends_line(af.list[m[j].a+k])) {
257                                                                 if (firstk == 0)
258                                                                         firstk = k;
259                                                                 newlines++;
260                                                                 if (newlines >= 3) {
261                                                                         k = firstk;
262                                                                         break;
263                                                                 }
264                                                         }
265                                                 if (k < m[j].al)
266                                                         m[j].lo = k+1;
267                                                 else
268                                                         /* no start-of-line found */
269                                                         m[j].lo = m[j].al+1;
270                                         }
271                                         if (m[j].lo <= m[j].al+1 && m[j].type == Changed) {
272                                                 /* this can only work if the end is a line break */
273                                                 if (is_cutpoint(m[j+1], af,bf,cf))
274                                                         /* ok */;
275                                                 else
276                                                         m[j].lo = m[j].al+1;
277                                         }
278                                         if (m[j].lo < m[j].al+1)
279                                                 break;
280                                 }
281                         }
282                         i = j - 1;
283                 }
284                 if (ends_line(af.list[m[i].a+m[i].al-1])) {
285                         unmatched = 0;
286                         changed = 0;
287                 }
288         }
289         return cnt;
290 }
291
292 struct ci make_merger(struct file af, struct file bf, struct file cf,
293                       struct csl *csl1, struct csl *csl2, int words,
294                       int ignore_already, int show_wiggles)
295 {
296         /* find the wiggles and conflicts between csl1 and csl2
297          */
298         struct ci rv;
299         int i, l;
300         int a, b, c, c1, c2;
301         int wiggle_found = 0;
302
303         rv.conflicts = rv.wiggles = rv.ignored = 0;
304
305         for (i = 0; csl1[i].len; i++)
306                 ;
307         l = i;
308         for (i = 0; csl2[i].len; i++)
309                 ;
310         l += i;
311         /* maybe a bit of slack at each end */
312         l = l * 4 + 10;
313
314         rv.merger = xmalloc(sizeof(struct merge)*l);
315
316         a = b = c = c1 = c2 = 0;
317         i = 0;
318         while (1) {
319                 int match1, match2;
320                 match1 = (a >= csl1[c1].a && b >= csl1[c1].b); /* c1 doesn't match */
321                 match2 = (b >= csl2[c2].a && c >= csl2[c2].b);
322
323                 rv.merger[i].a = a;
324                 rv.merger[i].b = b;
325                 rv.merger[i].c = c;
326                 rv.merger[i].c1 = c1;
327                 rv.merger[i].c2 = c2;
328                 rv.merger[i].in_conflict = 0;
329
330                 if (!match1 && match2) {
331                         /* This is either Unmatched or Extraneous - probably both.
332                          * If the match2 is a hunk-header Extraneous, it must
333                          * align with an end-of-line in 'a', so adjust endpoint
334                          */
335                         int newa = csl1[c1].a;
336                         if (bf.list[b].start && bf.list[b].start[0] == '\0') {
337                                 while (newa > a &&
338                                        !ends_line(af.list[newa-1]))
339                                         newa--;
340                                 while (newa < af.elcnt && !(newa == 0 || ends_line(af.list[newa-1])))
341                                         newa++;
342                         }
343                         if (a < newa) {
344                                 /* some unmatched text */
345                                 rv.merger[i].type = Unmatched;
346                                 rv.merger[i].al = newa - a;
347                                 rv.merger[i].bl = 0;
348                                 rv.merger[i].cl = 0;
349                                 wiggle_found++;
350                         } else {
351                                 int newb;
352                                 int j;
353                                 assert(b < csl1[c1].b);
354                                 /* some Extraneous text */
355                                 /* length is min of unmatched on left
356                                  * and matched on right.
357                                  * However a hunk-header must be an
358                                  * Extraneous section by itself, so if this
359                                  * start with one, the length is 1, and if
360                                  * there is one in the middle, only take the
361                                  * text up to there for now.
362                                  */
363                                 rv.merger[i].type = Extraneous;
364                                 rv.merger[i].al = 0;
365                                 newb = b +
366                                         min(csl1[c1].b - b,
367                                             csl2[c2].len - (b-csl2[c2].a));
368                                 if (bf.list[b].start[0] == '\0')
369                                         newb = b + 1;
370                                 for (j = b; j < newb; j++) {
371                                         if (bf.list[j].start[0] == '\0') {
372                                                 if (wiggle_found > 1)
373                                                         rv.wiggles++;
374                                                 wiggle_found = 0;
375                                                 if (j > b)
376                                                         newb = j;
377                                         } else
378                                                 wiggle_found++;
379                                 }
380                                 rv.merger[i].cl =
381                                         rv.merger[i].bl = newb - b;
382                         }
383                 } else if (match1 && !match2) {
384                         /* some changed text
385                          * if 'c' is currently at a suitable cut-point, then
386                          * we can look for a triple-cut-point for start.
387                          * Also, if csl2[c2].b isn't in a conflict, and is
388                          * a suitable cut-point, then we could make a
389                          * triple-cut-point for end of a conflict.
390                          */
391
392                         rv.merger[i].type = Changed;
393                         rv.merger[i].bl = min(csl1[c1].b+csl1[c1].len, csl2[c2].a) - b;
394                         rv.merger[i].al = rv.merger[i].bl;
395                         rv.merger[i].cl = csl2[c2].b - c;
396                 } else if (match1 && match2) {
397                         /* Some unchanged text
398                          */
399                         rv.merger[i].type = Unchanged;
400                         rv.merger[i].bl =
401                                 min(csl1[c1].len - (b-csl1[c1].b),
402                                     csl2[c2].len - (b-csl2[c2].a));
403                         rv.merger[i].al = rv.merger[i].cl =
404                                 rv.merger[i].bl;
405                 } else {
406                         /* must be a conflict.
407                          * Move a and c to next match, and b to closest of the two
408                          */
409                         rv.merger[i].type = Conflict;
410                         rv.merger[i].al = csl1[c1].a - a;
411                         rv.merger[i].cl = csl2[c2].b - c;
412                         rv.merger[i].bl = min(csl1[c1].b, csl2[c2].a) - b;
413                         if (ignore_already &&
414                             check_alreadyapplied(af, cf, &rv.merger[i]))
415                                 rv.ignored++;
416                 }
417                 a += rv.merger[i].al;
418                 b += rv.merger[i].bl;
419                 c += rv.merger[i].cl;
420                 i++;
421
422                 while (csl1[c1].a + csl1[c1].len <= a && csl1[c1].len)
423                         c1++;
424                 assert(csl1[c1].b + csl1[c1].len >= b);
425                 while (csl2[c2].b + csl2[c2].len <= c && csl2[c2].len)
426                         c2++;
427                 assert(csl2[c2].a + csl2[c2].len >= b);
428                 if (csl1[c1].len == 0 && csl2[c2].len == 0 &&
429                     a == csl1[c1].a && b == csl1[c1].b &&
430                     b == csl2[c2].a && c == csl2[c2].b)
431                         break;
432         }
433         rv.merger[i].type = End;
434         rv.merger[i].in_conflict = 0;
435         assert(i < l);
436         rv.conflicts = isolate_conflicts(af, bf, cf, csl1, csl2, words,
437                                          rv.merger, show_wiggles);
438         if (wiggle_found)
439                 rv.wiggles++;
440         return rv;
441 }
442
443 static void printrange(FILE *out, struct file *f, int start, int len)
444 {
445         while (len > 0) {
446                 printword(out, f->list[start]);
447                 start++;
448                 len--;
449         }
450 }
451
452 struct ci print_merge2(FILE *out, struct file *a, struct file *b, struct file *c,
453                        struct csl *c1, struct csl *c2,
454                        int words, int ignore_already, int show_wiggles)
455 {
456         struct ci rv = make_merger(*a, *b, *c, c1, c2,
457                                    words, ignore_already, show_wiggles);
458         struct merge *m;
459
460         for (m = rv.merger; m->type != End ; m++) {
461                 struct merge *cm;
462                 if (do_trace)
463                         printf("[%s: %d-%d,%d-%d,%d-%d%s(%d,%d)]\n",
464                                m->type==Unmatched ? "Unmatched" :
465                                m->type==Unchanged ? "Unchanged" :
466                                m->type==Extraneous ? "Extraneous" :
467                                m->type==Changed ? "Changed" :
468                                m->type==AlreadyApplied ? "AlreadyApplied" :
469                                m->type==Conflict ? "Conflict":"unknown",
470                                m->a, m->a+m->al-1,
471                                m->b, m->b+m->bl-1,
472                                m->c, m->c+m->cl-1,
473                                m->in_conflict ? " in_conflict" : "",
474                                m->lo, m->hi);
475
476                 while (m->in_conflict) {
477                         /* need to print from 'hi' to 'lo' of next
478                          * Unchanged which is < it's hi
479                          */
480                         int found_conflict = 0;
481                         int st = 0, st1;
482                         if (m->type == Unchanged || m->type == Changed)
483                                 if (m->hi >= m->lo)
484                                         st = m->hi;
485                         st1 = st;
486
487                         if (m->type == Unchanged)
488                                 printrange(out, a, m->a+m->lo, m->hi - m->lo);
489
490                         if (do_trace)
491                                 for (cm = m; cm->in_conflict; cm++) {
492                                         printf("{%s: %d-%d,%d-%d,%d-%d%s(%d,%d)}\n",
493                                                cm->type==Unmatched?"Unmatched":
494                                                cm->type==Unchanged?"Unchanged":
495                                                cm->type==Extraneous?"Extraneous":
496                                                cm->type==Changed?"Changed":
497                                                cm->type==AlreadyApplied?"AlreadyApplied":
498                                                cm->type==Conflict?"Conflict":"unknown",
499                                                cm->a, cm->a+cm->al-1,
500                                                cm->b, cm->b+cm->bl-1,
501                                                cm->c, cm->c+cm->cl-1,
502                                                cm->in_conflict ? " in_conflict" : "",
503                                                cm->lo, cm->hi);
504                                         if ((cm->type == Unchanged || cm->type == Changed)
505                                             && cm != m && cm->lo < cm->hi)
506                                                 break;
507                                 }
508
509                         fputs(words ? "<<<---" : "<<<<<<<\n", out);
510                         for (cm = m; cm->in_conflict; cm++) {
511                                 if (cm->type == Conflict)
512                                         found_conflict = 1;
513                                 if ((cm->type == Unchanged || cm->type == Changed)
514                                     && cm != m && cm->lo < cm->hi) {
515                                         printrange(out, a, cm->a, cm->lo);
516                                         break;
517                                 }
518                                 printrange(out, a, cm->a+st1, cm->al-st1);
519                                 st1 = 0;
520                         }
521                         fputs(words ? "|||" : "|||||||\n", out);
522                         st1 = st;
523                         for (cm = m; cm->in_conflict; cm++) {
524                                 if ((cm->type == Unchanged || cm->type == Changed)
525                                     && cm != m && cm->lo < cm->hi) {
526                                         printrange(out, b, cm->b, cm->lo);
527                                         break;
528                                 }
529                                 printrange(out, b, cm->b+st1, cm->bl-st1);
530                                 st1 = 0;
531                         }
532                         fputs(words ? "===" : "=======\n", out);
533                         st1 = st;
534                         for (cm = m; cm->in_conflict; cm++) {
535                                 if (cm->type == Unchanged &&
536                                     cm != m && cm->lo < cm->hi) {
537                                         printrange(out, c, cm->c, cm->lo);
538                                         break;
539                                 }
540                                 if (cm->type == Changed)
541                                         st1 = 0; /* All of result of change must be printed */
542                                 printrange(out, c, cm->c+st1, cm->cl-st1);
543                                 st1 = 0;
544                         }
545                         if (!found_conflict) {
546                                 /* This section was wiggled in successfully,
547                                  * but full conflict display was requested.
548                                  * So now print out the wiggled result as well.
549                                  */
550                                 fputs(words ? "&&&" : "&&&&&&&\n", out);
551                                 st1 = st;
552                                 for (cm = m; cm->in_conflict; cm++) {
553                                         int last = 0;
554                                         if ((cm->type == Unchanged || cm->type == Changed)
555                                             && cm != m && cm->lo < cm->hi)
556                                                 last = 1;
557                                         switch (cm->type) {
558                                         case Unchanged:
559                                         case AlreadyApplied:
560                                         case Unmatched:
561                                                 printrange(out, a, cm->a+st1,
562                                                            last ? cm->lo : cm->al-st1);
563                                                 break;
564                                         case Extraneous:
565                                                 break;
566                                         case Changed:
567                                                 printrange(out, c, cm->c+st1,
568                                                            last ? cm->lo : cm->cl-st1);
569                                                 break;
570                                         case Conflict:
571                                         case End:
572                                                 assert(0);
573                                         }
574                                         if (last)
575                                                 break;
576                                         st1 = 0;
577                                 }
578                         }
579                         fputs(words ? "--->>>" : ">>>>>>>\n", out);
580                         m = cm;
581                         if (m->in_conflict && m->type == Unchanged
582                             && m->hi >= m->al) {
583                                 printrange(out, a, m->a+m->lo, m->hi-m->lo);
584                                 m++;
585                         }
586                 }
587
588                 /* there is always some non-conflict after a conflict,
589                  * unless we hit the end
590                  */
591                 if (m->type == End)
592                         break;
593
594                 if (do_trace) {
595                         printf("<<%s: %d-%d,%d-%d,%d-%d%s(%d,%d)>>\n",
596                                m->type==Unmatched?"Unmatched":
597                                m->type==Unchanged?"Unchanged":
598                                m->type==Extraneous?"Extraneous":
599                                m->type==Changed?"Changed":
600                                m->type==AlreadyApplied?"AlreadyApplied":
601                                m->type==Conflict?"Conflict":"unknown",
602                                m->a, m->a+m->al-1,
603                                m->b, m->b+m->bl-1,
604                                m->c, m->c+m->cl-1,
605                                m->in_conflict ? " in_conflict" : "",
606                                m->lo, m->hi);
607                 }
608
609                 switch (m->type) {
610                 case Unchanged:
611                 case AlreadyApplied:
612                 case Unmatched:
613                         printrange(out, a, m->a, m->al);
614                         break;
615                 case Extraneous:
616                         break;
617                 case Changed:
618                         printrange(out, c, m->c, m->cl);
619                         break;
620                 case Conflict:
621                 case End:
622                         assert(0);
623                 }
624         }
625         return rv;
626 }