]> git.neil.brown.name Git - wiggle.git/blob - merge2.c
Ensure that a hunk-header never appears in the middle of an Extraneous range.
[wiggle.git] / merge2.c
1 /*
2  * wiggle - apply rejected patches
3  *
4  * Copyright (C) 2005 Neil Brown <neilb@cse.unsw.edu.au>
5  * Copyright (C) 2010 Neil Brown <neilb@suse.de>
6  *
7  *
8  *    This program is free software; you can redistribute it and/or modify
9  *    it under the terms of the GNU General Public License as published by
10  *    the Free Software Foundation; either version 2 of the License, or
11  *    (at your option) any later version.
12  *
13  *    This program is distributed in the hope that it will be useful,
14  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *    GNU General Public License for more details.
17  *
18  *    You should have received a copy of the GNU General Public License
19  *    along with this program; if not, write to the Free Software Foundation, Inc.,
20  *    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  *    Author: Neil Brown
23  *    Email: <neilb@suse.de>
24  */
25
26 #include "wiggle.h"
27 #include <stdlib.h>
28
29 /*
30  * Second attempt at merging....
31  *
32  * We want to create a mergelist which identifies 'orig' and 'after'
33  * sections (from a and c) and conflicts (which are ranges of a,b,c which
34  * all don't match).
35  * It is also helpful to differentiate 'orig' sections that aren't
36  * matched in 'b' with orig sections that are.
37  * To help with highlighting, it will be useful to know where
38  * the conflicts match the csl lists.
39  *
40  * This can all be achieved with a list of (a,b,c,c1,c1) 5-tuples.
41  * If two consecutive differ in more than one of a,b,c, it is a
42  * conflict.
43  * If only 'a' differ, it is un-matched original.
44  * If only 'b' differ, it is matched, unchanged original
45  * If only 'c' differ, it is 1
46  */
47
48 static inline int min(int a, int b)
49 {
50         return a < b ? a : b;
51 }
52 static inline void assert(int a)
53 {
54         if (!a)
55                 abort();
56 }
57
58 static int check_alreadyapplied(struct file af, struct file cf,
59                                 struct merge *m)
60 {
61         int i;
62         if (m->al != m->cl)
63                 return 0;
64         for (i = 0; i < m->al; i++) {
65                 if (af.list[m->a+i].len != cf.list[m->c+i].len)
66                         return 0;
67                 if (strncmp(af.list[m->a+i].start,
68                             cf.list[m->c+i].start,
69                             af.list[m->a+i].len) != 0)
70                         return 0;
71         }
72         if (do_trace) {
73                 printf("already applied %d,%d,%d - %d,%d,%d\n",
74                        m->a, m->b, m->c, m->al, m->bl, m->cl);
75                 printf(" %.10s - %.10s\n", af.list[m->a].start,
76                        cf.list[m->c].start);
77         }
78         m->type = AlreadyApplied;
79         return 1;
80 }
81
82 static int isolate_conflicts(struct file af, struct file bf, struct file cf,
83                              struct csl *csl1, struct csl *csl2, int words,
84                              struct merge *m, int show_wiggles)
85 {
86         /* A conflict indicates that something is definitely wrong
87          * and so we need to be a bit suspicious of nearby apparent matches.
88          * To display a conflict effectively we expands it's effect to
89          * include any Extraneous, Unmatched, Changed or AlreadyApplied text.
90          * Also, unless 'words', we need to include any partial lines
91          * in the Unchanged text that forms the border of a conflict.
92          *
93          * A Changed text may also border a conflict, but it can
94          * only border one conflict (where as an Unchanged can border
95          * a preceeding and a following conflict).
96          * The 'new' section of a Changed text appears in the
97          * conflict as does any part of the original before
98          * a newline.
99          *
100          * If 'show_wiggles' is set we treat wiggles like conflicts.
101          * A 'wiggle' is implied by any Extraneous text being ignored,
102          * or any line that has both Changed and Unmatched content.
103          * (Unmatched content where nothing is changed is common and not
104          *  really a 'wiggle').
105          *
106          * A hunk header is never considered part of a conflict.  It
107          * thereby can serve as a separator between conflicts.
108          */
109         int i, j, k;
110         int cnt = 0;
111         int changed = 0;
112         int unmatched = 0;
113
114         for (i = 0; m[i].type != End; i++) {
115                 if (m[i].type == Changed)
116                         changed = 1;
117                 if (m[i].type == Unmatched)
118                         unmatched = 1;
119                 if (m[i].type == Conflict ||
120                     (show_wiggles && ((changed && unmatched)
121                                         || m[i].type == Extraneous))) {
122                         /* We have a conflict (or wiggle) here.
123                          * First search backwards for an Unchanged marking
124                          * things as in_conflict.  Then find the
125                          * cut-point in the Unchanged.  If there isn't one,
126                          * keep looking.
127                          *
128                          * Then search forward doing the same thing.
129                          */
130                         cnt++;
131                         m[i].in_conflict = 1;
132                         j = i;
133                         while (--j >= 0) {
134                                 if (!m[j].in_conflict) {
135                                         m[j].in_conflict = 1;
136                                         m[j].lo = 0;
137                                 } else if (m[j].type == Changed) {
138                                         /* This can no longer form a border */
139                                         m[j].hi = -1;
140                                         /* We merge these conflicts and stop searching */
141                                         cnt--;
142                                         break;
143                                 }
144
145                                 if (m[j].type == Unchanged || m[j].type == Changed) {
146                                         if (words) {
147                                                 m[j].hi = m[j].al;
148                                                 break;
149                                         }
150                                         /* need to find the last line-break, which
151                                          * might be after the last newline, if there
152                                          * is one, or might be at the start
153                                          */
154                                         for (k = m[j].al; k > 0; k--)
155                                                 if (ends_line(af.list[m[j].a+k-1]))
156                                                         break;
157                                         if (k > 0)
158                                                 m[j].hi = k;
159                                         else if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
160                                                  (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
161                                                  (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
162                                                 m[j].hi = 0;
163                                         else
164                                                 /* no start-of-line found... */
165                                                 m[j].hi = -1;
166                                         if (m[j].hi > 0 && m[j].type == Changed) {
167                                                 /* this can only work if start is also a line break */
168                                                 if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
169                                                     (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
170                                                     (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
171                                                         /* ok */;
172                                                 else
173                                                         m[j].hi = -1;
174                                         }
175                                         if (m[j].hi >= 0)
176                                                 break;
177                                 }
178                         }
179
180                         /* now the forward search */
181                         for (j = i+1; m[j].type != End; j++) {
182                                 m[j].in_conflict = 1;
183                                 if (m[j].type == Unchanged || m[j].type == Changed) {
184                                         m[j].hi = m[j].al;
185                                         if (words) {
186                                                 m[j].lo = 0;
187                                                 break;
188                                         }
189                                         /* need to find a line-break, which might be at
190                                          * the very beginning, or might be after the
191                                          * first newline - if there is one
192                                          */
193                                         if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
194                                             (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
195                                             (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
196                                                 m[j].lo = 0;
197                                         else {
198                                                 for (k = 0 ; k < m[j].al ; k++)
199                                                         if (ends_line(af.list[m[j].a+k]))
200                                                                 break;
201                                                 if (k < m[j].al)
202                                                         m[j].lo = k+1;
203                                                 else
204                                                         /* no start-of-line found */
205                                                         m[j].lo = m[j].al+1;
206                                         }
207                                         if (m[j].lo <= m[j].al+1 && m[j].type == Changed) {
208                                                 /* this can only work if the end is a line break */
209                                                 if (ends_line(af.list[m[j].a+m[j].al-1]) &&
210                                                     ends_line(bf.list[m[j].b+m[j].bl-1]) &&
211                                                     ends_line(cf.list[m[j].c+m[j].cl-1]))
212                                                         /* ok */;
213                                                 else
214                                                         m[j].lo = m[j].al+1;
215                                         }
216                                         if (m[j].lo < m[j].al+1)
217                                                 break;
218                                 }
219                         }
220                         i = j - 1;
221                 }
222                 if (ends_line(af.list[m[i].a+m[i].al-1])) {
223                         unmatched = 0;
224                         changed = 0;
225                 }
226         }
227         return cnt;
228 }
229
230 struct ci make_merger(struct file af, struct file bf, struct file cf,
231                       struct csl *csl1, struct csl *csl2, int words,
232                       int ignore_already, int show_wiggles)
233 {
234         /* find the wiggles and conflicts between csl1 and csl2
235          */
236         struct ci rv;
237         int i, l;
238         int a, b, c, c1, c2;
239         int wiggle_found = 0;
240
241         rv.conflicts = rv.wiggles = rv.ignored = 0;
242
243         for (i = 0; csl1[i].len; i++)
244                 ;
245         l = i;
246         for (i = 0; csl2[i].len; i++)
247                 ;
248         l += i;
249         /* maybe a bit of slack at each end */
250         l = l * 4 + 10;
251
252         rv.merger = xmalloc(sizeof(struct merge)*l);
253
254         a = b = c = c1 = c2 = 0;
255         i = 0;
256         while (1) {
257                 int match1, match2;
258                 match1 = (a >= csl1[c1].a && b >= csl1[c1].b); /* c1 doesn't match */
259                 match2 = (b >= csl2[c2].a && c >= csl2[c2].b);
260
261                 rv.merger[i].a = a;
262                 rv.merger[i].b = b;
263                 rv.merger[i].c = c;
264                 rv.merger[i].c1 = c1;
265                 rv.merger[i].c2 = c2;
266                 rv.merger[i].in_conflict = 0;
267
268                 if (!match1 && match2) {
269                         if (a < csl1[c1].a) {
270                                 /* some unmatched text */
271                                 rv.merger[i].type = Unmatched;
272                                 rv.merger[i].al = csl1[c1].a - a;
273                                 rv.merger[i].bl = 0;
274                                 rv.merger[i].cl = 0;
275                                 wiggle_found++;
276                         } else {
277                                 int newb;
278                                 int j;
279                                 assert(b < csl1[c1].b);
280                                 /* some Extraneous text */
281                                 /* length is min of unmatched on left
282                                  * and matched on right.
283                                  * However a hunk-header is not allowed
284                                  * in the middle of an extraneous section,
285                                  * only at the start.
286                                  */
287                                 rv.merger[i].type = Extraneous;
288                                 rv.merger[i].al = 0;
289                                 newb = b +
290                                         min(csl1[c1].b - b,
291                                             csl2[c2].len - (b-csl2[c2].a));
292                                 for (j = b; j < newb; j++) {
293                                         if (bf.list[j].start[0] == '\0') {
294                                                 if (wiggle_found > 1)
295                                                         rv.wiggles++;
296                                                 wiggle_found = 0;
297                                                 if (j > b)
298                                                         newb = j;
299                                         } else
300                                                 wiggle_found++;
301                                 }
302                                 rv.merger[i].cl =
303                                         rv.merger[i].bl = newb - b;
304                         }
305                 } else if (match1 && !match2) {
306                         /* some changed text
307                          * if 'c' is currently at a suitable cut-point, then
308                          * we can look for a triple-cut-point for start.
309                          * Also, if csl2[c2].b isn't in a conflict, and is
310                          * a suitable cut-point, then we could make a
311                          * triple-cut-point for end of a conflict.
312                          */
313
314                         rv.merger[i].type = Changed;
315                         rv.merger[i].bl = min(csl1[c1].b+csl1[c1].len, csl2[c2].a) - b;
316                         rv.merger[i].al = rv.merger[i].bl;
317                         rv.merger[i].cl = csl2[c2].b - c;
318                 } else if (match1 && match2) {
319                         /* Some unchanged text
320                          */
321                         rv.merger[i].type = Unchanged;
322                         rv.merger[i].bl =
323                                 min(csl1[c1].len - (b-csl1[c1].b),
324                                     csl2[c2].len - (b-csl2[c2].a));
325                         rv.merger[i].al = rv.merger[i].cl =
326                                 rv.merger[i].bl;
327                 } else {
328                         /* must be a conflict.
329                          * Move a and c to next match, and b to closest of the two
330                          */
331                         rv.merger[i].type = Conflict;
332                         rv.merger[i].al = csl1[c1].a - a;
333                         rv.merger[i].cl = csl2[c2].b - c;
334                         rv.merger[i].bl = min(csl1[c1].b, csl2[c2].a) - b;
335                         if (ignore_already &&
336                             check_alreadyapplied(af, cf, &rv.merger[i]))
337                                 rv.ignored++;
338                 }
339                 a += rv.merger[i].al;
340                 b += rv.merger[i].bl;
341                 c += rv.merger[i].cl;
342                 i++;
343
344                 while (csl1[c1].a + csl1[c1].len <= a && csl1[c1].len)
345                         c1++;
346                 assert(csl1[c1].b + csl1[c1].len >= b);
347                 while (csl2[c2].b + csl2[c2].len <= c && csl2[c2].len)
348                         c2++;
349                 assert(csl2[c2].a + csl2[c2].len >= b);
350                 if (csl1[c1].len == 0 && csl2[c2].len == 0 &&
351                     a == csl1[c1].a && b == csl1[c1].b &&
352                     b == csl2[c2].a && c == csl2[c2].b)
353                         break;
354         }
355         rv.merger[i].type = End;
356         rv.merger[i].in_conflict = 0;
357         assert(i < l);
358         rv.conflicts = isolate_conflicts(af, bf, cf, csl1, csl2, words,
359                                          rv.merger, show_wiggles);
360         if (wiggle_found)
361                 rv.wiggles++;
362         return rv;
363 }
364
365 static void printrange(FILE *out, struct file *f, int start, int len)
366 {
367         while (len > 0) {
368                 printword(out, f->list[start]);
369                 start++;
370                 len--;
371         }
372 }
373
374 struct ci print_merge2(FILE *out, struct file *a, struct file *b, struct file *c,
375                        struct csl *c1, struct csl *c2,
376                        int words, int ignore_already, int show_wiggles)
377 {
378         struct ci rv = make_merger(*a, *b, *c, c1, c2,
379                                    words, ignore_already, show_wiggles);
380         struct merge *m;
381
382         for (m = rv.merger; m->type != End ; m++) {
383                 struct merge *cm;
384                 if (do_trace)
385                         printf("[%s: %d-%d,%d-%d,%d-%d%s(%d,%d)]\n",
386                                m->type==Unmatched ? "Unmatched" :
387                                m->type==Unchanged ? "Unchanged" :
388                                m->type==Extraneous ? "Extraneous" :
389                                m->type==Changed ? "Changed" :
390                                m->type==AlreadyApplied ? "AlreadyApplied" :
391                                m->type==Conflict ? "Conflict":"unknown",
392                                m->a, m->a+m->al-1,
393                                m->b, m->b+m->bl-1,
394                                m->c, m->c+m->cl-1,
395                                m->in_conflict ? " in_conflict" : "",
396                                m->lo, m->hi);
397
398                 while (m->in_conflict) {
399                         /* need to print from 'hi' to 'lo' of next
400                          * Unchanged which is < it's hi
401                          */
402                         int found_conflict = 0;
403                         int st = 0, st1;
404                         if (m->type == Unchanged || m->type == Changed)
405                                 if (m->hi >= m->lo)
406                                         st = m->hi;
407                         st1 = st;
408
409                         if (m->type == Unchanged)
410                                 printrange(out, a, m->a+m->lo, m->hi - m->lo);
411
412                         if (do_trace)
413                                 for (cm = m; cm->in_conflict; cm++) {
414                                         printf("{%s: %d-%d,%d-%d,%d-%d%s(%d,%d)}\n",
415                                                cm->type==Unmatched?"Unmatched":
416                                                cm->type==Unchanged?"Unchanged":
417                                                cm->type==Extraneous?"Extraneous":
418                                                cm->type==Changed?"Changed":
419                                                cm->type==AlreadyApplied?"AlreadyApplied":
420                                                cm->type==Conflict?"Conflict":"unknown",
421                                                cm->a, cm->a+cm->al-1,
422                                                cm->b, cm->b+cm->bl-1,
423                                                cm->c, cm->c+cm->cl-1,
424                                                cm->in_conflict ? " in_conflict" : "",
425                                                cm->lo, cm->hi);
426                                         if ((cm->type == Unchanged || cm->type == Changed)
427                                             && cm != m && cm->lo < cm->hi)
428                                                 break;
429                                 }
430
431                         fputs(words ? "<<<---" : "<<<<<<<\n", out);
432                         for (cm = m; cm->in_conflict; cm++) {
433                                 if (cm->type == Conflict)
434                                         found_conflict = 1;
435                                 if ((cm->type == Unchanged || cm->type == Changed)
436                                     && cm != m && cm->lo < cm->hi) {
437                                         printrange(out, a, cm->a, cm->lo);
438                                         break;
439                                 }
440                                 printrange(out, a, cm->a+st1, cm->al-st1);
441                                 st1 = 0;
442                         }
443                         fputs(words ? "|||" : "|||||||\n", out);
444                         st1 = st;
445                         for (cm = m; cm->in_conflict; cm++) {
446                                 if ((cm->type == Unchanged || cm->type == Changed)
447                                     && cm != m && cm->lo < cm->hi) {
448                                         printrange(out, b, cm->b, cm->lo);
449                                         break;
450                                 }
451                                 printrange(out, b, cm->b+st1, cm->bl-st1);
452                                 st1 = 0;
453                         }
454                         fputs(words ? "===" : "=======\n", out);
455                         st1 = st;
456                         for (cm = m; cm->in_conflict; cm++) {
457                                 if (cm->type == Unchanged &&
458                                     cm != m && cm->lo < cm->hi) {
459                                         printrange(out, c, cm->c, cm->lo);
460                                         break;
461                                 }
462                                 if (cm->type == Changed)
463                                         st1 = 0; /* All of result of change must be printed */
464                                 printrange(out, c, cm->c+st1, cm->cl-st1);
465                                 st1 = 0;
466                         }
467                         if (!found_conflict) {
468                                 /* This section was wiggled in successfully,
469                                  * but full conflict display was requested.
470                                  * So now print out the wiggled result as well.
471                                  */
472                                 fputs(words ? "&&&" : "&&&&&&&\n", out);
473                                 st1 = st;
474                                 for (cm = m; cm->in_conflict; cm++) {
475                                         int last = 0;
476                                         if ((cm->type == Unchanged || cm->type == Changed)
477                                             && cm != m && cm->lo < cm->hi)
478                                                 last = 1;
479                                         switch (cm->type) {
480                                         case Unchanged:
481                                         case AlreadyApplied:
482                                         case Unmatched:
483                                                 printrange(out, a, cm->a+st1,
484                                                            last ? cm->lo : cm->al-st1);
485                                                 break;
486                                         case Extraneous:
487                                                 break;
488                                         case Changed:
489                                                 printrange(out, c, cm->c+st1,
490                                                            last ? cm->lo : cm->cl-st1);
491                                                 break;
492                                         case Conflict:
493                                         case End:
494                                                 assert(0);
495                                         }
496                                         if (last)
497                                                 break;
498                                         st1 = 0;
499                                 }
500                         }
501                         fputs(words ? "--->>>" : ">>>>>>>\n", out);
502                         m = cm;
503                         if (m->in_conflict && m->type == Unchanged
504                             && m->hi >= m->al) {
505                                 printrange(out, a, m->a+m->lo, m->hi-m->lo);
506                                 m++;
507                         }
508                 }
509
510                 /* there is always some non-conflict after a conflict,
511                  * unless we hit the end
512                  */
513                 if (m->type == End)
514                         break;
515
516                 if (do_trace) {
517                         printf("<<%s: %d-%d,%d-%d,%d-%d%s(%d,%d)>>\n",
518                                m->type==Unmatched?"Unmatched":
519                                m->type==Unchanged?"Unchanged":
520                                m->type==Extraneous?"Extraneous":
521                                m->type==Changed?"Changed":
522                                m->type==AlreadyApplied?"AlreadyApplied":
523                                m->type==Conflict?"Conflict":"unknown",
524                                m->a, m->a+m->al-1,
525                                m->b, m->b+m->bl-1,
526                                m->c, m->c+m->cl-1,
527                                m->in_conflict ? " in_conflict" : "",
528                                m->lo, m->hi);
529                 }
530
531                 switch (m->type) {
532                 case Unchanged:
533                 case AlreadyApplied:
534                 case Unmatched:
535                         printrange(out, a, m->a, m->al);
536                         break;
537                 case Extraneous:
538                         break;
539                 case Changed:
540                         printrange(out, c, m->c, m->cl);
541                         break;
542                 case Conflict:
543                 case End:
544                         assert(0);
545                 }
546         }
547         return rv;
548 }