]> git.neil.brown.name Git - wiggle.git/blob - merge2.c
Introduce xmalloc and use it.
[wiggle.git] / merge2.c
1 /*
2  * wiggle - apply rejected patches
3  *
4  * Copyright (C) 2005 Neil Brown <neilb@cse.unsw.edu.au>
5  * Copyright (C) 2010 Neil Brown <neilb@suse.de>
6  *
7  *
8  *    This program is free software; you can redistribute it and/or modify
9  *    it under the terms of the GNU General Public License as published by
10  *    the Free Software Foundation; either version 2 of the License, or
11  *    (at your option) any later version.
12  *
13  *    This program is distributed in the hope that it will be useful,
14  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *    GNU General Public License for more details.
17  *
18  *    You should have received a copy of the GNU General Public License
19  *    along with this program; if not, write to the Free Software Foundation, Inc.,
20  *    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  *    Author: Neil Brown
23  *    Email: <neilb@suse.de>
24  */
25
26 #include "wiggle.h"
27 #include <stdlib.h>
28
29 /*
30  * Second attempt at merging....
31  *
32  * We want to create a mergelist which identifies 'orig' and 'after'
33  * sections (from a and c) and conflicts (which are ranges of a,b,c which
34  * all don't match).
35  * It is also helpful to differentiate 'orig' sections that aren't
36  * matched in 'b' with orig sections that are.
37  * To help with highlighting, it will be useful to know where
38  * the conflicts match the csl lists.
39  *
40  * This can all be achieved with a list of (a,b,c,c1,c1) 5-tuples.
41  * If two consecutive differ in more than one of a,b,c, it is a
42  * conflict.
43  * If only 'a' differ, it is un-matched original.
44  * If only 'b' differ, it is matched, unchanged original
45  * If only 'c' differ, it is 1
46  */
47
48 static inline int min(int a, int b)
49 {
50         return a < b ? a : b;
51 }
52 static inline void assert(int a)
53 {
54         if (!a)
55                 abort();
56 }
57
58 static int check_alreadyapplied(struct file af, struct file cf,
59                                 struct merge *m)
60 {
61         int i;
62         if (m->al != m->cl)
63                 return 0;
64         for (i = 0; i < m->al; i++) {
65                 if (af.list[m->a+i].len != cf.list[m->c+i].len)
66                         return 0;
67                 if (strncmp(af.list[m->a+i].start,
68                             cf.list[m->c+i].start,
69                             af.list[m->a+i].len) != 0)
70                         return 0;
71         }
72 #if 0
73         printf("already applied %d,%d,%d - %d,%d,%d\n",
74                m->a, m->b, m->c, m->al, m->bl, m->cl);
75         printf(" %.10s - %.10s\n", af.list[m->a].start,
76                cf.list[m->c].start);
77 #endif
78         m->type = AlreadyApplied;
79         return 1;
80 }
81
82 static int isolate_conflicts(struct file af, struct file bf, struct file cf,
83                              struct csl *csl1, struct csl *csl2, int words,
84                              struct merge *m)
85 {
86         /* A conflict indicates that something is definitely wrong
87          * and so we need to be a bit suspicious of nearby apparent matches.
88          * To display a conflict effectively we expands it's effect to
89          * include any Extraneous, Unmatched, Changed or AlreadyApplied text.
90          * Also, unless 'words', we need to include any partial lines
91          * in the Unchanged text that forms the border of a conflict.
92          *
93          * A Changed text may also border a conflict, but it can
94          * only border one conflict (where as an Unchanged can border
95          * a preceeding and a following conflict).
96          * The 'new' section of a Changed text appears in the
97          * conflict as does any part of the original before
98          * a newline.
99          *
100          */
101         int i, j, k;
102         int cnt = 0;
103
104         for (i = 0; m[i].type != End; i++)
105                 if (m[i].type == Conflict) {
106                         /* We have a conflict here.
107                          * First search backwards for an Unchanged marking
108                          * things as in_conflict.  Then find the
109                          * cut-point in the Unchanged.  If there isn't one,
110                          * keep looking.
111                          *
112                          * Then search forward doing the same thing.
113                          */
114                         cnt++;
115                         m[i].in_conflict = 1;
116                         j = i;
117                         while (--j >= 0) {
118                                 if (!m[j].in_conflict) {
119                                         m[j].in_conflict = 1;
120                                         m[j].lo = 0;
121                                 } else if (m[j].type == Changed) {
122                                         /* This can no longer form a border */
123                                         m[j].hi = -1;
124                                         /* We merge these conflicts and stop searching */
125                                         cnt--;
126                                         break;
127                                 }
128
129                                 if (m[j].type == Unchanged || m[j].type == Changed) {
130                                         if (words) {
131                                                 m[j].hi = m[j].al;
132                                                 break;
133                                         }
134                                         /* need to find the last line-break, which
135                                          * might be after the last newline, if there
136                                          * is one, or might be at the start
137                                          */
138                                         for (k = m[j].al; k > 0; k--)
139                                                 if (ends_line(af.list[m[j].a+k-1]))
140                                                         break;
141                                         if (k > 0)
142                                                 m[j].hi = k;
143                                         else if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
144                                                  (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
145                                                  (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
146                                                 m[j].hi = 0;
147                                         else
148                                                 /* no start-of-line found... */
149                                                 m[j].hi = -1;
150                                         if (m[j].hi > 0 && m[j].type == Changed) {
151                                                 /* this can only work if start is also a line break */
152                                                 if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
153                                                     (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
154                                                     (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
155                                                         /* ok */;
156                                                 else
157                                                         m[j].hi = -1;
158                                         }
159                                         if (m[j].hi >= 0)
160                                                 break;
161                                 }
162                         }
163 #if 0
164                         if (j >= 0 && m[j].in_conflict && m[j].type == Unchanged &&
165                             m[j].hi == m[j].lo) {
166                                 /* nothing to separate conflicts, merge them */
167                                 m[j].lo = 0;
168                                 m[j].hi = -1;
169                                 cnt--;
170                         }
171 #endif
172                         /* now the forward search */
173                         for (j = i+1; m[j].type != End; j++) {
174                                 m[j].in_conflict = 1;
175                                 if (m[j].type == Unchanged || m[j].type == Changed) {
176                                         m[j].hi = m[j].al;
177                                         if (words) {
178                                                 m[j].lo = 0;
179                                                 break;
180                                         }
181                                         /* need to find a line-break, which might be at
182                                          * the very beginning, or might be after the
183                                          * first newline - if there is one
184                                          */
185                                         if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
186                                             (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
187                                             (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
188                                                 m[j].lo = 0;
189                                         else {
190                                                 for (k = 0 ; k < m[j].al ; k++)
191                                                         if (ends_line(af.list[m[j].a+k]))
192                                                                 break;
193                                                 if (k < m[j].al)
194                                                         m[j].lo = k+1;
195                                                 else
196                                                         /* no start-of-line found */
197                                                         m[j].lo = m[j].al+1;
198                                         }
199                                         if (m[j].lo <= m[j].al+1 && m[j].type == Changed) {
200                                                 /* this can only work if the end is a line break */
201                                                 if (ends_line(af.list[m[j].a+m[j].al-1]) &&
202                                                     ends_line(bf.list[m[j].b+m[j].bl-1]) &&
203                                                     ends_line(cf.list[m[j].c+m[j].cl-1]))
204                                                         /* ok */;
205                                                 else
206                                                         m[j].lo = m[j].al+1;
207                                         }
208                                         if (m[j].lo < m[j].al+1)
209                                                 break;
210                                 }
211                         }
212                         i = j - 1;
213                 }
214         return cnt;
215 }
216
217 struct ci make_merger(struct file af, struct file bf, struct file cf,
218                       struct csl *csl1, struct csl *csl2, int words,
219                       int ignore_already)
220 {
221         /* find the wiggles and conflicts between csl1 and csl2
222          */
223         struct ci rv;
224         int i, l;
225         int a, b, c, c1, c2;
226         int wiggle_found = 0;
227
228         rv.conflicts = rv.wiggles = rv.ignored = 0;
229
230         for (i = 0; csl1[i].len; i++)
231                 ;
232         l = i;
233         for (i = 0; csl2[i].len; i++)
234                 ;
235         l += i;
236         /* maybe a bit of slack at each end */
237         l = l * 4 + 10;
238
239         rv.merger = xmalloc(sizeof(struct merge)*l);
240
241         a = b = c = c1 = c2 = 0;
242         i = 0;
243         while (1) {
244                 int match1, match2;
245                 match1 = (a >= csl1[c1].a && b >= csl1[c1].b); /* c1 doesn't match */
246                 match2 = (b >= csl2[c2].a && c >= csl2[c2].b);
247
248                 rv.merger[i].a = a;
249                 rv.merger[i].b = b;
250                 rv.merger[i].c = c;
251                 rv.merger[i].c1 = c1;
252                 rv.merger[i].c2 = c2;
253                 rv.merger[i].in_conflict = 0;
254
255                 if (!match1 && match2) {
256                         if (a < csl1[c1].a) {
257                                 /* some unmatched text */
258                                 rv.merger[i].type = Unmatched;
259                                 rv.merger[i].al = csl1[c1].a - a;
260                                 rv.merger[i].bl = 0;
261                                 rv.merger[i].cl = 0;
262                                 wiggle_found++;
263                         } else {
264                                 int newb;
265                                 int j;
266                                 assert(b < csl1[c1].b);
267                                 /* some Extraneous text */
268                                 /* length is min of unmatched on left
269                                  * and matched on right
270                                  */
271                                 rv.merger[i].type = Extraneous;
272                                 rv.merger[i].al = 0;
273                                 rv.merger[i].cl =
274                                         rv.merger[i].bl =
275                                         min(csl1[c1].b - b,
276                                             csl2[c2].len - (b-csl2[c2].a));
277                                 newb = b+rv.merger[i].bl;
278                                 for (j = b; j < newb; j++) {
279                                         if (bf.list[j].start[0] == '\0') {
280                                                 if (wiggle_found > 1)
281                                                         rv.wiggles++;
282                                                 wiggle_found = 0;
283                                         } else
284                                                 wiggle_found++;
285                                 }
286                         }
287                 } else if (match1 && !match2) {
288                         /* some changed text
289                          * if 'c' is currently at a suitable cut-point, then
290                          * we can look for a triple-cut-point for start.
291                          * Also, if csl2[c2].b isn't in a conflict, and is
292                          * a suitable cut-point, then we could make a
293                          * triple-cut-point for end of a conflict.
294                          */
295
296                         rv.merger[i].type = Changed;
297                         rv.merger[i].bl = min(csl1[c1].b+csl1[c1].len, csl2[c2].a) - b;
298                         rv.merger[i].al = rv.merger[i].bl;
299                         rv.merger[i].cl = csl2[c2].b - c;
300                 } else if (match1 && match2) {
301                         /* Some unchanged text
302                          */
303                         rv.merger[i].type = Unchanged;
304                         rv.merger[i].bl =
305                                 min(csl1[c1].len - (b-csl1[c1].b),
306                                     csl2[c2].len - (b-csl2[c2].a));
307                         rv.merger[i].al = rv.merger[i].cl =
308                                 rv.merger[i].bl;
309                 } else {
310                         /* must be a conflict.
311                          * Move a and c to next match, and b to closest of the two
312                          */
313                         rv.merger[i].type = Conflict;
314                         rv.merger[i].al = csl1[c1].a - a;
315                         rv.merger[i].cl = csl2[c2].b - c;
316                         rv.merger[i].bl = min(csl1[c1].b, csl2[c2].a) - b;
317                         if (ignore_already &&
318                             check_alreadyapplied(af, cf, &rv.merger[i]))
319                                 rv.ignored++;
320                 }
321                 a += rv.merger[i].al;
322                 b += rv.merger[i].bl;
323                 c += rv.merger[i].cl;
324                 i++;
325
326                 while (csl1[c1].a + csl1[c1].len <= a && csl1[c1].len)
327                         c1++;
328                 assert(csl1[c1].b + csl1[c1].len >= b);
329                 while (csl2[c2].b + csl2[c2].len <= c && csl2[c2].len)
330                         c2++;
331                 assert(csl2[c2].a + csl2[c2].len >= b);
332                 if (csl1[c1].len == 0 && csl2[c2].len == 0 &&
333                     a == csl1[c1].a && b == csl1[c1].b &&
334                     b == csl2[c2].a && c == csl2[c2].b)
335                         break;
336         }
337         rv.merger[i].type = End;
338         rv.merger[i].in_conflict = 0;
339         rv.conflicts = isolate_conflicts(af, bf, cf, csl1, csl2, words, rv.merger);
340         if (wiggle_found)
341                 rv.wiggles++;
342         return rv;
343 }
344
345 static void printrange(FILE *out, struct file *f, int start, int len)
346 {
347         while (len > 0) {
348                 printword(out, f->list[start]);
349                 start++;
350                 len--;
351         }
352 }
353
354 struct ci print_merge2(FILE *out, struct file *a, struct file *b, struct file *c,
355                        struct csl *c1, struct csl *c2,
356                        int words, int ignore_already)
357 {
358         struct ci rv = make_merger(*a, *b, *c, c1, c2,
359                                    words, ignore_already);
360         struct merge *m;
361
362         for (m = rv.merger; m->type != End ; m++) {
363                 struct merge *cm;
364 #if 1
365                 static int v = -1;
366                 if (v == -1) {
367                         if (getenv("WiggleVerbose"))
368                                 v = 1;
369                         else
370                                 v = 0;
371                 }
372                 if (v)
373                         printf("[%s: %d-%d,%d-%d,%d-%d%s(%d,%d)]\n",
374                                m->type==Unmatched ? "Unmatched" :
375                                m->type==Unchanged ? "Unchanged" :
376                                m->type==Extraneous ? "Extraneous" :
377                                m->type==Changed ? "Changed" :
378                                m->type==AlreadyApplied ? "AlreadyApplied" :
379                                m->type==Conflict ? "Conflict":"unknown",
380                                m->a, m->a+m->al-1,
381                                m->b, m->b+m->bl-1,
382                                m->c, m->c+m->cl-1,
383                                m->in_conflict ? " in_conflict" : "",
384                                m->lo, m->hi);
385 #endif
386                 while (m->in_conflict) {
387                         /* need to print from 'hi' to 'lo' of next
388                          * Unchanged which is < it's hi
389                          */
390                         int st = 0, st1;
391                         if (m->type == Unchanged || m->type == Changed)
392                                 if (m->hi >= m->lo)
393                                         st = m->hi;
394                         st1 = st;
395
396                         if (m->type == Unchanged)
397                                 printrange(out, a, m->a+m->lo, m->hi - m->lo);
398
399 #if 1
400                 if (v)
401                         for (cm = m; cm->in_conflict; cm++) {
402                                 printf("{%s: %d-%d,%d-%d,%d-%d%s(%d,%d)}\n",
403                                        cm->type==Unmatched?"Unmatched":
404                                        cm->type==Unchanged?"Unchanged":
405                                        cm->type==Extraneous?"Extraneous":
406                                        cm->type==Changed?"Changed":
407                                        cm->type==AlreadyApplied?"AlreadyApplied":
408                                        cm->type==Conflict?"Conflict":"unknown",
409                                        cm->a, cm->a+cm->al-1,
410                                        cm->b, cm->b+cm->bl-1,
411                                        cm->c, cm->c+cm->cl-1,
412                                        cm->in_conflict ? " in_conflict" : "",
413                                        cm->lo, cm->hi);
414                                 if ((cm->type == Unchanged || cm->type == Changed) && cm != m && cm->lo < cm->hi)
415                                         break;
416                         }
417 #endif
418
419                         fputs(words ? "<<<---" : "<<<<<<<\n", out);
420                         for (cm = m; cm->in_conflict; cm++) {
421                                 if ((cm->type == Unchanged || cm->type == Changed) && cm != m && cm->lo < cm->hi) {
422                                         printrange(out, a, cm->a, cm->lo);
423                                         break;
424                                 }
425                                 printrange(out, a, cm->a+st1, cm->al-st1);
426                                 st1 = 0;
427                         }
428                         fputs(words ? "|||" : "|||||||\n", out);
429                         st1 = st;
430                         for (cm = m; cm->in_conflict; cm++) {
431                                 if ((cm->type == Unchanged || cm->type == Changed) && cm != m && cm->lo < cm->hi) {
432                                         printrange(out, b, cm->b, cm->lo);
433                                         break;
434                                 }
435                                 printrange(out, b, cm->b+st1, cm->bl-st1);
436                                 st1 = 0;
437                         }
438                         fputs(words ? "===" : "=======\n", out);
439                         st1 = st;
440                         for (cm = m; cm->in_conflict; cm++) {
441                                 if (cm->type == Unchanged && cm != m && cm->lo < cm->hi) {
442                                         printrange(out, c, cm->c, cm->lo);
443                                         break;
444                                 }
445                                 if (cm->type == Changed)
446                                         st1 = 0; /* All of result of change must be printed */
447                                 printrange(out, c, cm->c+st1, cm->cl-st1);
448                                 st1 = 0;
449                         }
450                         fputs(words ? "--->>>" : ">>>>>>>\n", out);
451                         m = cm;
452                         if (m->in_conflict && m->type == Unchanged
453                             && m->hi >= m->al) {
454                                 printrange(out, a, m->a+m->lo, m->hi-m->lo);
455                                 m++;
456                         }
457                 }
458
459                 /* there is always some non-conflict after a conflict,
460                  * unless we hit the end
461                  */
462                 if (m->type == End)
463                         break;
464 #if 1
465                 if (v) {
466                         printf("<<%s: %d-%d,%d-%d,%d-%d%s(%d,%d)>>\n",
467                                m->type==Unmatched?"Unmatched":
468                                m->type==Unchanged?"Unchanged":
469                                m->type==Extraneous?"Extraneous":
470                                m->type==Changed?"Changed":
471                                m->type==AlreadyApplied?"AlreadyApplied":
472                                m->type==Conflict?"Conflict":"unknown",
473                                m->a, m->a+m->al-1,
474                                m->b, m->b+m->bl-1,
475                                m->c, m->c+m->cl-1,
476                                m->in_conflict ? " in_conflict" : "",
477                                m->lo, m->hi);
478                 }
479 #endif
480                 switch (m->type) {
481                 case Unchanged:
482                 case AlreadyApplied:
483                 case Unmatched:
484                         printrange(out, a, m->a, m->al);
485                         break;
486                 case Extraneous:
487                         break;
488                 case Changed:
489                         printrange(out, c, m->c, m->cl);
490                         break;
491                 case Conflict:
492                 case End:
493                         assert(0);
494                 }
495         }
496         return rv;
497 }