git.neil.brown.name Git - wiggle.git/commitdiff
Introduce --non-space option
author NeilBrown <neil@brown.name>
Sat, 29 Aug 2020 08:15:52 +0000 (18:15 +1000)
committer NeilBrown <neil@brown.name>
Sat, 29 Aug 2020 08:33:26 +0000 (18:33 +1000)
This can significantly reduce the number of words by treating
punctuation as part of the surrounding word, rather than as single-char
words.
Fewer words can mean much faster comparisons.

Signed-off-by: NeilBrown <neil@brown.name>
ReadMe.c
split.c
wiggle.1
wiggle.c
wiggle.h

diff --git a/ReadMe.c b/ReadMe.c
index 05d8ec2f94ac6b42280fa44c4048f46bd228e992..847e6973d8370ed740c3b4307b3023bb1318e710 100644 (file)
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -58,9 +58,10 @@ struct option long_options[] = {
        {"no-ignore",   0, 0, 'i'},
        {"show-wiggles",0, 0, 'W'},
        {"ignore-blanks",0,0, 'b'},
-       {"no-backup",   0, 0, NO_BACKUP },
-       {"self-test",   0, 0, SELF_TEST},
+       {"no-backup",   0, 0, NO_BACKUP },
+       {"self-test",   0, 0, SELF_TEST},
        {"report-wiggles", 0, 0, REPORT_WIGGLES},
+       {"non-space",   0, 0, NON_SPACE},
        {0, 0, 0, 0}
 };
 
@@ -84,6 +85,7 @@ char Help[] =  "\n"
 "\n"
 "   --words     -w    : word-wise diff and merge.\n"
 "   --lines     -l    : line-wise diff and merge.\n"
+"   --non-space       : words are separated by spaces.\n"
 "\n"
 "   --patch     -p    : treat last file as a patch file.\n"
 "   -1  -2  -3        : select which component of patch or merge to use.\n"
diff --git a/split.c b/split.c
index 27f9b7e7acb573dc4bbe3d4b269fe7263f7721a1..a9d66b01d52cc12a2e49be9cdc33e2cdc02b28e8 100644 (file)
--- a/split.c
+++ b/split.c
@@ -58,7 +58,7 @@ static int split_internal(char *start, char *end, int type,
                char *cp2;
                int prefix = 0;
 
-               if (type == (ByWord | IgnoreBlanks))
+               if ((type & ByWord) && (type & IgnoreBlanks))
                        while (cp < end &&
                               (*cp == ' ' || *cp == '\t')) {
                                prefix++;
@@ -79,24 +79,28 @@ static int split_internal(char *start, char *end, int type,
                                        cp++;
                                break;
                        case ByWord:
-                               if (isalnum(*cp) || *cp == '_') {
+                               if (*cp == ' ' || *cp == '\t') {
                                        do
                                                cp++;
                                        while (cp < end
-                                              && (isalnum(*cp)
-                                                  || *cp == '_'));
-                               } else if (*cp == ' ' || *cp == '\t') {
+                                              && (*cp == ' '
+                                                  || *cp == '\t'));
+                               } else if ((type & WholeWord) ||
+                                          isalnum(*cp) || *cp == '_') {
                                        do
                                                cp++;
                                        while (cp < end
-                                              && (*cp == ' '
-                                                  || *cp == '\t'));
+                                              && (((type & WholeWord)
+                                                   && *cp != ' ' && *cp != '\t'
+                                                   && *cp != '\n')
+                                                  || isalnum(*cp)
+                                                  || *cp == '_'));
                                } else
                                        cp++;
                                break;
                        }
                cp2 = cp;
-               if (type == (ByWord | IgnoreBlanks) &&
+               if ((type & ByWord) && (type & IgnoreBlanks) &&
                    *start && *start != '\n')
                        while (cp2 < end &&
                               (*cp2 == ' ' || *cp2 == '\t' || *cp2 == '\n')) {
diff --git a/wiggle.1 b/wiggle.1
index f14166f229aa8309bdfba67a85f04f74787a9fc2..89047cb1303e925944a6c4a164e8a7f081453ce8 100644 (file)
--- a/wiggle.1
+++ b/wiggle.1
@@ -144,6 +144,11 @@ patch to be wiggled in to place.
 Request that all operations and display be word based.  This is the
 default for the "diff" function.
 .TP
+.BR \-\-non\-space
+Request that words be defined as sequences of non-white-space.  Without
+this flag words are sequences of alphanumerics or single non-white-space
+characters.
+.TP
 .BR \-l ", " \-\-lines
 Request that all operations and display be line based.
 .TP
diff --git a/wiggle.c b/wiggle.c
index aef2d7596ac665372fe5be9ca590839243ec9dfd..2347ffe9a65c03a8c7a898d0c9a6b5465970c3fb 100644 (file)
--- a/wiggle.c
+++ b/wiggle.c
@@ -759,6 +759,10 @@ int main(int argc, char *argv[])
                                Cmd, mode, opt);
                        exit(2);
 
+               case NON_SPACE:
+                       ignore_blanks |= WholeWord;
+                       continue;
+
                case 'w':
                case 'l':
                        if (obj == 0 || obj == opt) {
@@ -784,7 +788,7 @@ int main(int argc, char *argv[])
                        continue;
 
                case 'b':
-                       ignore_blanks = IgnoreBlanks;
+                       ignore_blanks |= IgnoreBlanks;
                        continue;
 
                case 'i':
diff --git a/wiggle.h b/wiggle.h
index 1befdd58ab6f3aabb83c9338d173826cd3428678..dfde4b7e28f3baf97a1bce8440e3acf66fc6b327 100644 (file)
--- a/wiggle.h
+++ b/wiggle.h
@@ -199,8 +199,9 @@ extern char short_options[];
 extern struct option long_options[];
 enum other_options {
        SELF_TEST = 300,
-       REPORT_WIGGLES = 301,
+       REPORT_WIGGLES,
        NO_BACKUP,
+       NON_SPACE,
 };
 extern char Usage[];
 extern char Help[];
@@ -216,4 +217,5 @@ enum {
        ByWord = 1,
        ByMask = 3,
        IgnoreBlanks = 8, /* 'or'ed in */
+       WholeWord = 16,
 };