core/coreutils/coreutils-uniq.patch.new


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

--- old/src/uniq.c	2022-04-08 13:22:18.000000000 +0200
+++ new/src/uniq.c	2022-05-15 08:30:20.222140293 +0200
@@ -53,6 +53,9 @@
 /* Number of fields to skip on each line when doing comparisons. */
 static size_t skip_fields;
 
+/* Number of fields to compare; 0 means no field limit. */
+static size_t check_fields;
+
 /* Number of chars to skip after skipping any fields. */
 static size_t skip_chars;
 
@@ -148,6 +151,7 @@
   {"ignore-case", no_argument, NULL, 'i'},
   {"unique", no_argument, NULL, 'u'},
   {"skip-fields", required_argument, NULL, 'f'},
+  {"check-fields", required_argument, NULL, 'm'},
   {"skip-chars", required_argument, NULL, 's'},
   {"check-chars", required_argument, NULL, 'w'},
   {"zero-terminated", no_argument, NULL, 'z'},
@@ -195,6 +199,11 @@
 "), stdout);
      fputs (_("\
   -i, --ignore-case     ignore differences in case when comparing\n\
+"), stdout);
+     fputs (_("\
+  -m, --check-fields=N  compare no more than N fields\n\
+"), stdout);
+     fputs (_("\
   -s, --skip-chars=N    avoid comparing the first N characters\n\
   -u, --unique          only print unique lines\n\
 "), stdout);
@@ -254,7 +263,7 @@

-ATTRIBUTE_PURE
+/* Stores the compare length in *LEN, so this must not be ATTRIBUTE_PURE.  */
 static char *
-find_field (struct linebuffer const *line)
+find_field (struct linebuffer const *line, size_t *len)
 {
   size_t count;
   char const *lp = line->buffer;
@@ -271,6 +280,21 @@

   i += MIN (skip_chars, size - i);

+  /* Measure the region to compare, which starts at offset I.  When
+     CHECK_FIELDS is zero the loop below never runs and the region
+     reaches the end of the line; otherwise it stops after at most
+     CHECK_FIELDS further fields.  */
+  size_t end = check_fields == 0 ? size : i;
+  for (count = 0; count < check_fields && end < size; count++)
+    {
+      while (end < size && field_sep (lp[end]))
+        end++;
+      while (end < size && !field_sep (lp[end]))
+        end++;
+    }
+  /* Report the length relative to the pointer returned below.  */
+  *len = end - i;
+
   return line->buffer + i;
 }
 
@@ -366,8 +390,7 @@
           if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
             break;
 
-          thisfield = find_field (thisline);
-          thislen = thisline->length - 1 - (thisfield - thisline->buffer);
+          thisfield = find_field (thisline,&thislen);
 
           new_group = (!prevfield
                        || different (thisfield, prevfield, thislen, prevlen));
@@ -401,8 +424,7 @@
 
       if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
         goto closefiles;
-      prevfield = find_field (prevline);
-      prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
+      prevfield = find_field (prevline,&prevlen);
 
       while (!feof (stdin))
         {
@@ -415,8 +437,7 @@
                 goto closefiles;
               break;
             }
-          thisfield = find_field (thisline);
-          thislen = thisline->length - 1 - (thisfield - thisline->buffer);
+          thisfield = find_field (thisline,&thislen);
           match = !different (thisfield, prevfield, thislen, prevlen);
           match_count += match;
 
@@ -496,6 +517,7 @@
 
   skip_chars = 0;
   skip_fields = 0;
+  check_fields = 0;
   check_chars = SIZE_MAX;
   output_unique = output_first_repeated = true;
   output_later_repeated = false;
@@ -511,7 +533,7 @@
       if (optc == -1
           || (posixly_correct && nfiles != 0)
           || ((optc = getopt_long (argc, argv,
-                                   "-0123456789Dcdf:is:uw:z", longopts, NULL))
+                                   "-0123456789Dcdf:im:s:uw:z", longopts, NULL))
               == -1))
         {
           if (argc <= optind)
@@ -605,6 +627,11 @@
           ignore_case = true;
           break;
 
+        case 'm':
+          check_fields = size_opt (optarg,
+                                  N_("invalid number of fields to compare"));
+          break;
+
         case 's':
           skip_chars = size_opt (optarg,
                                  N_("invalid number of bytes to skip"));