From 71063bc858cd927e3622b511297e66b3e13f7453 Mon Sep 17 00:00:00 2001
From: Dylan Cali <calid1984@gmail.com>
Date: Fri, 5 Sep 2014 04:42:02 -0500
Subject: numfmt: implement support for field ranges

* src/numfmt.c: Replace field handling code with logic that understands
field range specifiers.  Instead of processing a single field and
printing line prefix/suffix around it, process each field in the line
checking whether it has been included for conversion.  If so convert and
print, otherwise just print the unaltered field.
(extract_fields): Removed.
(skip_fields): Removed.
(process_line): Gutted and heavily reworked.
(process_suffixed_number): FIELD is now passed as an arg instead of
using a global.
(parse_field_arg): New function that parses field range specifiers.
(next_field): New function that returns pointers to the next field in
a line.
(process_field): New function that wraps the field conversion logic.
(include_field): New function that checks whether a field should be
converted.
(sort_field): New function used to keep field ranges ordered in a
gl_list.
(match_field): New function used to search a gl_list for the range
containing a field.
(free_field): New function used for freeing field values in a gl_list.
Global variable FIELD removed.
New global variable all_fields indicates whether all fields should be
processed.
New global variable all_fields_after stores the first field of an
'N-' style range.
New global variable all_fields_before stores the last field of a
'-M' style range.
New global variable field_list stores explicitly specified fields to
process (N, N,M or N-M style specifiers).
(usage): Document newly supported field range specifiers.
* bootstrap.conf: Include xlist and linked-list modules.  numfmt now
uses the gl_linked_list implementation to store the field ranges.
* tests/misc/numfmt.pl: Add tests for 'cut style' field ranges.
Adjust existing tests, as partial output can occur before an error.
Remove the test for the 'invalid' field -5, as this is now a valid range.
* gnulib: update to avoid compiler warnings in linked-list.
* doc/coreutils.texi: Document the field range specifiers.
* NEWS: Mention the new feature.
---
 NEWS                 |   2 +
 bootstrap.conf       |   2 +
 doc/coreutils.texi   |  14 +-
 gnulib               |   2 +-
 src/numfmt.c         | 355 +++++++++++++++++++++++++++++++++++----------------
 tests/misc/numfmt.pl |  54 ++++----
 6 files changed, 291 insertions(+), 138 deletions(-)

diff --git a/NEWS b/NEWS
index 9d69da330..9c551d514 100644
--- a/NEWS
+++ b/NEWS
@@ -70,6 +70,8 @@ GNU coreutils NEWS                                    -*- outline -*-
   dd accepts a new status=progress level to print data transfer statistics
   on stderr approximately every second.
 
+  numfmt can now process multiple fields using field ranges similar to cut.
+
   split accepts a new --separator option to select a record separator character
   other than the default newline character.
 
diff --git a/bootstrap.conf b/bootstrap.conf
index 320e7f581..5b6ec58e5 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -34,6 +34,7 @@ gnulib_modules="
   argv-iter
   assert
   autobuild
+  linked-list
   backupfile
   base64
   buffer-lcm
@@ -270,6 +271,7 @@ gnulib_modules="
   xgetcwd
   xgetgroups
   xgethostname
+  xlist
   xmemcoll
   xnanosleep
   xprintf
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 08316c928..9197cb426 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -16892,9 +16892,19 @@ Print (to standard error) warning messages about possible erroneous usage.
 Use the character @var{d} as input field separator (default: whitespace).
 @emph{Note}: Using non-default delimiter turns off automatic padding.
 
-@item --field=@var{n}
+@item --field=@var{fields}
 @opindex --field
-Convert the number in input field @var{n} (default: 1).
+Convert the numbers in input fields @var{fields} (default: 1).
+@var{fields} supports @command{cut} style field ranges:
+
+@example
+N    N'th field, counted from 1
+N-   from N'th field, to end of line
+N-M  from N'th to M'th field (inclusive)
+-M   from first to M'th field (inclusive)
+-    all fields
+@end example
+
 
 @item --format=@var{format}
 @opindex --format
diff --git a/gnulib b/gnulib
index 9a417cf7d..d0302f003 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 9a417cf7d48fa231c937c53626da6c45d09e6b3e
+Subproject commit d0302f003873b8c633d2023ab98aa6c4045b32e8
diff --git a/src/numfmt.c b/src/numfmt.c
index c03329f04..18243dd9f 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -29,6 +29,8 @@
 #include "system.h"
 #include "xstrtol.h"
 #include "xstrndup.h"
+#include "gl_linked_list.h"
+#include "gl_xlist.h"
 
 /* The official name of this program (e.g., no 'g' prefix).  */
 #define PROGRAM_NAME "numfmt"
@@ -182,7 +184,10 @@ static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
 /* auto-pad each line based on skipped whitespace.  */
 static int auto_padding = 0;
 static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
-static long int field = 1;
+static bool all_fields = false;
+static size_t all_fields_after = 0;
+static size_t all_fields_before = 0;
+static gl_list_t field_list;
 static int delimiter = DELIMITER_DEFAULT;
 
 /* if non-zero, the first 'header' lines from STDIN are skipped.  */
@@ -854,7 +859,8 @@ Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
   -d, --delimiter=X    use X instead of whitespace for field delimiter\n\
 "), stdout);
       fputs (_("\
-      --field=N        replace the number in input field N (default is 1)\n\
+      --field=FIELDS   replace the numbers in these input fields (default=1)\n\
+                         see FIELDS below\n\
 "), stdout);
       fputs (_("\
       --format=FORMAT  use printf style floating-point FORMAT;\n\
@@ -932,6 +938,16 @@ UNIT options:\n"), stdout);
                1Mi = 1048576,\n\
                ...\n"), stdout);
 
+      fputs (_("\n\
+FIELDS supports cut(1) style field ranges:\n\
+  N    N'th field, counted from 1\n\
+  N-   from N'th field, to end of line\n\
+  N-M  from N'th to M'th field (inclusive)\n\
+  -M   from first to M'th field (inclusive)\n\
+  -    all fields\n\
+Multiple fields/ranges can be separated with commas\n\
+"), stdout);
+
       fputs (_("\n\
 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
@@ -960,7 +976,7 @@ Examples:\n\
            -> \"1000\"\n\
   $ echo 1K | %s --from=iec\n\
            -> \"1024\"\n\
-  $ df -B1 | %s --header --field 2 --to=si\n\
+  $ df -B1 | %s --header --field 2-4 --to=si\n\
   $ ls -l  | %s --header --field 5 --to=iec\n\
   $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
   $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
@@ -1182,7 +1198,8 @@ print_padded_number (void)
 /* Converts the TEXT number string to the requested representation,
    and handles automatic suffix addition.  */
 static int
-process_suffixed_number (char *text, long double *result, size_t *precision)
+process_suffixed_number (char *text, long double *result,
+                         size_t *precision, long int field)
 {
   if (suffix && strlen (text) > strlen (suffix))
     {
@@ -1233,139 +1250,253 @@ process_suffixed_number (char *text, long double *result, size_t *precision)
   return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
 }
 
-/* Skip the requested number of fields in the input string.
-   Returns a pointer to the *delimiter* of the requested field,
-   or a pointer to NUL (if reached the end of the string).  */
-static inline char * _GL_ATTRIBUTE_PURE
-skip_fields (char *buf, int fields)
+typedef struct range_pair
 {
-  char *ptr = buf;
-  if (delimiter != DELIMITER_DEFAULT)
-    {
-      if (*ptr == delimiter)
-        fields--;
-      while (*ptr && fields--)
-        {
-          while (*ptr && *ptr == delimiter)
-            ++ptr;
-          while (*ptr && *ptr != delimiter)
-            ++ptr;
-        }
-    }
-  else
-    while (*ptr && fields--)
-      {
-        while (*ptr && isblank (to_uchar (*ptr)))
-          ++ptr;
-        while (*ptr && !isblank (to_uchar (*ptr)))
-          ++ptr;
-      }
-  return ptr;
+  size_t lo;
+  size_t hi;
+} range_pair_t;
+
+static int
+sort_field (const void *elt1, const void *elt2)
+{
+  range_pair_t* rp1 = (range_pair_t*) elt1;
+  range_pair_t* rp2 = (range_pair_t*) elt2;
+
+  if (rp1->lo < rp2->lo)
+    return -1;
+
+  return rp1->lo > rp2->lo;
 }
 
-/* Parse a delimited string, and extracts the requested field.
-   NOTE: the input buffer is modified.
+static int
+match_field (const void *elt1, const void *elt2)
+{
+  range_pair_t* rp = (range_pair_t*) elt1;
+  size_t field = *(size_t*) elt2;
 
-   TODO:
-     Maybe support multiple fields, though can always pipe output
-     into another numfmt to process other fields.
-     Maybe default to processing all fields rather than just first?
+  if (rp->lo <= field && field <= rp->hi)
+    return 0;
+
+  if (rp->lo < field)
+    return -1;
+
+  return 1;
+}
 
-   Output:
-     _PREFIX, _DATA, _SUFFIX will point to the relevant positions
-     in the input string, or be NULL if such a part doesn't exist.  */
 static void
-extract_fields (char *line, int _field,
-                char ** _prefix, char ** _data, char ** _suffix)
+free_field (const void *elt)
 {
-  char *ptr = line;
-  *_prefix = NULL;
-  *_data = NULL;
-  *_suffix = NULL;
+  void *p = (void *)elt;
+  free (p);
+}
 
-  devmsg ("extracting Fields:\n  input: %s\n  field: %d\n",
-          quote (line), _field);
+/* Add the specified fields to field_list.
+   The format recognized is similar to cut.
+   TODO: Refactor the more performant cut implementation
+   for use by both utilities.  */
+static void
+parse_field_arg (char *optarg)
+{
 
-  if (field > 1)
+  char *start, *end;
+  range_pair_t *rp;
+  size_t field_val;
+  size_t range_val = 0;
+
+  start = end = optarg;
+
+  if (STREQ (optarg, "-"))
     {
-      /* skip the requested number of fields.  */
-      *_prefix = line;
-      ptr = skip_fields (line, field - 1);
-      if (*ptr == '\0')
-        {
-          /* not enough fields in the input - print warning?  */
-          devmsg ("  TOO FEW FIELDS!\n  prefix: %s\n", quote (*_prefix));
-          return;
-        }
+      all_fields = true;
 
-      *ptr = '\0';
-      ++ptr;
+      return;
     }
 
-  *_data = ptr;
-  *_suffix = skip_fields (*_data, 1);
-  if (**_suffix)
+  if (*start == '-')
     {
-      /* there is a suffix (i.e., the field is not the last on the line),
-         so null-terminate the _data before it.  */
-      **_suffix = '\0';
-      ++(*_suffix);
+      /* range -M */
+      ++start;
+
+      all_fields_before = strtol (start, &end, 10);
+
+      if (start == end || all_fields_before <=0)
+        error (EXIT_FAILURE, 0, _("invalid field value %s"),
+               quote (start));
+
+      return;
     }
-  else
-    *_suffix = NULL;
 
-  devmsg ("  prefix: %s\n  number: %s\n  suffix: %s\n",
-          quote_n (0, *_prefix ? *_prefix : ""),
-          quote_n (1, *_data),
-          quote_n (2, *_suffix ? *_suffix : ""));
-}
+  field_list = gl_list_create_empty (GL_LINKED_LIST,
+                                     NULL, NULL, free_field, false);
 
+  while (*end != '\0') {
+    field_val = strtol (start, &end, 10);
 
-/* Convert a number in a given line of text.
-   NEWLINE specifies whether to output a '\n' for this "line".  */
-static int
-process_line (char *line, bool newline)
-{
-  char *pre, *num, *suf;
-  long double val = 0;
-  size_t precision = 0;
-  int valid_number = 0;
+    if (start == end || field_val <=0)
+      error (EXIT_FAILURE, 0, _("invalid field value %s"),
+             quote (start));
 
-  extract_fields (line, field, &pre, &num, &suf);
-  if (!num)
-    if (inval_style != inval_ignore)
-      error (conv_exit_code, 0, _("input line is too short, "
-                                  "no numbers found to convert in field %ld"),
-           field);
+    if (! range_val)
+      {
+        /* field N */
+        rp = xmalloc (sizeof (*rp));
+        rp->lo = rp->hi = field_val;
+        gl_sortedlist_add (field_list, sort_field, rp);
+      }
+    else
+      {
+        /* range N-M
+           The last field was the start of the field range. The current
+           field is the end of the field range.  We already added the
+           start field, so increment and add all the fields through
+           range end. */
+        if (field_val < range_val)
+          error (EXIT_FAILURE, 0, _("invalid decreasing range"));
+        rp = xmalloc (sizeof (*rp));
+        rp->lo = range_val + 1;
+        rp->hi = field_val;
+        gl_sortedlist_add (field_list, sort_field, rp);
+
+        range_val = 0;
+      }
 
-  if (num)
-    {
-      valid_number = process_suffixed_number (num, &val, &precision);
-      if (valid_number)
-        valid_number = prepare_padded_number (val, precision);
+    switch (*end) {
+      case ',':
+        /* discrete field separator */
+        ++end;
+        start = end;
+        break;
+
+      case '-':
+        /* field range separator */
+        ++end;
+        start = end;
+        range_val = field_val;
+        break;
     }
+  }
 
-  if (pre)
-    fputs (pre, stdout);
+  if (range_val)
+    {
+      /* range N-
+         range_val was not reset indicating optarg
+         ended with a trailing '-' */
+      all_fields_after = range_val;
+    }
+}
 
-  if (pre && num)
-    fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
+/* Return a pointer to the beginning of the next field in line.
+   The line pointer is moved to the end of the next field. */
+static char*
+next_field (char **line)
+{
+  char *field_start = *line;
+  char *field_end   = field_start;
 
-  if (valid_number)
+  if (delimiter != DELIMITER_DEFAULT)
     {
-      print_padded_number ();
+      if (*field_start != delimiter)
+        {
+          while (*field_end && *field_end != delimiter)
+            ++field_end;
+        }
+      /* else empty field */
     }
   else
     {
-      if (num)
-        fputs (num, stdout);
+      /* keep any space prefix in the returned field */
+      while (*field_end && isblank (to_uchar (*field_end)))
+        ++field_end;
+
+      while (*field_end && !isblank (to_uchar (*field_end)))
+        ++field_end;
     }
 
-  if (suf)
+  *line = field_end;
+  return field_start;
+}
+
+static bool
+include_field (size_t field)
+{
+  if (all_fields)
+    return true;
+
+  if (all_fields_after && all_fields_after <= field)
+    return true;
+
+  if (all_fields_before && field <= all_fields_before)
+    return true;
+
+  /* default to field 1 */
+  if (! field_list)
+    return field == 1;
+
+  return gl_sortedlist_search (field_list, match_field, &field);
+}
+
+/* Convert and output the given field.  If it is not included in the set
+   of fields to process, just output the original.  */
+static bool
+process_field (char *text, size_t field)
+{
+  long double val = 0;
+  size_t precision = 0;
+  bool valid_number = true;
+
+  if (include_field (field))
     {
-      fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
-      fputs (suf, stdout);
+      valid_number =
+        process_suffixed_number (text, &val, &precision, field);
+
+      if (valid_number)
+        valid_number = prepare_padded_number (val, precision);
+
+      if (valid_number)
+        print_padded_number ();
+      else
+        fputs (text, stdout);
     }
+  else
+    fputs (text, stdout);
+
+  return valid_number;
+}
+
+/* Convert number in a given line of text.
+   NEWLINE specifies whether to output a '\n' for this "line".  */
+static int
+process_line (char *line, bool newline)
+{
+  char *next;
+  size_t field = 0;
+  bool valid_number = true;
+
+  while (true) {
+    ++field;
+    next = next_field (&line);
+
+    if (*line != '\0')
+      {
+        /* nul terminate the current field string and process */
+        *line = '\0';
+
+        if (! process_field (next, field))
+          valid_number = false;
+
+        fputc ((delimiter == DELIMITER_DEFAULT) ?
+               ' ' : delimiter, stdout);
+        ++line;
+      }
+    else
+      {
+        /* end of the line, process the last field and finish */
+        if (! process_field (next, field))
+          valid_number = false;
+
+        break;
+      }
+  }
 
   if (newline)
     putchar ('\n');
@@ -1441,10 +1572,12 @@ main (int argc, char **argv)
           break;
 
         case FIELD_OPTION:
-          if (xstrtol (optarg, NULL, 10, &field, "") != LONGINT_OK
-              || field <= 0)
-            error (EXIT_FAILURE, 0, _("invalid field value %s"),
-                   quote (optarg));
+          if (all_fields || all_fields_before || all_fields_after || field_list)
+            {
+              error (EXIT_FAILURE, 0,
+                     _("multiple field specifications"));
+            }
+          parse_field_arg (optarg);
           break;
 
         case 'd':
@@ -1556,10 +1689,14 @@ main (int argc, char **argv)
         error (0, errno, _("error reading input"));
     }
 
+#ifdef lint
   free (padding_buffer);
   free (format_str_prefix);
   free (format_str_suffix);
 
+  if (field_list)
+    gl_list_free (field_list);
+#endif
 
   if (debug && !valid_numbers)
     error (0, 0, _("failed to convert some of the input numbers"));
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index e8640c0f7..630d18707 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -194,21 +194,16 @@ my @Tests =
      ['delim-3', '--delimiter=" " --from=auto "40M Foo"',{OUT=>'40000000 Foo'}],
      ['delim-4', '--delimiter=: --from=auto 40M:60M',  {OUT=>'40000000:60M'}],
      ['delim-5', '-d: --field=2 --from=auto :40M:60M',  {OUT=>':40000000:60M'}],
-     ['delim-6', '--delimiter=: --field 3 --from=auto 40M:60M',
-             {EXIT=>2},
-             {ERR=>"$prog: input line is too short, no numbers found " .
-                   "to convert in field 3\n"}],
+     ['delim-6', '-d: --field 3 --from=auto 40M:60M', {OUT=>"40M:60M"}],
 
      #Fields
      ['field-1', '--field A',
              {ERR => "$prog: invalid field value 'A'\n"},
              {EXIT => '1'}],
-     ['field-1.1', '--field -5',
-             {ERR => "$prog: invalid field value '-5'\n"},
-             {EXIT => '1'}],
      ['field-2', '--field 2 --from=auto "Hello 40M World 90G"',
              {OUT=>'Hello 40000000 World 90G'}],
      ['field-3', '--field 3 --from=auto "Hello 40M World 90G"',
+             {OUT=>"Hello 40M "},
              {ERR=>"$prog: invalid number: 'World'\n"},
              {EXIT => 2},],
      # Last field - no text after number
@@ -223,10 +218,32 @@ my @Tests =
              {OUT=>"Hello:40000000:World:90G"}],
 
      # not enough fields
-     ['field-8', '--field 3 --to=si "Hello World"',
-             {EXIT=>2},
-             {ERR=>"$prog: input line is too short, no numbers found " .
-                   "to convert in field 3\n"}],
+     ['field-8', '--field 3 --to=si "Hello World"', {OUT=>"Hello World"}],
+
+     # Multiple fields
+     ['field-range-1', '--field 2,4 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1000 2.0K 3000 4.0K 5000"}],
+
+     ['field-range-2', '--field 2-4 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1000 2.0K 3.0K 4.0K 5000"}],
+
+     ['field-range-3', '--field 1,2,3-5 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
+
+     ['field-range-4', '--field 1-5 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
+
+     ['field-range-5', '--field 1-3,5 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4000 5.0K"}],
+
+     ['field-range-6', '--field 3- --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1000 2000 3.0K 4.0K 5.0K"}],
+
+     ['field-range-7', '--field -3 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4000 5000"}],
+
+     ['all-fields-1', '--field=- --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
 
      # Auto-consume white-space, setup auto-padding
      ['whitespace-1', '--to=si --field 2 "A    500 B"', {OUT=>"A    500 B"}],
@@ -679,9 +696,6 @@ my @Tests =
      ['devdebug-11', '---debug --format "%\'-10f" 10000',{OUT=>"10000     "},
              {ERR=>""},
              {ERR_SUBST=>"s/.*//msg"}],
-     ['devdebug-12', '---debug --field 2 A',{OUT=>""},
-             {ERR=>""}, {EXIT=>2},
-             {ERR_SUBST=>"s/.*//msg"}],
 
      # Invalid parameters
      ['help-1', '--foobar',
@@ -787,11 +801,6 @@ my @Tests =
              {ERR => "$prog: invalid number: 'World'\n"},
              {OUT => "Hello 40M World 90G\n"},
              {EXIT => 2}],
-     ['ign-err-6', '--invalid=fail --field 3 --to=si "Hello World"',
-             {ERR => "$prog: input line is too short, no numbers found " .
-                     "to convert in field 3\n"},
-             {OUT => "Hello World\n"},
-             {EXIT => 2}],
      ['ign-err-7', '--invalid=fail --from=si "foo"',
              {ERR => "$prog: invalid number: 'foo'\n"},
              {OUT => "foo\n"},
@@ -855,13 +864,6 @@ my @Tests =
              {OUT => "A 1000 x\nB Foo y\nC 2.8G z\n"},
              {ERR => "$prog: invalid number: 'Foo'\n"},
              {EXIT => 2}],
-     # one of the lines is too short
-     ['ign-err-m3.2', '--invalid=fail --field 2 --from=si --to=iec',
-             {IN_PIPE => "A 1K x\nB\nC 3G z\n"},
-             {OUT => "A 1000 x\nB\nC 2.8G z\n"},
-             {ERR => "$prog: input line is too short, no numbers found " .
-                     "to convert in field 2\n"},
-             {EXIT => 2}],
     );
 
 my @Locale_Tests =
-- 
cgit v1.2.3-70-g09d2