summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDylan Cali <calid1984@gmail.com>2014-09-05 04:42:02 -0500
committerPádraig Brady <P@draigBrady.com>2015-06-19 19:59:21 +0100
commit71063bc858cd927e3622b511297e66b3e13f7453 (patch)
treef7574dc51bb55b27881a2d063fb37c76198a4be4
parent5863426dcfec2336cf0e1a28255e9080889fcb4c (diff)
downloadcoreutils-71063bc858cd927e3622b511297e66b3e13f7453.tar.xz
numfmt: implement support for field ranges
* src/numfmt.c: Replace field handling code with logic that understands field range specifiers. Instead of processing a single field and printing line prefix/suffix around it, process each field in the line checking whether it has been included for conversion. If so convert and print, otherwise just print the unaltered field. (extract_fields): Removed. (skip_fields): Removed. (process_line): Gutted and heavily reworked. (process_suffixed_number): FIELD is now passed as an arg instead of using a global. (parse_field_arg): New function that parses field range specifiers. (next_field): New function that returns pointers to the next field in a line. (process_field): New function that wraps the field conversion logic. (include_field): New function that checks whether a field should be converted. (compare_field): New function used for field value comparisons in a gl_list. (free_field): New function used for freeing field values in a gl_list. Global variable FIELD removed. New global variable all_fields indicates whether all fields should be processed. New global variable all_fields_after stores the first field of a N- style range. New global variable all_fields_before stores the last field of a -M style range. New global variable field_list stores explicitly specified fields to process (N, N,M or N-M style specifiers). (usage): Document newly supported field range specifiers. * bootstrap.conf: Include xlist and linked-list modules. numfmt now uses the gl_linked_list implementation to store the field ranges. * tests/misc/numfmt.pl: Add tests for 'cut style' field ranges. Adjust existing tests as partial output can occur before an error. Remove test for the 'invalid' field -5; this is now a valid range. * gnulib: update to avoid compiler warnings in linked-list. * NEWS: Mention the new feature.
-rw-r--r--NEWS2
-rw-r--r--bootstrap.conf2
-rw-r--r--doc/coreutils.texi14
m---------gnulib0
-rw-r--r--src/numfmt.c355
-rwxr-xr-xtests/misc/numfmt.pl54
6 files changed, 290 insertions, 137 deletions
diff --git a/NEWS b/NEWS
index 9d69da330..9c551d514 100644
--- a/NEWS
+++ b/NEWS
@@ -70,6 +70,8 @@ GNU coreutils NEWS -*- outline -*-
dd accepts a new status=progress level to print data transfer statistics
on stderr approximately every second.
+ numfmt can now process multiple fields using field ranges similar to cut.
+
split accepts a new --separator option to select a record separator character
other than the default newline character.
diff --git a/bootstrap.conf b/bootstrap.conf
index 320e7f581..5b6ec58e5 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -34,6 +34,7 @@ gnulib_modules="
argv-iter
assert
autobuild
+ linked-list
backupfile
base64
buffer-lcm
@@ -270,6 +271,7 @@ gnulib_modules="
xgetcwd
xgetgroups
xgethostname
+ xlist
xmemcoll
xnanosleep
xprintf
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 08316c928..9197cb426 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -16892,9 +16892,19 @@ Print (to standard error) warning messages about possible erroneous usage.
Use the character @var{d} as input field separator (default: whitespace).
@emph{Note}: Using non-default delimiter turns off automatic padding.
-@item --field=@var{n}
+@item --field=@var{fields}
@opindex --field
-Convert the number in input field @var{n} (default: 1).
+Convert the number in input field @var{fields} (default: 1).
+@var{fields} supports @command{cut} style field ranges:
+
+@example
+N N'th field, counted from 1
+N- from N'th field, to end of line
+N-M from N'th to M'th field (inclusive)
+-M from first to M'th field (inclusive)
+- all fields
+@end example
+
@item --format=@var{format}
@opindex --format
diff --git a/gnulib b/gnulib
-Subproject 9a417cf7d48fa231c937c53626da6c45d09e6b3
+Subproject d0302f003873b8c633d2023ab98aa6c4045b32e
diff --git a/src/numfmt.c b/src/numfmt.c
index c03329f04..18243dd9f 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -29,6 +29,8 @@
#include "system.h"
#include "xstrtol.h"
#include "xstrndup.h"
+#include "gl_linked_list.h"
+#include "gl_xlist.h"
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "numfmt"
@@ -182,7 +184,10 @@ static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
/* auto-pad each line based on skipped whitespace. */
static int auto_padding = 0;
static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
-static long int field = 1;
+static bool all_fields = false;
+static size_t all_fields_after = 0;
+static size_t all_fields_before = 0;
+static gl_list_t field_list;
static int delimiter = DELIMITER_DEFAULT;
/* if non-zero, the first 'header' lines from STDIN are skipped. */
@@ -854,7 +859,8 @@ Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
-d, --delimiter=X use X instead of whitespace for field delimiter\n\
"), stdout);
fputs (_("\
- --field=N replace the number in input field N (default is 1)\n\
+ --field=FIELDS replace the numbers in these input fields (default=1)\n\
+ see FIELDS below\n\
"), stdout);
fputs (_("\
--format=FORMAT use printf style floating-point FORMAT;\n\
@@ -933,6 +939,16 @@ UNIT options:\n"), stdout);
...\n"), stdout);
fputs (_("\n\
+FIELDS supports cut(1) style field ranges:\n\
+ N N'th field, counted from 1\n\
+ N- from N'th field, to end of line\n\
+ N-M from N'th to M'th field (inclusive)\n\
+ -M from first to M'th field (inclusive)\n\
+ - all fields\n\
+Multiple fields/ranges can be separated with commas\n\
+"), stdout);
+
+ fputs (_("\n\
FORMAT must be suitable for printing one floating-point argument '%f'.\n\
Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
@@ -960,7 +976,7 @@ Examples:\n\
-> \"1000\"\n\
$ echo 1K | %s --from=iec\n\
-> \"1024\"\n\
- $ df -B1 | %s --header --field 2 --to=si\n\
+ $ df -B1 | %s --header --field 2-4 --to=si\n\
$ ls -l | %s --header --field 5 --to=iec\n\
$ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
$ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
@@ -1182,7 +1198,8 @@ print_padded_number (void)
/* Converts the TEXT number string to the requested representation,
and handles automatic suffix addition. */
static int
-process_suffixed_number (char *text, long double *result, size_t *precision)
+process_suffixed_number (char *text, long double *result,
+ size_t *precision, long int field)
{
if (suffix && strlen (text) > strlen (suffix))
{
@@ -1233,139 +1250,253 @@ process_suffixed_number (char *text, long double *result, size_t *precision)
return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
}
-/* Skip the requested number of fields in the input string.
- Returns a pointer to the *delimiter* of the requested field,
- or a pointer to NUL (if reached the end of the string). */
-static inline char * _GL_ATTRIBUTE_PURE
-skip_fields (char *buf, int fields)
+typedef struct range_pair
{
- char *ptr = buf;
- if (delimiter != DELIMITER_DEFAULT)
- {
- if (*ptr == delimiter)
- fields--;
- while (*ptr && fields--)
- {
- while (*ptr && *ptr == delimiter)
- ++ptr;
- while (*ptr && *ptr != delimiter)
- ++ptr;
- }
- }
- else
- while (*ptr && fields--)
- {
- while (*ptr && isblank (to_uchar (*ptr)))
- ++ptr;
- while (*ptr && !isblank (to_uchar (*ptr)))
- ++ptr;
- }
- return ptr;
+ size_t lo;
+ size_t hi;
+} range_pair_t;
+
+static int
+sort_field (const void *elt1, const void *elt2)
+{
+ range_pair_t* rp1 = (range_pair_t*) elt1;
+ range_pair_t* rp2 = (range_pair_t*) elt2;
+
+ if (rp1->lo < rp2->lo)
+ return -1;
+
+ return rp1->lo > rp2->lo;
}
-/* Parse a delimited string, and extracts the requested field.
- NOTE: the input buffer is modified.
+static int
+match_field (const void *elt1, const void *elt2)
+{
+ range_pair_t* rp = (range_pair_t*) elt1;
+ size_t field = *(size_t*) elt2;
- TODO:
- Maybe support multiple fields, though can always pipe output
- into another numfmt to process other fields.
- Maybe default to processing all fields rather than just first?
+ if (rp->lo <= field && field <= rp->hi)
+ return 0;
+
+ if (rp->lo < field)
+ return -1;
+
+ return 1;
+}
- Output:
- _PREFIX, _DATA, _SUFFIX will point to the relevant positions
- in the input string, or be NULL if such a part doesn't exist. */
static void
-extract_fields (char *line, int _field,
- char ** _prefix, char ** _data, char ** _suffix)
+free_field (const void *elt)
{
- char *ptr = line;
- *_prefix = NULL;
- *_data = NULL;
- *_suffix = NULL;
+ void *p = (void *)elt;
+ free (p);
+}
- devmsg ("extracting Fields:\n input: %s\n field: %d\n",
- quote (line), _field);
+/* Add the specified fields to field_list.
+ The format recognized is similar to cut.
+ TODO: Refactor the more performant cut implementation
+ for use by both utilities. */
+static void
+parse_field_arg (char *optarg)
+{
- if (field > 1)
+ char *start, *end;
+ range_pair_t *rp;
+ size_t field_val;
+ size_t range_val = 0;
+
+ start = end = optarg;
+
+ if (STREQ (optarg, "-"))
{
- /* skip the requested number of fields. */
- *_prefix = line;
- ptr = skip_fields (line, field - 1);
- if (*ptr == '\0')
- {
- /* not enough fields in the input - print warning? */
- devmsg (" TOO FEW FIELDS!\n prefix: %s\n", quote (*_prefix));
- return;
- }
+ all_fields = true;
- *ptr = '\0';
- ++ptr;
+ return;
}
- *_data = ptr;
- *_suffix = skip_fields (*_data, 1);
- if (**_suffix)
+ if (*start == '-')
{
- /* there is a suffix (i.e., the field is not the last on the line),
- so null-terminate the _data before it. */
- **_suffix = '\0';
- ++(*_suffix);
+ /* range -M */
+ ++start;
+
+ all_fields_before = strtol (start, &end, 10);
+
+ if (start == end || all_fields_before <=0)
+ error (EXIT_FAILURE, 0, _("invalid field value %s"),
+ quote (start));
+
+ return;
}
- else
- *_suffix = NULL;
- devmsg (" prefix: %s\n number: %s\n suffix: %s\n",
- quote_n (0, *_prefix ? *_prefix : ""),
- quote_n (1, *_data),
- quote_n (2, *_suffix ? *_suffix : ""));
-}
+ field_list = gl_list_create_empty (GL_LINKED_LIST,
+ NULL, NULL, free_field, false);
+ while (*end != '\0') {
+ field_val = strtol (start, &end, 10);
-/* Convert a number in a given line of text.
- NEWLINE specifies whether to output a '\n' for this "line". */
-static int
-process_line (char *line, bool newline)
-{
- char *pre, *num, *suf;
- long double val = 0;
- size_t precision = 0;
- int valid_number = 0;
+ if (start == end || field_val <=0)
+ error (EXIT_FAILURE, 0, _("invalid field value %s"),
+ quote (start));
- extract_fields (line, field, &pre, &num, &suf);
- if (!num)
- if (inval_style != inval_ignore)
- error (conv_exit_code, 0, _("input line is too short, "
- "no numbers found to convert in field %ld"),
- field);
+ if (! range_val)
+ {
+ /* field N */
+ rp = xmalloc (sizeof (*rp));
+ rp->lo = rp->hi = field_val;
+ gl_sortedlist_add (field_list, sort_field, rp);
+ }
+ else
+ {
+ /* range N-M
+ The last field was the start of the field range. The current
+ field is the end of the field range. We already added the
+ start field, so increment and add all the fields through
+ range end. */
+ if (field_val < range_val)
+ error (EXIT_FAILURE, 0, _("invalid decreasing range"));
+ rp = xmalloc (sizeof (*rp));
+ rp->lo = range_val + 1;
+ rp->hi = field_val;
+ gl_sortedlist_add (field_list, sort_field, rp);
+
+ range_val = 0;
+ }
- if (num)
- {
- valid_number = process_suffixed_number (num, &val, &precision);
- if (valid_number)
- valid_number = prepare_padded_number (val, precision);
+ switch (*end) {
+ case ',':
+ /* discrete field separator */
+ ++end;
+ start = end;
+ break;
+
+ case '-':
+ /* field range separator */
+ ++end;
+ start = end;
+ range_val = field_val;
+ break;
}
+ }
- if (pre)
- fputs (pre, stdout);
+ if (range_val)
+ {
+ /* range N-
+ range_val was not reset indicating optarg
+ ended with a trailing '-' */
+ all_fields_after = range_val;
+ }
+}
- if (pre && num)
- fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
+/* Return a pointer to the beginning of the next field in line.
+ The line pointer is moved to the end of the next field. */
+static char*
+next_field (char **line)
+{
+ char *field_start = *line;
+ char *field_end = field_start;
- if (valid_number)
+ if (delimiter != DELIMITER_DEFAULT)
{
- print_padded_number ();
+ if (*field_start != delimiter)
+ {
+ while (*field_end && *field_end != delimiter)
+ ++field_end;
+ }
+ /* else empty field */
}
else
{
- if (num)
- fputs (num, stdout);
+ /* keep any space prefix in the returned field */
+ while (*field_end && isblank (to_uchar (*field_end)))
+ ++field_end;
+
+ while (*field_end && !isblank (to_uchar (*field_end)))
+ ++field_end;
}
- if (suf)
+ *line = field_end;
+ return field_start;
+}
+
+static bool
+include_field (size_t field)
+{
+ if (all_fields)
+ return true;
+
+ if (all_fields_after && all_fields_after <= field)
+ return true;
+
+ if (all_fields_before && field <= all_fields_before)
+ return true;
+
+ /* default to field 1 */
+ if (! field_list)
+ return field == 1;
+
+ return gl_sortedlist_search (field_list, match_field, &field);
+}
+
+/* Convert and output the given field. If it is not included in the set
+ of fields to process just output the original */
+static bool
+process_field (char *text, size_t field)
+{
+ long double val = 0;
+ size_t precision = 0;
+ bool valid_number = true;
+
+ if (include_field (field))
{
- fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
- fputs (suf, stdout);
+ valid_number =
+ process_suffixed_number (text, &val, &precision, field);
+
+ if (valid_number)
+ valid_number = prepare_padded_number (val, precision);
+
+ if (valid_number)
+ print_padded_number ();
+ else
+ fputs (text, stdout);
}
+ else
+ fputs (text, stdout);
+
+ return valid_number;
+}
+
+/* Convert number in a given line of text.
+ NEWLINE specifies whether to output a '\n' for this "line". */
+static int
+process_line (char *line, bool newline)
+{
+ char *next;
+ size_t field = 0;
+ bool valid_number = true;
+
+ while (true) {
+ ++field;
+ next = next_field (&line);
+
+ if (*line != '\0')
+ {
+ /* nul terminate the current field string and process */
+ *line = '\0';
+
+ if (! process_field (next, field))
+ valid_number = false;
+
+ fputc ((delimiter == DELIMITER_DEFAULT) ?
+ ' ' : delimiter, stdout);
+ ++line;
+ }
+ else
+ {
+ /* end of the line, process the last field and finish */
+ if (! process_field (next, field))
+ valid_number = false;
+
+ break;
+ }
+ }
if (newline)
putchar ('\n');
@@ -1441,10 +1572,12 @@ main (int argc, char **argv)
break;
case FIELD_OPTION:
- if (xstrtol (optarg, NULL, 10, &field, "") != LONGINT_OK
- || field <= 0)
- error (EXIT_FAILURE, 0, _("invalid field value %s"),
- quote (optarg));
+ if (all_fields || all_fields_before || all_fields_after || field_list)
+ {
+ error (EXIT_FAILURE, 0,
+ _("multiple field specifications"));
+ }
+ parse_field_arg (optarg);
break;
case 'd':
@@ -1556,10 +1689,14 @@ main (int argc, char **argv)
error (0, errno, _("error reading input"));
}
+#ifdef lint
free (padding_buffer);
free (format_str_prefix);
free (format_str_suffix);
+ if (field_list)
+ gl_list_free (field_list);
+#endif
if (debug && !valid_numbers)
error (0, 0, _("failed to convert some of the input numbers"));
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index e8640c0f7..630d18707 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -194,21 +194,16 @@ my @Tests =
['delim-3', '--delimiter=" " --from=auto "40M Foo"',{OUT=>'40000000 Foo'}],
['delim-4', '--delimiter=: --from=auto 40M:60M', {OUT=>'40000000:60M'}],
['delim-5', '-d: --field=2 --from=auto :40M:60M', {OUT=>':40000000:60M'}],
- ['delim-6', '--delimiter=: --field 3 --from=auto 40M:60M',
- {EXIT=>2},
- {ERR=>"$prog: input line is too short, no numbers found " .
- "to convert in field 3\n"}],
+ ['delim-6', '-d: --field 3 --from=auto 40M:60M', {OUT=>"40M:60M"}],
#Fields
['field-1', '--field A',
{ERR => "$prog: invalid field value 'A'\n"},
{EXIT => '1'}],
- ['field-1.1', '--field -5',
- {ERR => "$prog: invalid field value '-5'\n"},
- {EXIT => '1'}],
['field-2', '--field 2 --from=auto "Hello 40M World 90G"',
{OUT=>'Hello 40000000 World 90G'}],
['field-3', '--field 3 --from=auto "Hello 40M World 90G"',
+ {OUT=>"Hello 40M "},
{ERR=>"$prog: invalid number: 'World'\n"},
{EXIT => 2},],
# Last field - no text after number
@@ -223,10 +218,32 @@ my @Tests =
{OUT=>"Hello:40000000:World:90G"}],
# not enough fields
- ['field-8', '--field 3 --to=si "Hello World"',
- {EXIT=>2},
- {ERR=>"$prog: input line is too short, no numbers found " .
- "to convert in field 3\n"}],
+ ['field-8', '--field 3 --to=si "Hello World"', {OUT=>"Hello World"}],
+
+ # Multiple fields
+ ['field-range-1', '--field 2,4 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1000 2.0K 3000 4.0K 5000"}],
+
+ ['field-range-2', '--field 2-4 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1000 2.0K 3.0K 4.0K 5000"}],
+
+ ['field-range-3', '--field 1,2,3-5 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
+
+ ['field-range-4', '--field 1-5 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
+
+ ['field-range-5', '--field 1-3,5 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3.0K 4000 5.0K"}],
+
+ ['field-range-6', '--field 3- --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1000 2000 3.0K 4.0K 5.0K"}],
+
+ ['field-range-7', '--field -3 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3.0K 4000 5000"}],
+
+ ['all-fields-1', '--field=- --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
# Auto-consume white-space, setup auto-padding
['whitespace-1', '--to=si --field 2 "A 500 B"', {OUT=>"A 500 B"}],
@@ -679,9 +696,6 @@ my @Tests =
['devdebug-11', '---debug --format "%\'-10f" 10000',{OUT=>"10000 "},
{ERR=>""},
{ERR_SUBST=>"s/.*//msg"}],
- ['devdebug-12', '---debug --field 2 A',{OUT=>""},
- {ERR=>""}, {EXIT=>2},
- {ERR_SUBST=>"s/.*//msg"}],
# Invalid parameters
['help-1', '--foobar',
@@ -787,11 +801,6 @@ my @Tests =
{ERR => "$prog: invalid number: 'World'\n"},
{OUT => "Hello 40M World 90G\n"},
{EXIT => 2}],
- ['ign-err-6', '--invalid=fail --field 3 --to=si "Hello World"',
- {ERR => "$prog: input line is too short, no numbers found " .
- "to convert in field 3\n"},
- {OUT => "Hello World\n"},
- {EXIT => 2}],
['ign-err-7', '--invalid=fail --from=si "foo"',
{ERR => "$prog: invalid number: 'foo'\n"},
{OUT => "foo\n"},
@@ -855,13 +864,6 @@ my @Tests =
{OUT => "A 1000 x\nB Foo y\nC 2.8G z\n"},
{ERR => "$prog: invalid number: 'Foo'\n"},
{EXIT => 2}],
- # one of the lines is too short
- ['ign-err-m3.2', '--invalid=fail --field 2 --from=si --to=iec',
- {IN_PIPE => "A 1K x\nB\nC 3G z\n"},
- {OUT => "A 1000 x\nB\nC 2.8G z\n"},
- {ERR => "$prog: input line is too short, no numbers found " .
- "to convert in field 2\n"},
- {EXIT => 2}],
);
my @Locale_Tests =