From 71063bc858cd927e3622b511297e66b3e13f7453 Mon Sep 17 00:00:00 2001 From: Dylan Cali Date: Fri, 5 Sep 2014 04:42:02 -0500 Subject: numfmt: implement support for field ranges * src/numfmt.c: Replace field handling code with logic that understands field range specifiers. Instead of processing a single field and printing line prefix/suffix around it, process each field in the line, checking whether it has been included for conversion. If so, convert and print; otherwise just print the unaltered field. (extract_fields): Removed. (skip_fields): Removed. (process_line): Gutted and heavily reworked. (process_suffixed_number): FIELD is now passed as an arg instead of using a global. (parse_field_arg): New function that parses field range specifiers. (next_field): New function that returns a pointer to the start of the next field in a line and advances the line pointer to the end of that field. (process_field): New function that wraps the field conversion logic. (include_field): New function that checks whether a field should be converted. (sort_field): New function used to order field ranges stored in a gl_list. (match_field): New function used to test whether a field number falls within a range stored in a gl_list. (free_field): New function used for freeing field values in a gl_list. Global variable FIELD removed. New global variable all_fields indicates whether all fields should be processed. New global variable all_fields_after stores the first field of an N- style range. New global variable all_fields_before stores the last field of a -M style range. New global variable field_list stores explicitly specified fields to process (N, N,M or N-M style specifiers). (usage): Document newly supported field range specifiers. * bootstrap.conf: Include xlist and linked-list modules. numfmt now uses the gl_linked_list implementation to store the field ranges. * tests/misc/numfmt.pl: Add tests for 'cut style' field ranges. Adjust existing tests, as partial output can occur before an error. Remove the test for the 'invalid' field -5; this is now a valid range. * gnulib: update to avoid compiler warnings in linked-list. * NEWS: Mention the new feature. 
--- NEWS | 2 + bootstrap.conf | 2 + doc/coreutils.texi | 14 +- gnulib | 2 +- src/numfmt.c | 355 +++++++++++++++++++++++++++++++++++---------------- tests/misc/numfmt.pl | 54 ++++---- 6 files changed, 291 insertions(+), 138 deletions(-) diff --git a/NEWS b/NEWS index 9d69da330..9c551d514 100644 --- a/NEWS +++ b/NEWS @@ -70,6 +70,8 @@ GNU coreutils NEWS -*- outline -*- dd accepts a new status=progress level to print data transfer statistics on stderr approximately every second. + numfmt can now process multiple fields using field ranges similar to cut. + split accepts a new --separator option to select a record separator character other than the default newline character. diff --git a/bootstrap.conf b/bootstrap.conf index 320e7f581..5b6ec58e5 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -34,6 +34,7 @@ gnulib_modules=" argv-iter assert autobuild + linked-list backupfile base64 buffer-lcm @@ -270,6 +271,7 @@ gnulib_modules=" xgetcwd xgetgroups xgethostname + xlist xmemcoll xnanosleep xprintf diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 08316c928..9197cb426 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -16892,9 +16892,19 @@ Print (to standard error) warning messages about possible erroneous usage. Use the character @var{d} as input field separator (default: whitespace). @emph{Note}: Using non-default delimiter turns off automatic padding. -@item --field=@var{n} +@item --field=@var{fields} @opindex --field -Convert the number in input field @var{n} (default: 1). +Convert the number in input field @var{fields} (default: 1). 
+@var{fields} supports @command{cut} style field ranges: + +@example +N N'th field, counted from 1 +N- from N'th field, to end of line +N-M from N'th to M'th field (inclusive) +-M from first to M'th field (inclusive) +- all fields +@end example + @item --format=@var{format} @opindex --format diff --git a/gnulib b/gnulib index 9a417cf7d..d0302f003 160000 --- a/gnulib +++ b/gnulib @@ -1 +1 @@ -Subproject commit 9a417cf7d48fa231c937c53626da6c45d09e6b3e +Subproject commit d0302f003873b8c633d2023ab98aa6c4045b32e8 diff --git a/src/numfmt.c b/src/numfmt.c index c03329f04..18243dd9f 100644 --- a/src/numfmt.c +++ b/src/numfmt.c @@ -29,6 +29,8 @@ #include "system.h" #include "xstrtol.h" #include "xstrndup.h" +#include "gl_linked_list.h" +#include "gl_xlist.h" /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "numfmt" @@ -182,7 +184,10 @@ static int conv_exit_code = EXIT_CONVERSION_WARNINGS; /* auto-pad each line based on skipped whitespace. */ static int auto_padding = 0; static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT; -static long int field = 1; +static bool all_fields = false; +static size_t all_fields_after = 0; +static size_t all_fields_before = 0; +static gl_list_t field_list; static int delimiter = DELIMITER_DEFAULT; /* if non-zero, the first 'header' lines from STDIN are skipped. 
*/ @@ -854,7 +859,8 @@ Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\ -d, --delimiter=X use X instead of whitespace for field delimiter\n\ "), stdout); fputs (_("\ - --field=N replace the number in input field N (default is 1)\n\ + --field=FIELDS replace the numbers in these input fields (default=1)\n\ + see FIELDS below\n\ "), stdout); fputs (_("\ --format=FORMAT use printf style floating-point FORMAT;\n\ @@ -932,6 +938,16 @@ UNIT options:\n"), stdout); 1Mi = 1048576,\n\ ...\n"), stdout); + fputs (_("\n\ +FIELDS supports cut(1) style field ranges:\n\ + N N'th field, counted from 1\n\ + N- from N'th field, to end of line\n\ + N-M from N'th to M'th field (inclusive)\n\ + -M from first to M'th field (inclusive)\n\ + - all fields\n\ +Multiple fields/ranges can be separated with commas\n\ +"), stdout); + fputs (_("\n\ FORMAT must be suitable for printing one floating-point argument '%f'.\n\ Optional quote (%'f) will enable --grouping (if supported by current locale).\n\ @@ -960,7 +976,7 @@ Examples:\n\ -> \"1000\"\n\ $ echo 1K | %s --from=iec\n\ -> \"1024\"\n\ - $ df -B1 | %s --header --field 2 --to=si\n\ + $ df -B1 | %s --header --field 2-4 --to=si\n\ $ ls -l | %s --header --field 5 --to=iec\n\ $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\ $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"), @@ -1182,7 +1198,8 @@ print_padded_number (void) /* Converts the TEXT number string to the requested representation, and handles automatic suffix addition. */ static int -process_suffixed_number (char *text, long double *result, size_t *precision) +process_suffixed_number (char *text, long double *result, + size_t *precision, long int field) { if (suffix && strlen (text) > strlen (suffix)) { @@ -1233,139 +1250,253 @@ process_suffixed_number (char *text, long double *result, size_t *precision) return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS); } -/* Skip the requested number of fields in the input string. 
- Returns a pointer to the *delimiter* of the requested field, - or a pointer to NUL (if reached the end of the string). */ -static inline char * _GL_ATTRIBUTE_PURE -skip_fields (char *buf, int fields) +typedef struct range_pair { - char *ptr = buf; - if (delimiter != DELIMITER_DEFAULT) - { - if (*ptr == delimiter) - fields--; - while (*ptr && fields--) - { - while (*ptr && *ptr == delimiter) - ++ptr; - while (*ptr && *ptr != delimiter) - ++ptr; - } - } - else - while (*ptr && fields--) - { - while (*ptr && isblank (to_uchar (*ptr))) - ++ptr; - while (*ptr && !isblank (to_uchar (*ptr))) - ++ptr; - } - return ptr; + size_t lo; + size_t hi; +} range_pair_t; + +static int +sort_field (const void *elt1, const void *elt2) +{ + range_pair_t* rp1 = (range_pair_t*) elt1; + range_pair_t* rp2 = (range_pair_t*) elt2; + + if (rp1->lo < rp2->lo) + return -1; + + return rp1->lo > rp2->lo; } -/* Parse a delimited string, and extracts the requested field. - NOTE: the input buffer is modified. +static int +match_field (const void *elt1, const void *elt2) +{ + range_pair_t* rp = (range_pair_t*) elt1; + size_t field = *(size_t*) elt2; - TODO: - Maybe support multiple fields, though can always pipe output - into another numfmt to process other fields. - Maybe default to processing all fields rather than just first? + if (rp->lo <= field && field <= rp->hi) + return 0; + + if (rp->lo < field) + return -1; + + return 1; +} - Output: - _PREFIX, _DATA, _SUFFIX will point to the relevant positions - in the input string, or be NULL if such a part doesn't exist. */ static void -extract_fields (char *line, int _field, - char ** _prefix, char ** _data, char ** _suffix) +free_field (const void *elt) { - char *ptr = line; - *_prefix = NULL; - *_data = NULL; - *_suffix = NULL; + void *p = (void *)elt; + free (p); +} - devmsg ("extracting Fields:\n input: %s\n field: %d\n", - quote (line), _field); +/* Add the specified fields to field_list. + The format recognized is similar to cut. 
+ TODO: Refactor the more performant cut implementation + for use by both utilities. */ +static void +parse_field_arg (char *optarg) +{ - if (field > 1) + char *start, *end; + range_pair_t *rp; + size_t field_val; + size_t range_val = 0; + + start = end = optarg; + + if (STREQ (optarg, "-")) { - /* skip the requested number of fields. */ - *_prefix = line; - ptr = skip_fields (line, field - 1); - if (*ptr == '\0') - { - /* not enough fields in the input - print warning? */ - devmsg (" TOO FEW FIELDS!\n prefix: %s\n", quote (*_prefix)); - return; - } + all_fields = true; - *ptr = '\0'; - ++ptr; + return; } - *_data = ptr; - *_suffix = skip_fields (*_data, 1); - if (**_suffix) + if (*start == '-') { - /* there is a suffix (i.e., the field is not the last on the line), - so null-terminate the _data before it. */ - **_suffix = '\0'; - ++(*_suffix); + /* range -M */ + ++start; + + all_fields_before = strtol (start, &end, 10); + + if (start == end || all_fields_before <=0) + error (EXIT_FAILURE, 0, _("invalid field value %s"), + quote (start)); + + return; } - else - *_suffix = NULL; - devmsg (" prefix: %s\n number: %s\n suffix: %s\n", - quote_n (0, *_prefix ? *_prefix : ""), - quote_n (1, *_data), - quote_n (2, *_suffix ? *_suffix : "")); -} + field_list = gl_list_create_empty (GL_LINKED_LIST, + NULL, NULL, free_field, false); + while (*end != '\0') { + field_val = strtol (start, &end, 10); -/* Convert a number in a given line of text. - NEWLINE specifies whether to output a '\n' for this "line". 
*/ -static int -process_line (char *line, bool newline) -{ - char *pre, *num, *suf; - long double val = 0; - size_t precision = 0; - int valid_number = 0; + if (start == end || field_val <=0) + error (EXIT_FAILURE, 0, _("invalid field value %s"), + quote (start)); - extract_fields (line, field, &pre, &num, &suf); - if (!num) - if (inval_style != inval_ignore) - error (conv_exit_code, 0, _("input line is too short, " - "no numbers found to convert in field %ld"), - field); + if (! range_val) + { + /* field N */ + rp = xmalloc (sizeof (*rp)); + rp->lo = rp->hi = field_val; + gl_sortedlist_add (field_list, sort_field, rp); + } + else + { + /* range N-M + The last field was the start of the field range. The current + field is the end of the field range. We already added the + start field, so increment and add all the fields through + range end. */ + if (field_val < range_val) + error (EXIT_FAILURE, 0, _("invalid decreasing range")); + rp = xmalloc (sizeof (*rp)); + rp->lo = range_val + 1; + rp->hi = field_val; + gl_sortedlist_add (field_list, sort_field, rp); + + range_val = 0; + } - if (num) - { - valid_number = process_suffixed_number (num, &val, &precision); - if (valid_number) - valid_number = prepare_padded_number (val, precision); + switch (*end) { + case ',': + /* discrete field separator */ + ++end; + start = end; + break; + + case '-': + /* field range separator */ + ++end; + start = end; + range_val = field_val; + break; } + } - if (pre) - fputs (pre, stdout); + if (range_val) + { + /* range N- + range_val was not reset indicating optarg + ended with a trailing '-' */ + all_fields_after = range_val; + } +} - if (pre && num) - fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout); +/* Return a pointer to the beginning of the next field in line. + The line pointer is moved to the end of the next field. 
*/ +static char* +next_field (char **line) +{ + char *field_start = *line; + char *field_end = field_start; - if (valid_number) + if (delimiter != DELIMITER_DEFAULT) { - print_padded_number (); + if (*field_start != delimiter) + { + while (*field_end && *field_end != delimiter) + ++field_end; + } + /* else empty field */ } else { - if (num) - fputs (num, stdout); + /* keep any space prefix in the returned field */ + while (*field_end && isblank (to_uchar (*field_end))) + ++field_end; + + while (*field_end && !isblank (to_uchar (*field_end))) + ++field_end; } - if (suf) + *line = field_end; + return field_start; +} + +static bool +include_field (size_t field) +{ + if (all_fields) + return true; + + if (all_fields_after && all_fields_after <= field) + return true; + + if (all_fields_before && field <= all_fields_before) + return true; + + /* default to field 1 */ + if (! field_list) + return field == 1; + + return gl_sortedlist_search (field_list, match_field, &field); +} + +/* Convert and output the given field. If it is not included in the set + of fields to process just output the original */ +static bool +process_field (char *text, size_t field) +{ + long double val = 0; + size_t precision = 0; + bool valid_number = true; + + if (include_field (field)) { - fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout); - fputs (suf, stdout); + valid_number = + process_suffixed_number (text, &val, &precision, field); + + if (valid_number) + valid_number = prepare_padded_number (val, precision); + + if (valid_number) + print_padded_number (); + else + fputs (text, stdout); } + else + fputs (text, stdout); + + return valid_number; +} + +/* Convert number in a given line of text. + NEWLINE specifies whether to output a '\n' for this "line". 
*/ +static int +process_line (char *line, bool newline) +{ + char *next; + size_t field = 0; + bool valid_number = true; + + while (true) { + ++field; + next = next_field (&line); + + if (*line != '\0') + { + /* nul terminate the current field string and process */ + *line = '\0'; + + if (! process_field (next, field)) + valid_number = false; + + fputc ((delimiter == DELIMITER_DEFAULT) ? + ' ' : delimiter, stdout); + ++line; + } + else + { + /* end of the line, process the last field and finish */ + if (! process_field (next, field)) + valid_number = false; + + break; + } + } if (newline) putchar ('\n'); @@ -1441,10 +1572,12 @@ main (int argc, char **argv) break; case FIELD_OPTION: - if (xstrtol (optarg, NULL, 10, &field, "") != LONGINT_OK - || field <= 0) - error (EXIT_FAILURE, 0, _("invalid field value %s"), - quote (optarg)); + if (all_fields || all_fields_before || all_fields_after || field_list) + { + error (EXIT_FAILURE, 0, + _("multiple field specifications")); + } + parse_field_arg (optarg); break; case 'd': @@ -1556,10 +1689,14 @@ main (int argc, char **argv) error (0, errno, _("error reading input")); } +#ifdef lint free (padding_buffer); free (format_str_prefix); free (format_str_suffix); + if (field_list) + gl_list_free (field_list); +#endif if (debug && !valid_numbers) error (0, 0, _("failed to convert some of the input numbers")); diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl index e8640c0f7..630d18707 100755 --- a/tests/misc/numfmt.pl +++ b/tests/misc/numfmt.pl @@ -194,21 +194,16 @@ my @Tests = ['delim-3', '--delimiter=" " --from=auto "40M Foo"',{OUT=>'40000000 Foo'}], ['delim-4', '--delimiter=: --from=auto 40M:60M', {OUT=>'40000000:60M'}], ['delim-5', '-d: --field=2 --from=auto :40M:60M', {OUT=>':40000000:60M'}], - ['delim-6', '--delimiter=: --field 3 --from=auto 40M:60M', - {EXIT=>2}, - {ERR=>"$prog: input line is too short, no numbers found " . 
- "to convert in field 3\n"}], + ['delim-6', '-d: --field 3 --from=auto 40M:60M', {OUT=>"40M:60M"}], #Fields ['field-1', '--field A', {ERR => "$prog: invalid field value 'A'\n"}, {EXIT => '1'}], - ['field-1.1', '--field -5', - {ERR => "$prog: invalid field value '-5'\n"}, - {EXIT => '1'}], ['field-2', '--field 2 --from=auto "Hello 40M World 90G"', {OUT=>'Hello 40000000 World 90G'}], ['field-3', '--field 3 --from=auto "Hello 40M World 90G"', + {OUT=>"Hello 40M "}, {ERR=>"$prog: invalid number: 'World'\n"}, {EXIT => 2},], # Last field - no text after number @@ -223,10 +218,32 @@ my @Tests = {OUT=>"Hello:40000000:World:90G"}], # not enough fields - ['field-8', '--field 3 --to=si "Hello World"', - {EXIT=>2}, - {ERR=>"$prog: input line is too short, no numbers found " . - "to convert in field 3\n"}], + ['field-8', '--field 3 --to=si "Hello World"', {OUT=>"Hello World"}], + + # Multiple fields + ['field-range-1', '--field 2,4 --to=si "1000 2000 3000 4000 5000"', + {OUT=>"1000 2.0K 3000 4.0K 5000"}], + + ['field-range-2', '--field 2-4 --to=si "1000 2000 3000 4000 5000"', + {OUT=>"1000 2.0K 3.0K 4.0K 5000"}], + + ['field-range-3', '--field 1,2,3-5 --to=si "1000 2000 3000 4000 5000"', + {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}], + + ['field-range-4', '--field 1-5 --to=si "1000 2000 3000 4000 5000"', + {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}], + + ['field-range-5', '--field 1-3,5 --to=si "1000 2000 3000 4000 5000"', + {OUT=>"1.0K 2.0K 3.0K 4000 5.0K"}], + + ['field-range-6', '--field 3- --to=si "1000 2000 3000 4000 5000"', + {OUT=>"1000 2000 3.0K 4.0K 5.0K"}], + + ['field-range-7', '--field -3 --to=si "1000 2000 3000 4000 5000"', + {OUT=>"1.0K 2.0K 3.0K 4000 5000"}], + + ['all-fields-1', '--field=- --to=si "1000 2000 3000 4000 5000"', + {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}], # Auto-consume white-space, setup auto-padding ['whitespace-1', '--to=si --field 2 "A 500 B"', {OUT=>"A 500 B"}], @@ -679,9 +696,6 @@ my @Tests = ['devdebug-11', '---debug --format "%\'-10f" 10000',{OUT=>"10000 "}, 
{ERR=>""}, {ERR_SUBST=>"s/.*//msg"}], - ['devdebug-12', '---debug --field 2 A',{OUT=>""}, - {ERR=>""}, {EXIT=>2}, - {ERR_SUBST=>"s/.*//msg"}], # Invalid parameters ['help-1', '--foobar', @@ -787,11 +801,6 @@ my @Tests = {ERR => "$prog: invalid number: 'World'\n"}, {OUT => "Hello 40M World 90G\n"}, {EXIT => 2}], - ['ign-err-6', '--invalid=fail --field 3 --to=si "Hello World"', - {ERR => "$prog: input line is too short, no numbers found " . - "to convert in field 3\n"}, - {OUT => "Hello World\n"}, - {EXIT => 2}], ['ign-err-7', '--invalid=fail --from=si "foo"', {ERR => "$prog: invalid number: 'foo'\n"}, {OUT => "foo\n"}, @@ -855,13 +864,6 @@ my @Tests = {OUT => "A 1000 x\nB Foo y\nC 2.8G z\n"}, {ERR => "$prog: invalid number: 'Foo'\n"}, {EXIT => 2}], - # one of the lines is too short - ['ign-err-m3.2', '--invalid=fail --field 2 --from=si --to=iec', - {IN_PIPE => "A 1K x\nB\nC 3G z\n"}, - {OUT => "A 1000 x\nB\nC 2.8G z\n"}, - {ERR => "$prog: input line is too short, no numbers found " . - "to convert in field 2\n"}, - {EXIT => 2}], ); my @Locale_Tests = -- cgit v1.2.3-70-g09d2