diff options
author | Assaf Gordon <assafgordon@gmail.com> | 2015-07-17 23:30:30 -0400 |
---|---|---|
committer | Assaf Gordon <assafgordon@gmail.com> | 2015-09-12 02:27:32 +0000 |
commit | bd1bc2b3fe1c87744cc251334cc1c804075b027c (patch) | |
tree | 5baeb6bacf915762980ff3e87cd4f203814ffc28 /src/cut.c | |
parent | 5bbdcd2a02d9686edb788c46fc953192acdc6796 (diff) | |
download | coreutils-bd1bc2b3fe1c87744cc251334cc1c804075b027c.tar.xz |
cut: refactor into set-fields module
Extract the functionality of parsing --field=LIST into a separate
module, to be used by other programs.
* src/cut.c: move field parsing code from here ...
* src/set-fields.{c,h}: ... to here.
(set_fields): generalize by supporting multiple parsing/reporting
options.
(struct range_pair): rename to field_range_pair.
* src/local.mk: link cut with set-field.
* po/POTFILES.in: add set-field.c
* tests/misc/cut.pl: update wording of error messages
Diffstat (limited to 'src/cut.c')
-rw-r--r-- | src/cut.c | 257 |
1 files changed, 12 insertions, 245 deletions
@@ -34,9 +34,10 @@ #include "fadvise.h" #include "getndelim2.h" #include "hash.h" -#include "quote.h" #include "xstrndup.h" +#include "set-fields.h" + /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "cut" @@ -54,41 +55,11 @@ while (0) -struct range_pair - { - size_t lo; - size_t hi; - }; - -/* Array of `struct range_pair' holding all the finite ranges. */ -static struct range_pair *rp; - /* Pointer inside RP. When checking if a byte or field is selected by a finite range, we check if it is between CURRENT_RP.LO and CURRENT_RP.HI. If the byte or field index is greater than CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ -static struct range_pair *current_rp; - -/* Number of finite ranges specified by the user. */ -static size_t n_rp; - -/* Number of `struct range_pair's allocated. */ -static size_t n_rp_allocated; - - -/* Append LOW, HIGH to the list RP of range pairs, allocating additional - space if necessary. Update global variable N_RP. When allocating, - update global variable N_RP_ALLOCATED. */ - -static void -add_range_pair (size_t lo, size_t hi) -{ - if (n_rp == n_rp_allocated) - rp = X2NREALLOC (rp, &n_rp_allocated); - rp[n_rp].lo = lo; - rp[n_rp].hi = hi; - ++n_rp; -} +static struct field_range_pair *current_rp; /* This buffer is used to support the semantics of the -s option (or lack of same) when the specified field list includes (does @@ -221,208 +192,6 @@ Each range is one of:\n\ exit (status); } -/* Comparison function for qsort to order the list of - struct range_pairs. */ -static int -compare_ranges (const void *a, const void *b) -{ - int a_start = ((const struct range_pair *) a)->lo; - int b_start = ((const struct range_pair *) b)->lo; - return a_start < b_start ? -1 : a_start > b_start; -} - -/* Reallocate Range Pair entries, with corresponding - entries outside the range of each specified entry. */ - -static void -complement_rp (void) -{ - if (complement) - { - struct range_pair *c = rp; - size_t n = n_rp; - size_t i; - - rp = NULL; - n_rp = 0; - n_rp_allocated = 0; - - if (c[0].lo > 1) - add_range_pair (1, c[0].lo - 1); - - for (i = 1; i < n; ++i) - { - if (c[i-1].hi + 1 == c[i].lo) - continue; - - add_range_pair (c[i-1].hi + 1, c[i].lo - 1); - } - - if (c[n-1].hi < SIZE_MAX) - add_range_pair (c[n-1].hi + 1, SIZE_MAX); - - free (c); - } -} - -/* Given the list of field or byte range specifications FIELDSTR, - allocate and initialize the RP array. FIELDSTR should - be composed of one or more numbers or ranges of numbers, separated - by blanks or commas. Incomplete ranges may be given: '-m' means '1-m'; - 'n-' means 'n' through end of line. - Return true if FIELDSTR contains at least one field specification, - false otherwise. */ - -static bool -set_fields (const char *fieldstr) -{ - size_t initial = 1; /* Value of first number in a range. */ - size_t value = 0; /* If nonzero, a number being accumulated. */ - bool lhs_specified = false; - bool rhs_specified = false; - bool dash_found = false; /* True if a '-' is found in this field. */ - bool field_found = false; /* True if at least one field spec - has been processed. */ - - size_t i; - bool in_digits = false; - - /* Collect and store in RP the range end points. */ - - while (true) - { - if (*fieldstr == '-') - { - in_digits = false; - /* Starting a range. */ - if (dash_found) - FATAL_ERROR (_("invalid byte, character or field list")); - dash_found = true; - fieldstr++; - - if (lhs_specified && !value) - FATAL_ERROR (_("fields and positions are numbered from 1")); - - initial = (lhs_specified ? value : 1); - value = 0; - } - else if (*fieldstr == ',' - || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0') - { - in_digits = false; - /* Ending the string, or this field/byte sublist. */ - if (dash_found) - { - dash_found = false; - - if (!lhs_specified && !rhs_specified) - FATAL_ERROR (_("invalid range with no endpoint: -")); - - /* A range. Possibilities: -n, m-n, n-. - In any case, 'initial' contains the start of the range. */ - if (!rhs_specified) - { - /* 'n-'. From 'initial' to end of line. */ - add_range_pair (initial, SIZE_MAX); - field_found = true; - } - else - { - /* 'm-n' or '-n' (1-n). */ - if (value < initial) - FATAL_ERROR (_("invalid decreasing range")); - - add_range_pair (initial, value); - field_found = true; - } - value = 0; - } - else - { - /* A simple field number, not a range. */ - if (value == 0) - FATAL_ERROR (_("fields and positions are numbered from 1")); - add_range_pair (value, value); - value = 0; - field_found = true; - } - - if (*fieldstr == '\0') - break; - - fieldstr++; - lhs_specified = false; - rhs_specified = false; - } - else if (ISDIGIT (*fieldstr)) - { - /* Record beginning of digit string, in case we have to - complain about it. */ - static char const *num_start; - if (!in_digits || !num_start) - num_start = fieldstr; - in_digits = true; - - if (dash_found) - rhs_specified = 1; - else - lhs_specified = 1; - - /* Detect overflow. */ - if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t) - || value == SIZE_MAX) - { - /* In case the user specified -c$(echo 2^64|bc),22, - complain only about the first number. */ - /* Determine the length of the offending number. */ - size_t len = strspn (num_start, "0123456789"); - char *bad_num = xstrndup (num_start, len); - if (operating_mode == byte_mode) - error (0, 0, - _("byte offset %s is too large"), quote (bad_num)); - else - error (0, 0, - _("field number %s is too large"), quote (bad_num)); - free (bad_num); - exit (EXIT_FAILURE); - } - - fieldstr++; - } - else - FATAL_ERROR (_("invalid byte, character or field list")); - } - - qsort (rp, n_rp, sizeof (rp[0]), compare_ranges); - - /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */ - for (i = 0; i < n_rp; ++i) - { - for (size_t j = i + 1; j < n_rp; ++j) - { - if (rp[j].lo <= rp[i].hi) - { - rp[i].hi = MAX (rp[j].hi, rp[i].hi); - memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp); - n_rp--; - j--; - } - else - break; - } - } - - complement_rp (); - - /* After merging, reallocate RP so we release memory to the system. - Also add a sentinel at the end of RP, to avoid out of bounds access - and for performance reasons. */ - ++n_rp; - rp = xrealloc (rp, n_rp * sizeof (struct range_pair)); - rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX; - - return field_found; -} /* Increment *ITEM_IDX (i.e., a field or byte index), and if required CURRENT_RP. */ @@ -463,7 +232,7 @@ cut_bytes (FILE *stream) byte_idx = 0; print_delimiter = false; - current_rp = rp; + current_rp = frp; while (true) { int c; /* Each character from the file. */ @@ -475,7 +244,7 @@ cut_bytes (FILE *stream) putchar ('\n'); byte_idx = 0; print_delimiter = false; - current_rp = rp; + current_rp = frp; } else if (c == EOF) { @@ -514,7 +283,7 @@ cut_fields (FILE *stream) bool found_any_selected_field = false; bool buffer_first_field; - current_rp = rp; + current_rp = frp; c = getc (stream); if (c == EOF) @@ -642,7 +411,7 @@ cut_fields (FILE *stream) if (c == EOF) break; field_idx = 1; - current_rp = rp; + current_rp = frp; found_any_selected_field = false; } } @@ -793,13 +562,9 @@ main (int argc, char **argv) FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ \tonly when operating on fields")); - if (! set_fields (spec_list_string)) - { - if (operating_mode == field_mode) - FATAL_ERROR (_("missing list of fields")); - else - FATAL_ERROR (_("missing list of positions")); - } + set_fields (spec_list_string, + ( (operating_mode == field_mode) ? 0 : SETFLD_ERRMSG_USE_POS) + | (complement ? SETFLD_COMPLEMENT : 0) ); if (!delim_specified) delim = '\t'; @@ -826,5 +591,7 @@ main (int argc, char **argv) ok = false; } + IF_LINT (reset_fields ()); + return ok ? EXIT_SUCCESS : EXIT_FAILURE; } |