summaryrefslogtreecommitdiff
path: root/src/cut.c
diff options
context:
space:
mode:
authorAssaf Gordon <assafgordon@gmail.com>2015-07-17 23:30:30 -0400
committerAssaf Gordon <assafgordon@gmail.com>2015-09-12 02:27:32 +0000
commitbd1bc2b3fe1c87744cc251334cc1c804075b027c (patch)
tree5baeb6bacf915762980ff3e87cd4f203814ffc28 /src/cut.c
parent5bbdcd2a02d9686edb788c46fc953192acdc6796 (diff)
downloadcoreutils-bd1bc2b3fe1c87744cc251334cc1c804075b027c.tar.xz
cut: refactor into set-fields module
Extract the functionality of parsing --field=LIST into a separate module, to be used by other programs. * src/cut.c: move field parsing code from here ... * src/set-fields.{c,h}: ... to here. (set_fields): generalize by supporting multiple parsing/reporting options. (struct range_pair): rename to field_range_pair. * src/local.mk: link cut with set-field. * po/POTFILES.in: add set-field.c * tests/misc/cut.pl: update wording of error messages
Diffstat (limited to 'src/cut.c')
-rw-r--r--src/cut.c257
1 files changed, 12 insertions, 245 deletions
diff --git a/src/cut.c b/src/cut.c
index 4f80ebd45..aac91ef5e 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -34,9 +34,10 @@
#include "fadvise.h"
#include "getndelim2.h"
#include "hash.h"
-#include "quote.h"
#include "xstrndup.h"
+#include "set-fields.h"
+
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "cut"
@@ -54,41 +55,11 @@
while (0)
-struct range_pair
- {
- size_t lo;
- size_t hi;
- };
-
-/* Array of `struct range_pair' holding all the finite ranges. */
-static struct range_pair *rp;
-
/* Pointer inside RP. When checking if a byte or field is selected
by a finite range, we check if it is between CURRENT_RP.LO
and CURRENT_RP.HI. If the byte or field index is greater than
CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */
-static struct range_pair *current_rp;
-
-/* Number of finite ranges specified by the user. */
-static size_t n_rp;
-
-/* Number of `struct range_pair's allocated. */
-static size_t n_rp_allocated;
-
-
-/* Append LOW, HIGH to the list RP of range pairs, allocating additional
- space if necessary. Update global variable N_RP. When allocating,
- update global variable N_RP_ALLOCATED. */
-
-static void
-add_range_pair (size_t lo, size_t hi)
-{
- if (n_rp == n_rp_allocated)
- rp = X2NREALLOC (rp, &n_rp_allocated);
- rp[n_rp].lo = lo;
- rp[n_rp].hi = hi;
- ++n_rp;
-}
+static struct field_range_pair *current_rp;
/* This buffer is used to support the semantics of the -s option
(or lack of same) when the specified field list includes (does
@@ -221,208 +192,6 @@ Each range is one of:\n\
exit (status);
}
-/* Comparison function for qsort to order the list of
- struct range_pairs. */
-static int
-compare_ranges (const void *a, const void *b)
-{
- int a_start = ((const struct range_pair *) a)->lo;
- int b_start = ((const struct range_pair *) b)->lo;
- return a_start < b_start ? -1 : a_start > b_start;
-}
-
-/* Reallocate Range Pair entries, with corresponding
- entries outside the range of each specified entry. */
-
-static void
-complement_rp (void)
-{
- if (complement)
- {
- struct range_pair *c = rp;
- size_t n = n_rp;
- size_t i;
-
- rp = NULL;
- n_rp = 0;
- n_rp_allocated = 0;
-
- if (c[0].lo > 1)
- add_range_pair (1, c[0].lo - 1);
-
- for (i = 1; i < n; ++i)
- {
- if (c[i-1].hi + 1 == c[i].lo)
- continue;
-
- add_range_pair (c[i-1].hi + 1, c[i].lo - 1);
- }
-
- if (c[n-1].hi < SIZE_MAX)
- add_range_pair (c[n-1].hi + 1, SIZE_MAX);
-
- free (c);
- }
-}
-
-/* Given the list of field or byte range specifications FIELDSTR,
- allocate and initialize the RP array. FIELDSTR should
- be composed of one or more numbers or ranges of numbers, separated
- by blanks or commas. Incomplete ranges may be given: '-m' means '1-m';
- 'n-' means 'n' through end of line.
- Return true if FIELDSTR contains at least one field specification,
- false otherwise. */
-
-static bool
-set_fields (const char *fieldstr)
-{
- size_t initial = 1; /* Value of first number in a range. */
- size_t value = 0; /* If nonzero, a number being accumulated. */
- bool lhs_specified = false;
- bool rhs_specified = false;
- bool dash_found = false; /* True if a '-' is found in this field. */
- bool field_found = false; /* True if at least one field spec
- has been processed. */
-
- size_t i;
- bool in_digits = false;
-
- /* Collect and store in RP the range end points. */
-
- while (true)
- {
- if (*fieldstr == '-')
- {
- in_digits = false;
- /* Starting a range. */
- if (dash_found)
- FATAL_ERROR (_("invalid byte, character or field list"));
- dash_found = true;
- fieldstr++;
-
- if (lhs_specified && !value)
- FATAL_ERROR (_("fields and positions are numbered from 1"));
-
- initial = (lhs_specified ? value : 1);
- value = 0;
- }
- else if (*fieldstr == ','
- || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
- {
- in_digits = false;
- /* Ending the string, or this field/byte sublist. */
- if (dash_found)
- {
- dash_found = false;
-
- if (!lhs_specified && !rhs_specified)
- FATAL_ERROR (_("invalid range with no endpoint: -"));
-
- /* A range. Possibilities: -n, m-n, n-.
- In any case, 'initial' contains the start of the range. */
- if (!rhs_specified)
- {
- /* 'n-'. From 'initial' to end of line. */
- add_range_pair (initial, SIZE_MAX);
- field_found = true;
- }
- else
- {
- /* 'm-n' or '-n' (1-n). */
- if (value < initial)
- FATAL_ERROR (_("invalid decreasing range"));
-
- add_range_pair (initial, value);
- field_found = true;
- }
- value = 0;
- }
- else
- {
- /* A simple field number, not a range. */
- if (value == 0)
- FATAL_ERROR (_("fields and positions are numbered from 1"));
- add_range_pair (value, value);
- value = 0;
- field_found = true;
- }
-
- if (*fieldstr == '\0')
- break;
-
- fieldstr++;
- lhs_specified = false;
- rhs_specified = false;
- }
- else if (ISDIGIT (*fieldstr))
- {
- /* Record beginning of digit string, in case we have to
- complain about it. */
- static char const *num_start;
- if (!in_digits || !num_start)
- num_start = fieldstr;
- in_digits = true;
-
- if (dash_found)
- rhs_specified = 1;
- else
- lhs_specified = 1;
-
- /* Detect overflow. */
- if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
- || value == SIZE_MAX)
- {
- /* In case the user specified -c$(echo 2^64|bc),22,
- complain only about the first number. */
- /* Determine the length of the offending number. */
- size_t len = strspn (num_start, "0123456789");
- char *bad_num = xstrndup (num_start, len);
- if (operating_mode == byte_mode)
- error (0, 0,
- _("byte offset %s is too large"), quote (bad_num));
- else
- error (0, 0,
- _("field number %s is too large"), quote (bad_num));
- free (bad_num);
- exit (EXIT_FAILURE);
- }
-
- fieldstr++;
- }
- else
- FATAL_ERROR (_("invalid byte, character or field list"));
- }
-
- qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
-
- /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
- for (i = 0; i < n_rp; ++i)
- {
- for (size_t j = i + 1; j < n_rp; ++j)
- {
- if (rp[j].lo <= rp[i].hi)
- {
- rp[i].hi = MAX (rp[j].hi, rp[i].hi);
- memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp);
- n_rp--;
- j--;
- }
- else
- break;
- }
- }
-
- complement_rp ();
-
- /* After merging, reallocate RP so we release memory to the system.
- Also add a sentinel at the end of RP, to avoid out of bounds access
- and for performance reasons. */
- ++n_rp;
- rp = xrealloc (rp, n_rp * sizeof (struct range_pair));
- rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX;
-
- return field_found;
-}
/* Increment *ITEM_IDX (i.e., a field or byte index),
and if required CURRENT_RP. */
@@ -463,7 +232,7 @@ cut_bytes (FILE *stream)
byte_idx = 0;
print_delimiter = false;
- current_rp = rp;
+ current_rp = frp;
while (true)
{
int c; /* Each character from the file. */
@@ -475,7 +244,7 @@ cut_bytes (FILE *stream)
putchar ('\n');
byte_idx = 0;
print_delimiter = false;
- current_rp = rp;
+ current_rp = frp;
}
else if (c == EOF)
{
@@ -514,7 +283,7 @@ cut_fields (FILE *stream)
bool found_any_selected_field = false;
bool buffer_first_field;
- current_rp = rp;
+ current_rp = frp;
c = getc (stream);
if (c == EOF)
@@ -642,7 +411,7 @@ cut_fields (FILE *stream)
if (c == EOF)
break;
field_idx = 1;
- current_rp = rp;
+ current_rp = frp;
found_any_selected_field = false;
}
}
@@ -793,13 +562,9 @@ main (int argc, char **argv)
FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
\tonly when operating on fields"));
- if (! set_fields (spec_list_string))
- {
- if (operating_mode == field_mode)
- FATAL_ERROR (_("missing list of fields"));
- else
- FATAL_ERROR (_("missing list of positions"));
- }
+ set_fields (spec_list_string,
+ ( (operating_mode == field_mode) ? 0 : SETFLD_ERRMSG_USE_POS)
+ | (complement ? SETFLD_COMPLEMENT : 0) );
if (!delim_specified)
delim = '\t';
@@ -826,5 +591,7 @@ main (int argc, char **argv)
ok = false;
}
+ IF_LINT (reset_fields ());
+
return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}