summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Meyering <jim@meyering.net>2003-01-09 20:16:58 +0000
committerJim Meyering <jim@meyering.net>2003-01-09 20:16:58 +0000
commit322ee6e302ba71e69b4a7f7ef70b7e5e7392d428 (patch)
tree66ba3d3eb45a18a86564f5dcc89bc05ab2779894
parentba5be82b7eef33bcda397962de6b28193a912607 (diff)
downloadcoreutils-322ee6e302ba71e69b4a7f7ef70b7e5e7392d428.tar.xz
When selecting ranges of byte offsets (as opposed to ranges of fields)
and when --output-delimiter=STRING is specified, output STRING between ranges of selected bytes. (RANGE_START_SENTINEL): Define. (output_delimiter_specified): New global. (print_kth): Add parameter. Adjust all callers. (set_fields): Mark each range-start index with RANGE_START_SENTINEL. (cut_bytes): When requested, output STRING between ranges of selected bytes. (main): Make a diagnostic a little clearer. Based on a patch from Jan Nieuwenhuizen.
-rw-r--r--src/cut.c67
1 files changed, 55 insertions, 12 deletions
diff --git a/src/cut.c b/src/cut.c
index e0a1cb458..960917394 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -93,6 +93,10 @@ static unsigned int max_range_endpoint;
to end of line. */
static unsigned int eol_range_start;
+/* A nonzero, non-1 value with which to distinguish the index
+ corresponding to the lower bound of a range. */
+#define RANGE_START_SENTINEL 2
+
/* In byte mode, which bytes to output.
In field mode, which DELIM-separated fields to output.
Both bytes and fields are numbered starting with 1,
@@ -126,6 +130,9 @@ static int suppress_non_delimited;
/* The delimeter character for field mode. */
static int delim;
+/* Nonzero if the --output-delimiter=STRING option was specified. */
+static int output_delimiter_specified;
+
/* The length of output_delimiter_string. */
static size_t output_delimiter_length;
@@ -210,11 +217,29 @@ With no FILE, or when FILE is -, read standard input.\n\
exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
+/* Return nonzero if the K'th field or byte is printable.
+ When returning nonzero, if RANGE_START is non-NULL,
+ set *RANGE_START to nonzero if K is the beginning of a range, and
+ set *RANGE_START to zero if K is not the beginning of a range. */
+
static int
-print_kth (unsigned int k)
+print_kth (unsigned int k, int *range_start)
{
- return ((0 < eol_range_start && eol_range_start <= k)
- || (k <= max_range_endpoint && printable_field[k]));
+ if (0 < eol_range_start && eol_range_start <= k)
+ {
+ if (range_start)
+ *range_start = (k == eol_range_start);
+ return 1;
+ }
+
+ if (k <= max_range_endpoint && printable_field[k])
+ {
+ if (range_start)
+ *range_start = (printable_field[k] == RANGE_START_SENTINEL);
+ return 1;
+ }
+
+ return 0;
}
/* Given the list of field or byte range specifications FIELDSTR, set
@@ -371,8 +396,13 @@ set_fields (const char *fieldstr)
/* Set the array entries corresponding to integers in the ranges of RP. */
for (i = 0; i < n_rp; i++)
{
- unsigned int j;
- for (j = rp[i].lo; j <= rp[i].hi; j++)
+ unsigned int j = rp[i].lo;
+
+ /* Mark the first position of field or range with a sentinel,
+ but not if it's already part of another range. */
+ if (j <= rp[i].hi && ! printable_field[j])
+ printable_field[j] = RANGE_START_SENTINEL;
+ for (++j; j <= rp[i].hi; j++)
{
printable_field[j] = 1;
}
@@ -388,9 +418,13 @@ set_fields (const char *fieldstr)
static void
cut_bytes (FILE *stream)
{
- unsigned int byte_idx; /* Number of chars in the line so far. */
+ unsigned int byte_idx; /* Number of bytes in the line so far. */
+ /* Whether to begin printing delimiters between ranges for the current line.
+ Set after we've begun printing data corresponding to the first range. */
+ int print_delimiter;
byte_idx = 0;
+ print_delimiter = 0;
while (1)
{
register int c; /* Each character from the file. */
@@ -401,6 +435,7 @@ cut_bytes (FILE *stream)
{
putchar ('\n');
byte_idx = 0;
+ print_delimiter = 0;
}
else if (c == EOF)
{
@@ -410,9 +445,15 @@ cut_bytes (FILE *stream)
}
else
{
- ++byte_idx;
- if (print_kth (byte_idx))
+ int range_start;
+ if (print_kth (++byte_idx, &range_start))
{
+ if (range_start && print_delimiter && output_delimiter_specified)
+ {
+ fwrite (output_delimiter_string, sizeof (char),
+ output_delimiter_length, stdout);
+ }
+ print_delimiter = 1;
putchar (c);
}
}
@@ -444,7 +485,7 @@ cut_fields (FILE *stream)
and the first field has been selected, or if non-delimited lines
must be suppressed and the first field has *not* been selected.
That is because a non-delimited line has exactly one field. */
- buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
while (1)
{
@@ -483,7 +524,7 @@ cut_fields (FILE *stream)
}
continue;
}
- if (print_kth (1))
+ if (print_kth (1, NULL))
{
/* Print the field, but not the trailing delimiter. */
fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
@@ -494,7 +535,7 @@ cut_fields (FILE *stream)
if (c != EOF)
{
- if (print_kth (field_idx))
+ if (print_kth (field_idx, NULL))
{
if (found_any_selected_field)
{
@@ -648,6 +689,7 @@ main (int argc, char **argv)
break;
case OUTPUT_DELIMITER_OPTION:
+ output_delimiter_specified = 1;
/* Interpret --output-delimiter='' to mean
`use the NUL byte as the delimiter.' */
output_delimiter_length = (optarg[0] == '\0'
@@ -675,7 +717,8 @@ main (int argc, char **argv)
FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
if (delim != '\0' && operating_mode != field_mode)
- FATAL_ERROR (_("a delimiter may be specified only when operating on fields"));
+ FATAL_ERROR (_("an input delimiter may be specified only\
+ when operating on fields"));
if (suppress_non_delimited && operating_mode != field_mode)
FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\