summaryrefslogtreecommitdiff
path: root/src/cut.c
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com> 2014-05-30 17:44:32 +0100
committer: Pádraig Brady <P@draigBrady.com> 2014-06-01 12:14:39 +0100
commit: 5c6cf94ba5f6c05b3ab6e732de7202754558c03c (patch)
tree: c042e76c151a9ee32b26ae88ce68637bb48c9c03 /src/cut.c
parent: 39d1c9576a3f2e5e65c5fb06744aa7245d743bc0 (diff)
download: coreutils-5c6cf94ba5f6c05b3ab6e732de7202754558c03c.tar.xz
cut: restore special case handling of -f with -d$'\n'

Commits v8.20-98-g51ce0bf and v8.20-99-gd302aed changed cut(1)
to process each line independently and thus promptly output
each line without buffering.  As part of those changes we removed
the special handling of --delimiter=$'\n' --fields=... which could be
used to select arbitrary (ranges of) lines, so as to simplify
and optimize the implementation while also matching the behavior
of different cut(1) implementations.

However that GNU behavior was in place for a long time, and
could be useful in certain cases like making a separated list like
`seq 10 | cut -f1- -d$'\n' --output-delimiter=,` although other tools
like head(1) and paste(1) are more suited to this operation.
This patch reinstates that functionality but restricts the
"line behind" buffering behavior to only the -d$'\n' case.

We also fix the following related edge case to be more consistent:

  before> printf "\n" | cut -s -d$'\n' -f1- | wc -l
  2
  before> printf "\n" | cut -d$'\n' -f1- | wc -l
  1

  after > printf "\n" | cut -s -d$'\n' -f1- | wc -l
  1
  after > printf "\n" | cut -d$'\n' -f1- | wc -l
  1

* src/cut.c (cut_fields): Adjust as discussed above.
* tests/misc/cut.pl: Likewise.
* NEWS: Mention the change in behavior both for v8.21 and
this effective revert.
* cfg.mk (old_NEWS_hash): Adjust for originally omitted v8.21 entry.
* src/paste.c: s/delimeter/delimiter/ comment typo fix.

Diffstat (limited to 'src/cut.c')
-rw-r--r-- src/cut.c 46
1 files changed, 33 insertions, 13 deletions
diff --git a/src/cut.c b/src/cut.c
index 552806823..312551f08 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -109,13 +109,13 @@ enum operating_mode
/* Output characters that are in the given bytes. */
byte_mode,
- /* Output the given delimeter-separated fields. */
+ /* Output the given delimiter-separated fields. */
field_mode
};
static enum operating_mode operating_mode;
-/* If true do not output lines containing no delimeter characters.
+/* If true do not output lines containing no delimiter characters.
Otherwise, all such lines are printed. This option is valid only
with field mode. */
static bool suppress_non_delimited;
@@ -124,7 +124,7 @@ static bool suppress_non_delimited;
those that were specified. */
static bool complement;
-/* The delimeter character for field mode. */
+/* The delimiter character for field mode. */
static unsigned char delim;
/* True if the --output-delimiter=STRING option was specified. */
@@ -538,7 +538,6 @@ cut_fields (FILE *stream)
{
ssize_t len;
size_t n_bytes;
- bool got_line;
len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
GETNLINE_NO_LIMIT, delim, '\n', stream);
@@ -555,14 +554,13 @@ cut_fields (FILE *stream)
assert (n_bytes != 0);
c = 0;
- got_line = field_1_buffer[n_bytes - 1] == '\n';
/* If the first field extends to the end of line (it is not
delimited) and we are printing all non-delimited lines,
print this one. */
- if (to_uchar (field_1_buffer[n_bytes - 1]) != delim || got_line)
+ if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
{
- if (suppress_non_delimited && !(got_line && delim == '\n'))
+ if (suppress_non_delimited)
{
/* Empty. */
}
@@ -570,7 +568,7 @@ cut_fields (FILE *stream)
{
fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
/* Make sure the output line is newline terminated. */
- if (! got_line)
+ if (field_1_buffer[n_bytes - 1] != '\n')
putchar ('\n');
c = '\n';
}
@@ -580,7 +578,19 @@ cut_fields (FILE *stream)
{
/* Print the field, but not the trailing delimiter. */
fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
- found_any_selected_field = true;
+
+ /* With -d$'\n' don't treat the last '\n' as a delimiter. */
+ if (delim == '\n')
+ {
+ int last_c = getc (stream);
+ if (last_c != EOF)
+ {
+ ungetc (last_c, stream);
+ found_any_selected_field = true;
+ }
+ }
+ else
+ found_any_selected_field = true;
}
next_item (&field_idx);
}
@@ -610,12 +620,24 @@ cut_fields (FILE *stream)
}
}
- if (c == '\n' || c == EOF)
+ /* With -d$'\n' don't treat the last '\n' as a delimiter. */
+ if (delim == '\n' && c == delim)
+ {
+ int last_c = getc (stream);
+ if (last_c != EOF)
+ ungetc (last_c, stream);
+ else
+ c = last_c;
+ }
+
+ if (c == delim)
+ next_item (&field_idx);
+ else if (c == '\n' || c == EOF)
{
if (found_any_selected_field
|| !(suppress_non_delimited && field_idx == 1))
{
- if (c == '\n' || prev_c != '\n')
+ if (c == '\n' || prev_c != '\n' || delim == '\n')
putchar ('\n');
}
if (c == EOF)
@@ -624,8 +646,6 @@ cut_fields (FILE *stream)
current_rp = rp;
found_any_selected_field = false;
}
- else if (c == delim)
- next_item (&field_idx);
}
}