summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pádraig Brady <P@draigBrady.com> 2014-05-30 17:44:32 +0100
committer Pádraig Brady <P@draigBrady.com> 2014-06-01 12:14:39 +0100
commit 5c6cf94ba5f6c05b3ab6e732de7202754558c03c (patch)
tree c042e76c151a9ee32b26ae88ce68637bb48c9c03
parent 39d1c9576a3f2e5e65c5fb06744aa7245d743bc0 (diff)
download coreutils-5c6cf94ba5f6c05b3ab6e732de7202754558c03c.tar.xz
cut: restore special case handling of -f with -d$'\n'
Commits v8.20-98-g51ce0bf and v8.20-99-gd302aed changed cut(1)
to process each line independently and thus promptly output
each line without buffering. As part of those changes we removed
the special handling of --delimiter=$'\n' --fields=... which
could be used to select arbitrary (ranges of) lines, so as to
simplify and optimize the implementation while also matching the
behavior of different cut(1) implementations.

However, that GNU behavior was in place for a long time, and
could be useful in certain cases like making a separated list like
`seq 10 | cut -f1- -d$'\n' --output-delimiter=,` although other
tools like head(1) and paste(1) are more suited to this operation.
This patch reinstates that functionality but restricts the
"line behind" buffering behavior to only the -d$'\n' case.

We also fix the following related edge case to be more consistent:

  before> printf "\n" | cut -s -d$'\n' -f1- | wc -l
  2
  before> printf "\n" | cut -d$'\n' -f1- | wc -l
  1

  after > printf "\n" | cut -s -d$'\n' -f1- | wc -l
  1
  after > printf "\n" | cut -d$'\n' -f1- | wc -l
  1

* src/cut.c (cut_fields): Adjust as discussed above.
* tests/misc/cut.pl: Likewise.
* NEWS: Mention the change in behavior both for v8.21 and this
effective revert.
* cfg.mk (old_NEWS_hash): Adjust for originally omitted v8.21 entry.
* src/paste.c: s/delimeter/delimiter/ comment typo fix.
-rw-r--r-- NEWS 7
-rw-r--r-- cfg.mk 2
-rw-r--r-- src/cut.c 46
-rw-r--r-- src/paste.c 2
-rwxr-xr-x tests/misc/cut.pl 12
5 files changed, 49 insertions, 20 deletions
diff --git a/NEWS b/NEWS
index 3919b7245..fa1aab8d5 100644
--- a/NEWS
+++ b/NEWS
@@ -90,6 +90,10 @@ GNU coreutils NEWS -*- outline -*-
chroot --userspec will now unset supplemental groups associated with root,
and instead use the supplemental groups of the specified user.
+ cut -d$'\n' again outputs lines identified in the --fields list, having
+ not done so in v8.21 and v8.22. Note using this non portable functionality
+ will result in the delayed output of lines.
+
ls with none of LS_COLORS or COLORTERM environment variables set,
will now honor an empty or unknown TERM environment variable,
and not output colors even with --colors=always.
@@ -343,6 +347,9 @@ GNU coreutils NEWS -*- outline -*-
the system by skipping duplicate entries (identified by the device number).
Consequently, df also elides the early-boot pseudo file system type "rootfs".
+ cut -d$'\n' no longer outputs lines identified in the --fields list,
+ to align with other implementations and to avoid delayed output of lines.
+
nl no longer supports the --page-increment option, which has been
deprecated since coreutils-7.5. Use --line-increment instead.
diff --git a/cfg.mk b/cfg.mk
index 1e884cde3..ea5fc99eb 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -45,7 +45,7 @@ export VERBOSE = yes
# 4914152 9e
export XZ_OPT = -8e
-old_NEWS_hash = 68fc9b352e924d5e59e2f543f80f6a41
+old_NEWS_hash = adf13e9314300d0dff82fa37b247d7db
# Add an exemption for sc_makefile_at_at_check.
_makefile_at_at_check_exceptions = ' && !/^cu_install_program =/'
diff --git a/src/cut.c b/src/cut.c
index 552806823..312551f08 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -109,13 +109,13 @@ enum operating_mode
/* Output characters that are in the given bytes. */
byte_mode,
- /* Output the given delimeter-separated fields. */
+ /* Output the given delimiter-separated fields. */
field_mode
};
static enum operating_mode operating_mode;
-/* If true do not output lines containing no delimeter characters.
+/* If true do not output lines containing no delimiter characters.
Otherwise, all such lines are printed. This option is valid only
with field mode. */
static bool suppress_non_delimited;
@@ -124,7 +124,7 @@ static bool suppress_non_delimited;
those that were specified. */
static bool complement;
-/* The delimeter character for field mode. */
+/* The delimiter character for field mode. */
static unsigned char delim;
/* True if the --output-delimiter=STRING option was specified. */
@@ -538,7 +538,6 @@ cut_fields (FILE *stream)
{
ssize_t len;
size_t n_bytes;
- bool got_line;
len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
GETNLINE_NO_LIMIT, delim, '\n', stream);
@@ -555,14 +554,13 @@ cut_fields (FILE *stream)
assert (n_bytes != 0);
c = 0;
- got_line = field_1_buffer[n_bytes - 1] == '\n';
/* If the first field extends to the end of line (it is not
delimited) and we are printing all non-delimited lines,
print this one. */
- if (to_uchar (field_1_buffer[n_bytes - 1]) != delim || got_line)
+ if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
{
- if (suppress_non_delimited && !(got_line && delim == '\n'))
+ if (suppress_non_delimited)
{
/* Empty. */
}
@@ -570,7 +568,7 @@ cut_fields (FILE *stream)
{
fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
/* Make sure the output line is newline terminated. */
- if (! got_line)
+ if (field_1_buffer[n_bytes - 1] != '\n')
putchar ('\n');
c = '\n';
}
@@ -580,7 +578,19 @@ cut_fields (FILE *stream)
{
/* Print the field, but not the trailing delimiter. */
fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
- found_any_selected_field = true;
+
+ /* With -d$'\n' don't treat the last '\n' as a delimiter. */
+ if (delim == '\n')
+ {
+ int last_c = getc (stream);
+ if (last_c != EOF)
+ {
+ ungetc (last_c, stream);
+ found_any_selected_field = true;
+ }
+ }
+ else
+ found_any_selected_field = true;
}
next_item (&field_idx);
}
@@ -610,12 +620,24 @@ cut_fields (FILE *stream)
}
}
- if (c == '\n' || c == EOF)
+ /* With -d$'\n' don't treat the last '\n' as a delimiter. */
+ if (delim == '\n' && c == delim)
+ {
+ int last_c = getc (stream);
+ if (last_c != EOF)
+ ungetc (last_c, stream);
+ else
+ c = last_c;
+ }
+
+ if (c == delim)
+ next_item (&field_idx);
+ else if (c == '\n' || c == EOF)
{
if (found_any_selected_field
|| !(suppress_non_delimited && field_idx == 1))
{
- if (c == '\n' || prev_c != '\n')
+ if (c == '\n' || prev_c != '\n' || delim == '\n')
putchar ('\n');
}
if (c == EOF)
@@ -624,8 +646,6 @@ cut_fields (FILE *stream)
current_rp = rp;
found_any_selected_field = false;
}
- else if (c == delim)
- next_item (&field_idx);
}
}
diff --git a/src/paste.c b/src/paste.c
index 707c49598..3663aaf6e 100644
--- a/src/paste.c
+++ b/src/paste.c
@@ -62,7 +62,7 @@ static bool have_read_stdin;
corresponding lines from each file in parallel. */
static bool serial_merge;
-/* The delimeters between lines of input files (used cyclically). */
+/* The delimiters between lines of input files (used cyclically). */
static char *delims;
/* A pointer to the character after the end of 'delims'. */
diff --git a/tests/misc/cut.pl b/tests/misc/cut.pl
index 295236707..04188621b 100755
--- a/tests/misc/cut.pl
+++ b/tests/misc/cut.pl
@@ -144,15 +144,17 @@ my @Tests =
['newline-12', '-s', '-d:', '-f1', {IN=>"a:1\nb:"}, {OUT=>"a\nb\n"}],
['newline-13', '-d:', '-f1-', {IN=>"a1:\n:"}, {OUT=>"a1:\n:\n"}],
# newline processing for fields when -d == '\n'
- ['newline-14', "-d'\n'", '-f1', {IN=>"a:1\nb:"}, {OUT=>"a:1\nb:\n"}],
+ ['newline-14', "-d'\n'", '-f1', {IN=>"a:1\nb:"}, {OUT=>"a:1\n"}],
['newline-15', '-s', "-d'\n'", '-f1', {IN=>"a:1\nb:"}, {OUT=>"a:1\n"}],
- ['newline-16', '-s', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>""}],
+ ['newline-16', '-s', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>"b\n"}],
['newline-17', '-s', "-d'\n'", '-f1', {IN=>"\nb"}, {OUT=>"\n"}],
- ['newline-18', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>"\nb\n"}],
- ['newline-19', "-d'\n'", '-f1', {IN=>"\nb"}, {OUT=>"\nb\n"}],
+ ['newline-18', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>"b\n"}],
+ ['newline-19', "-d'\n'", '-f1', {IN=>"\nb"}, {OUT=>"\n"}],
['newline-20', '-s', "-d'\n'", '-f1-', {IN=>"\n"}, {OUT=>"\n"}],
- ['newline-21', '-s', "-d'\n'", '-f1-', {IN=>"\nb"}, {OUT=>"\n"}],
+ ['newline-21', '-s', "-d'\n'", '-f1-', {IN=>"\nb"}, {OUT=>"\nb\n"}],
['newline-22', "-d'\n'", '-f1-', {IN=>"\nb"}, {OUT=>"\nb\n"}],
+ ['newline-23', "-d'\n'", '-f1-', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
+ ['newline-24', "-d'\n'", '-f1,2', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
# New functionality:
['out-delim1', '-c1-3,5-', '--output-d=:', {IN=>"abcdefg\n"},