summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com>, 2016-01-08 13:04:03 +0000
committer: Pádraig Brady <P@draigBrady.com>, 2016-01-13 10:59:56 +0000
commit: a499a0ce583a8d941e50c4da450133e694994d17 (patch)
tree: b6700e72ac92627c8308728ba883c2f78f5f9fe0
parent: bc94551f63cfc4c05a56628dfcb386707d9e98cb (diff)
download: coreutils-a499a0ce583a8d941e50c4da450133e694994d17.tar.xz
cut: add the -z,--zero-terminated option
* doc/coreutils.texi (cut invocation): Reference the description.
* src/cut.c: Parameterize '\n' references.
* tests/misc/cut.pl: Add tests for character and field processing.
* NEWS: Mention the new feature.
-rw-r--r--  NEWS                 6
-rw-r--r--  doc/coreutils.texi   2
-rw-r--r--  src/cut.c           42
-rwxr-xr-x  tests/misc/cut.pl    8
4 files changed, 40 insertions, 18 deletions
diff --git a/NEWS b/NEWS
index eb7fc5633..45f299056 100644
--- a/NEWS
+++ b/NEWS
@@ -33,15 +33,15 @@ GNU coreutils NEWS -*- outline -*-
** New features
+ cut, head, tail now have -z, --zero-terminated options to work with
+ NUL delimited items.
+
dd now summarizes sizes in --human-readable format too, not just --si.
E.g., "3441325000 bytes (3.4 GB, 3.2 GiB) copied". It omits the summaries
if they would not provide useful information, e.g., "3 bytes copied".
Its status=progress output now uses the same format as ordinary status,
perhaps with trailing spaces to erase previous progress output.
- head, tail now have -z, --zero-terminated options to work with
- NUL delimited items.
-
md5sum now supports the --ignore-missing option to allow
verifying a subset of files given a larger list of checksums.
This also affects sha1sum, sha224sum, sha256sum, sha384sum and sha512sum.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index a7a89ad98..dcf28c5f9 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -5902,6 +5902,8 @@ In other words, do @emph{not} print the bytes, characters or fields
specified via those options. This option is useful when you have
many fields and want to print all but a few of them.
+@optZeroTerminated
+
@end table
@exitstatus
diff --git a/src/cut.c b/src/cut.c
index 96440af54..7ab6be4cb 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -98,6 +98,9 @@ static bool complement;
/* The delimiter character for field mode. */
static unsigned char delim;
+/* The delimiter for each line/record. */
+static unsigned char line_delim = '\n';
+
/* True if the --output-delimiter=STRING option was specified. */
static bool output_delimiter_specified;
@@ -128,6 +131,7 @@ static struct option const longopts[] =
{"only-delimited", no_argument, NULL, 's'},
{"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
{"complement", no_argument, NULL, COMPLEMENT_OPTION},
+ {"zero-terminated", no_argument, NULL, 'z'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
@@ -171,6 +175,9 @@ Print selected parts of lines from each FILE to standard output.\n\
--output-delimiter=STRING use STRING as the output delimiter\n\
the default is to use the input delimiter\n\
"), stdout);
+ fputs (_("\
+ -z, --zero-terminated line delimiter is NUL, not newline\n\
+"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
fputs (_("\
@@ -239,9 +246,9 @@ cut_bytes (FILE *stream)
c = getc (stream);
- if (c == '\n')
+ if (c == line_delim)
{
- putchar ('\n');
+ putchar (c);
byte_idx = 0;
print_delimiter = false;
current_rp = frp;
@@ -249,7 +256,7 @@ cut_bytes (FILE *stream)
else if (c == EOF)
{
if (byte_idx > 0)
- putchar ('\n');
+ putchar (line_delim);
break;
}
else
@@ -308,7 +315,7 @@ cut_fields (FILE *stream)
size_t n_bytes;
len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
- GETNLINE_NO_LIMIT, delim, '\n', stream);
+ GETNLINE_NO_LIMIT, delim, line_delim, stream);
if (len < 0)
{
free (field_1_buffer);
@@ -336,9 +343,9 @@ cut_fields (FILE *stream)
{
fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
/* Make sure the output line is newline terminated. */
- if (field_1_buffer[n_bytes - 1] != '\n')
- putchar ('\n');
- c = '\n';
+ if (field_1_buffer[n_bytes - 1] != line_delim)
+ putchar (line_delim);
+ c = line_delim;
}
continue;
}
@@ -348,7 +355,7 @@ cut_fields (FILE *stream)
fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
/* With -d$'\n' don't treat the last '\n' as a delimiter. */
- if (delim == '\n')
+ if (delim == line_delim)
{
int last_c = getc (stream);
if (last_c != EOF)
@@ -374,7 +381,7 @@ cut_fields (FILE *stream)
}
found_any_selected_field = true;
- while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
+ while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
{
putchar (c);
prev_c = c;
@@ -382,14 +389,14 @@ cut_fields (FILE *stream)
}
else
{
- while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
+ while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
{
prev_c = c;
}
}
/* With -d$'\n' don't treat the last '\n' as a delimiter. */
- if (delim == '\n' && c == delim)
+ if (delim == line_delim && c == delim)
{
int last_c = getc (stream);
if (last_c != EOF)
@@ -400,13 +407,14 @@ cut_fields (FILE *stream)
if (c == delim)
next_item (&field_idx);
- else if (c == '\n' || c == EOF)
+ else if (c == line_delim || c == EOF)
{
if (found_any_selected_field
|| !(suppress_non_delimited && field_idx == 1))
{
- if (c == '\n' || prev_c != '\n' || delim == '\n')
- putchar ('\n');
+ if (c == line_delim || prev_c != line_delim
+ || delim == line_delim)
+ putchar (line_delim);
}
if (c == EOF)
break;
@@ -492,7 +500,7 @@ main (int argc, char **argv)
delim = '\0';
have_read_stdin = false;
- while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
+ while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1)
{
switch (optc)
{
@@ -538,6 +546,10 @@ main (int argc, char **argv)
suppress_non_delimited = true;
break;
+ case 'z':
+ line_delim = '\0';
+ break;
+
case COMPLEMENT_OPTION:
complement = true;
break;
diff --git a/tests/misc/cut.pl b/tests/misc/cut.pl
index 70c5a642a..f6f8a5611 100755
--- a/tests/misc/cut.pl
+++ b/tests/misc/cut.pl
@@ -161,6 +161,14 @@ my @Tests =
['newline-23', "-d'\n'", '-f1-', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
['newline-24', "-d'\n'", '-f1,2', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
+ # --zero-terminated
+ ['zerot-1', "-z", '-c1', {IN=>"ab\0cd\0"}, {OUT=>"a\0c\0"}],
+ ['zerot-2', "-z", '-c1', {IN=>"ab\0cd"}, {OUT=>"a\0c\0"}],
+ ['zerot-3', '-z -f1-', {IN=>""}, {OUT=>""}],
+ ['zerot-4', '-z -d:', '-f1', {IN=>"a:1\0b:2"}, {OUT=>"a\0b\0"}],
+ ['zerot-5', '-z -d:', '-f1-', {IN=>"a1:\0:"}, {OUT=>"a1:\0:\0"}],
+ ['zerot-6', "-z -d ''", '-f1,2', '--ou=:', {IN=>"a\0b\0"}, {OUT=>"a:b\0"}],
+
# New functionality:
['out-delim1', '-c1-3,5-', '--output-d=:', {IN=>"abcdefg\n"},
{OUT=>"abc:efg\n"}],