From a499a0ce583a8d941e50c4da450133e694994d17 Mon Sep 17 00:00:00 2001 From: Pádraig Brady
Date: Fri, 8 Jan 2016 13:04:03 +0000 Subject: cut: add the -z,--zero-terminated option * doc/coreutils.texi (cut invocation): Reference the description. * src/cut.c: Parameterize '\n' references. * tests/misc/cut.pl: Add tests for character and field processing. * NEWS: Mention the new feature. --- NEWS | 6 +++--- doc/coreutils.texi | 2 ++ src/cut.c | 42 +++++++++++++++++++++++++++--------------- tests/misc/cut.pl | 8 ++++++++ 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/NEWS b/NEWS index eb7fc5633..45f299056 100644 --- a/NEWS +++ b/NEWS @@ -33,15 +33,15 @@ GNU coreutils NEWS -*- outline -*- ** New features + cut, head, tail now have -z, --zero-terminated options to work with + NUL delimited items. + dd now summarizes sizes in --human-readable format too, not just --si. E.g., "3441325000 bytes (3.4 GB, 3.2 GiB) copied". It omits the summaries if they would not provide useful information, e.g., "3 bytes copied". Its status=progress output now uses the same format as ordinary status, perhaps with trailing spaces to erase previous progress output. - head, tail now have -z, --zero-terminated options to work with - NUL delimited items. - md5sum now supports the --ignore-missing option to allow verifying a subset of files given a larger list of checksums. This also affects sha1sum, sha224sum, sha256sum, sha384sum and sha512sum. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index a7a89ad98..dcf28c5f9 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -5902,6 +5902,8 @@ In other words, do @emph{not} print the bytes, characters or fields specified via those options. This option is useful when you have many fields and want to print all but a few of them. +@optZeroTerminated + @end table @exitstatus diff --git a/src/cut.c b/src/cut.c index 96440af54..7ab6be4cb 100644 --- a/src/cut.c +++ b/src/cut.c @@ -98,6 +98,9 @@ static bool complement; /* The delimiter character for field mode. */ static unsigned char delim; +/* The delimiter for each line/record. */ +static unsigned char line_delim = '\n'; + /* True if the --output-delimiter=STRING option was specified. */ static bool output_delimiter_specified; @@ -128,6 +131,7 @@ static struct option const longopts[] = {"only-delimited", no_argument, NULL, 's'}, {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION}, {"complement", no_argument, NULL, COMPLEMENT_OPTION}, + {"zero-terminated", no_argument, NULL, 'z'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} @@ -170,6 +174,9 @@ Print selected parts of lines from each FILE to standard output.\n\ -s, --only-delimited do not print lines not containing delimiters\n\ --output-delimiter=STRING use STRING as the output delimiter\n\ the default is to use the input delimiter\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); @@ -239,9 +246,9 @@ cut_bytes (FILE *stream) c = getc (stream); - if (c == '\n') + if (c == line_delim) { - putchar ('\n'); + putchar (c); byte_idx = 0; print_delimiter = false; current_rp = frp; @@ -249,7 +256,7 @@ cut_bytes (FILE *stream) else if (c == EOF) { if (byte_idx > 0) - putchar ('\n'); + putchar (line_delim); break; } else @@ -308,7 +315,7 @@ cut_fields (FILE *stream) size_t n_bytes; len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, - GETNLINE_NO_LIMIT, delim, '\n', stream); + GETNLINE_NO_LIMIT, delim, line_delim, stream); if (len < 0) { free (field_1_buffer); @@ -336,9 +343,9 @@ cut_fields (FILE *stream) { fwrite (field_1_buffer, sizeof (char), n_bytes, stdout); /* Make sure the output line is newline terminated. */ - if (field_1_buffer[n_bytes - 1] != '\n') - putchar ('\n'); - c = '\n'; + if (field_1_buffer[n_bytes - 1] != line_delim) + putchar (line_delim); + c = line_delim; } continue; } @@ -348,7 +355,7 @@ cut_fields (FILE *stream) fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout); /* With -d$'\n' don't treat the last '\n' as a delimiter. */ - if (delim == '\n') + if (delim == line_delim) { int last_c = getc (stream); if (last_c != EOF) @@ -374,7 +381,7 @@ cut_fields (FILE *stream) } found_any_selected_field = true; - while ((c = getc (stream)) != delim && c != '\n' && c != EOF) + while ((c = getc (stream)) != delim && c != line_delim && c != EOF) { putchar (c); prev_c = c; @@ -382,14 +389,14 @@ cut_fields (FILE *stream) } else { - while ((c = getc (stream)) != delim && c != '\n' && c != EOF) + while ((c = getc (stream)) != delim && c != line_delim && c != EOF) { prev_c = c; } } /* With -d$'\n' don't treat the last '\n' as a delimiter. */ - if (delim == '\n' && c == delim) + if (delim == line_delim && c == delim) { int last_c = getc (stream); if (last_c != EOF) @@ -400,13 +407,14 @@ cut_fields (FILE *stream) if (c == delim) next_item (&field_idx); - else if (c == '\n' || c == EOF) + else if (c == line_delim || c == EOF) { if (found_any_selected_field || !(suppress_non_delimited && field_idx == 1)) { - if (c == '\n' || prev_c != '\n' || delim == '\n') - putchar ('\n'); + if (c == line_delim || prev_c != line_delim + || delim == line_delim) + putchar (line_delim); } if (c == EOF) break; @@ -492,7 +500,7 @@ main (int argc, char **argv) delim = '\0'; have_read_stdin = false; - while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1) + while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1) { switch (optc) { @@ -538,6 +546,10 @@ main (int argc, char **argv) suppress_non_delimited = true; break; + case 'z': + line_delim = '\0'; + break; + case COMPLEMENT_OPTION: complement = true; break; diff --git a/tests/misc/cut.pl b/tests/misc/cut.pl index 70c5a642a..f6f8a5611 100755 --- a/tests/misc/cut.pl +++ b/tests/misc/cut.pl @@ -161,6 +161,14 @@ my @Tests = ['newline-23', "-d'\n'", '-f1-', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}], ['newline-24', "-d'\n'", '-f1,2', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}], + # --zero-terminated + ['zerot-1', "-z", '-c1', {IN=>"ab\0cd\0"}, {OUT=>"a\0c\0"}], + ['zerot-2', "-z", '-c1', {IN=>"ab\0cd"}, {OUT=>"a\0c\0"}], + ['zerot-3', '-z -f1-', {IN=>""}, {OUT=>""}], + ['zerot-4', '-z -d:', '-f1', {IN=>"a:1\0b:2"}, {OUT=>"a\0b\0"}], + ['zerot-5', '-z -d:', '-f1-', {IN=>"a1:\0:"}, {OUT=>"a1:\0:\0"}], + ['zerot-6', "-z -d ''", '-f1,2', '--ou=:', {IN=>"a\0b\0"}, {OUT=>"a:b\0"}], + # New functionality: ['out-delim1', '-c1-3,5-', '--output-d=:', {IN=>"abcdefg\n"}, {OUT=>"abc:efg\n"}], -- cgit v1.2.3-70-g09d2