cut: add the -z,--zero-terminated option

* doc/coreutils.texi (cut invocation): Reference the description. * src/cut.c: Parameterize '\n' references. * tests/misc/cut.pl: Add tests for character and field processing. * NEWS: Mention the new feature.
author: Pádraig Brady <P@draigBrady.com> 2016-01-08 13:04:03 +0000
committer: Pádraig Brady <P@draigBrady.com> 2016-01-13 10:59:56 +0000
commit: a499a0ce583a8d941e50c4da450133e694994d17 (patch)
tree: b6700e72ac92627c8308728ba883c2f78f5f9fe0
parent: bc94551f63cfc4c05a56628dfcb386707d9e98cb (diff)
download: coreutils-a499a0ce583a8d941e50c4da450133e694994d17.tar.xz
4 files changed, 40 insertions, 18 deletions
diff --git a/NEWS b/NEWS
index eb7fc5633..45f299056 100644
--- a/NEWS
+++ b/NEWS
@@ -33,15 +33,15 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 ** New features
 
+  cut, head, tail now have -z, --zero-terminated options to work with
+  NUL delimited items.
+
   dd now summarizes sizes in --human-readable format too, not just --si.
   E.g., "3441325000 bytes (3.4 GB, 3.2 GiB) copied".  It omits the summaries
   if they would not provide useful information, e.g., "3 bytes copied".
   Its status=progress output now uses the same format as ordinary status,
   perhaps with trailing spaces to erase previous progress output.
 
-  head, tail now have -z, --zero-terminated options to work with
-  NUL delimited items.
-
   md5sum now supports the --ignore-missing option to allow
   verifying a subset of files given a larger list of checksums.
   This also affects sha1sum, sha224sum, sha256sum, sha384sum and sha512sum.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index a7a89ad98..dcf28c5f9 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -5902,6 +5902,8 @@ In other words, do @emph{not} print the bytes, characters or fields
 specified via those options.  This option is useful when you have
 many fields and want to print all but a few of them.
 
+@optZeroTerminated
+
 @end table
 
 @exitstatus
diff --git a/src/cut.c b/src/cut.c
index 96440af54..7ab6be4cb 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -98,6 +98,9 @@ static bool complement;
 /* The delimiter character for field mode. */
 static unsigned char delim;
 
+/* The delimiter for each line/record. */
+static unsigned char line_delim = '\n';
+
 /* True if the --output-delimiter=STRING option was specified.  */
 static bool output_delimiter_specified;
 
@@ -128,6 +131,7 @@ static struct option const longopts[] =
   {"only-delimited", no_argument, NULL, 's'},
   {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
   {"complement", no_argument, NULL, COMPLEMENT_OPTION},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -171,6 +175,9 @@ Print selected parts of lines from each FILE to standard output.\n\
       --output-delimiter=STRING  use STRING as the output delimiter\n\
                             the default is to use the input delimiter\n\
 "), stdout);
+      fputs (_("\
+  -z, --zero-terminated    line delimiter is NUL, not newline\n\
+"), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
       fputs (_("\
@@ -239,9 +246,9 @@ cut_bytes (FILE *stream)
 
       c = getc (stream);
 
-      if (c == '\n')
+      if (c == line_delim)
         {
-          putchar ('\n');
+          putchar (c);
           byte_idx = 0;
           print_delimiter = false;
           current_rp = frp;
@@ -249,7 +256,7 @@ cut_bytes (FILE *stream)
       else if (c == EOF)
         {
           if (byte_idx > 0)
-            putchar ('\n');
+            putchar (line_delim);
           break;
         }
       else
@@ -308,7 +315,7 @@ cut_fields (FILE *stream)
           size_t n_bytes;
 
           len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
-                            GETNLINE_NO_LIMIT, delim, '\n', stream);
+                            GETNLINE_NO_LIMIT, delim, line_delim, stream);
           if (len < 0)
             {
               free (field_1_buffer);
@@ -336,9 +343,9 @@ cut_fields (FILE *stream)
                 {
                   fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
                   /* Make sure the output line is newline terminated.  */
-                  if (field_1_buffer[n_bytes - 1] != '\n')
-                    putchar ('\n');
-                  c = '\n';
+                  if (field_1_buffer[n_bytes - 1] != line_delim)
+                    putchar (line_delim);
+                  c = line_delim;
                 }
               continue;
             }
@@ -348,7 +355,7 @@ cut_fields (FILE *stream)
               fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
 
               /* With -d$'\n' don't treat the last '\n' as a delimiter.  */
-              if (delim == '\n')
+              if (delim == line_delim)
                 {
                   int last_c = getc (stream);
                   if (last_c != EOF)
@@ -374,7 +381,7 @@ cut_fields (FILE *stream)
             }
           found_any_selected_field = true;
 
-          while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
+          while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
             {
               putchar (c);
               prev_c = c;
@@ -382,14 +389,14 @@ cut_fields (FILE *stream)
         }
       else
         {
-          while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
+          while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
             {
               prev_c = c;
             }
         }
 
       /* With -d$'\n' don't treat the last '\n' as a delimiter.  */
-      if (delim == '\n' && c == delim)
+      if (delim == line_delim && c == delim)
         {
           int last_c = getc (stream);
           if (last_c != EOF)
@@ -400,13 +407,14 @@ cut_fields (FILE *stream)
 
       if (c == delim)
         next_item (&field_idx);
-      else if (c == '\n' || c == EOF)
+      else if (c == line_delim || c == EOF)
         {
           if (found_any_selected_field
               || !(suppress_non_delimited && field_idx == 1))
             {
-              if (c == '\n' || prev_c != '\n' || delim == '\n')
-                putchar ('\n');
+              if (c == line_delim || prev_c != line_delim
+                  || delim == line_delim)
+                putchar (line_delim);
             }
           if (c == EOF)
             break;
@@ -492,7 +500,7 @@ main (int argc, char **argv)
   delim = '\0';
   have_read_stdin = false;
 
-  while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
+  while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1)
     {
       switch (optc)
         {
@@ -538,6 +546,10 @@ main (int argc, char **argv)
           suppress_non_delimited = true;
           break;
 
+        case 'z':
+          line_delim = '\0';
+          break;
+
         case COMPLEMENT_OPTION:
           complement = true;
           break;
diff --git a/tests/misc/cut.pl b/tests/misc/cut.pl
index 70c5a642a..f6f8a5611 100755
--- a/tests/misc/cut.pl
+++ b/tests/misc/cut.pl
@@ -161,6 +161,14 @@ my @Tests =
   ['newline-23', "-d'\n'", '-f1-', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
   ['newline-24', "-d'\n'", '-f1,2', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
 
+  # --zero-terminated
+  ['zerot-1', "-z", '-c1', {IN=>"ab\0cd\0"}, {OUT=>"a\0c\0"}],
+  ['zerot-2', "-z", '-c1', {IN=>"ab\0cd"}, {OUT=>"a\0c\0"}],
+  ['zerot-3', '-z -f1-', {IN=>""}, {OUT=>""}],
+  ['zerot-4', '-z -d:', '-f1', {IN=>"a:1\0b:2"}, {OUT=>"a\0b\0"}],
+  ['zerot-5', '-z -d:', '-f1-', {IN=>"a1:\0:"}, {OUT=>"a1:\0:\0"}],
+  ['zerot-6', "-z -d ''", '-f1,2', '--ou=:', {IN=>"a\0b\0"}, {OUT=>"a:b\0"}],
+
   # New functionality:
   ['out-delim1', '-c1-3,5-', '--output-d=:', {IN=>"abcdefg\n"},
    {OUT=>"abc:efg\n"}],
author	Pádraig Brady <P@draigBrady.com>	2016-01-08 13:04:03 +0000
committer	Pádraig Brady <P@draigBrady.com>	2016-01-13 10:59:56 +0000
commit	a499a0ce583a8d941e50c4da450133e694994d17 (patch)
tree	b6700e72ac92627c8308728ba883c2f78f5f9fe0
parent	bc94551f63cfc4c05a56628dfcb386707d9e98cb (diff)
download	coreutils-a499a0ce583a8d941e50c4da450133e694994d17.tar.xz