4 files changed, 49 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
index d9950c625..ed23b65cb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2007-05-12  James Youngman  <jay@gnu.org>
+
+	Add -z option to uniq.  Originally proposed by Egmont Koblinger.
+	* NEWS: Mention uniq's new option: --zero-terminated (-z).
+	* src/uniq.c: Add new option, --zero-terminated (-z), to make
+	uniq use the NUL byte as separator/delimiter rather than newline.
+	(check_file): Add a parameter: delimiter.  Update caller.
+	Use readlinebuffer_delim in place of readlinebuffer everywhere.
+	(main): Handle the new option.
+	(usage): Describe new option the same way sort does.
+	* doc/coreutils.texi (uniq invocation): Describe the new option.
+
 2007-05-07  Jim Meyering  <jim@meyering.net>
 
 	* NEWS: Mention that last week's tr bug dates back to 1992.
diff --git a/NEWS b/NEWS
index ea3aafb35..2a2310892 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,10 @@ GNU coreutils NEWS                                    -*- outline -*-
 
   Add SELinux support (FIXME: add details here)
 
+  uniq accepts a new option: --zero-terminated (-z).  As with the sort
+  option of the same name, this makes uniq consume and produce
+  NUL-terminated lines rather than newline-terminated lines.
+
 ** Bug fixes
 
   ls -x DIR would sometimes output the wrong string in place of the
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 6fc72ec2f..cc4d0b853 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -4229,11 +4229,15 @@ This is equivalent to @option{--all-repeated} (@option{-D}).
 
 @item prepend
 Output a newline before each group of repeated lines.
+With @option{--zero-terminated} (@option{-z}), use
+an @acronym{ASCII} @sc{nul} (zero) byte instead of a newline.
 
 @item separate
 Separate groups of repeated lines with a single newline.
+With @option{--zero-terminated} (@option{-z}), use
+an @acronym{ASCII} @sc{nul} (zero) byte instead of a newline.
 This is the same as using @samp{prepend}, except that
-there is no newline before the first group, and hence
+no delimiter is inserted before the first group, and hence
 may be better suited for output direct to users.
 @end table
 
@@ -4261,6 +4265,19 @@ Compare at most @var{n} characters on each line (after skipping any specified
 fields and characters).  By default the entire rest of the lines are
 compared.
 
+@item -z
+@itemx --zero-terminated
+@opindex -z
+@opindex --zero-terminated
+@cindex uniq zero-terminated lines
+Read and write lines that are each terminated by a null character
+(@acronym{ASCII} @sc{nul}) instead of a line feed
+(@acronym{ASCII} @sc{lf}).
+This option is useful in conjunction with @samp{sort -z}, @samp{perl -0},
+@samp{find -print0} and @samp{xargs -0}, which use the same convention to
+reliably handle arbitrary file names (even those containing blanks
+or other special characters).
+
 @end table
 
 @exitstatus
diff --git a/src/uniq.c b/src/uniq.c
index ac0840b03..36e2ea366 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -1,5 +1,5 @@
 /* uniq -- remove duplicate lines from a sorted file
-   Copyright (C) 86, 91, 1995-2006 Free Software Foundation, Inc.
+   Copyright (C) 86, 91, 1995-2007 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -119,6 +119,7 @@ static struct option const longopts[] =
   {"skip-fields", required_argument, NULL, 'f'},
   {"skip-chars", required_argument, NULL, 's'},
   {"check-chars", required_argument, NULL, 'w'},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -156,6 +157,7 @@ Mandatory arguments to long options are mandatory for short options too.\n\
   -i, --ignore-case     ignore differences in case when comparing\n\
   -s, --skip-chars=N    avoid comparing the first N characters\n\
   -u, --unique          only print unique lines\n\
+  -z, --zero-terminated  end lines with 0 byte, not newline\n\
 "), stdout);
      fputs (_("\
   -w, --check-chars=N   compare no more than N characters in lines\n\
@@ -268,7 +270,7 @@ writeline (struct linebuffer const *line,
    If either is "-", use the standard I/O stream for it instead. */
 
 static void
-check_file (const char *infile, const char *outfile)
+check_file (const char *infile, const char *outfile, char delimiter)
 {
   struct linebuffer lb1, lb2;
   struct linebuffer *thisline, *prevline;
@@ -300,7 +302,7 @@ check_file (const char *infile, const char *outfile)
 	{
 	  char *thisfield;
 	  size_t thislen;
-	  if (readlinebuffer (thisline, stdin) == 0)
+	  if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
 	    break;
 	  thisfield = find_field (thisline);
 	  thislen = thisline->length - 1 - (thisfield - thisline->buffer);
@@ -323,7 +325,7 @@ check_file (const char *infile, const char *outfile)
       uintmax_t match_count = 0;
       bool first_delimiter = true;
 
-      if (readlinebuffer (prevline, stdin) == 0)
+      if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
 	goto closefiles;
       prevfield = find_field (prevline);
       prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
@@ -333,7 +335,7 @@ check_file (const char *infile, const char *outfile)
 	  bool match;
 	  char *thisfield;
 	  size_t thislen;
-	  if (readlinebuffer (thisline, stdin) == 0)
+	  if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
 	    {
 	      if (ferror (stdin))
 		goto closefiles;
@@ -363,7 +365,7 @@ check_file (const char *infile, const char *outfile)
 		  if ((delimit_groups == DM_PREPEND)
 		      || (delimit_groups == DM_SEPARATE
 			  && !first_delimiter))
-		    putchar ('\n');
+		    putchar (delimiter);
 		}
 	    }
 
@@ -406,6 +408,7 @@ main (int argc, char **argv)
   enum Skip_field_option_type skip_field_option_type = SFO_NONE;
   int nfiles = 0;
   char const *file[2];
+  char delimiter = '\n';	/* change with --zero-terminated, -z */
 
   file[0] = file[1] = "-";
   initialize_main (&argc, &argv);
@@ -434,7 +437,7 @@ main (int argc, char **argv)
       if (optc == -1
 	  || (posixly_correct && nfiles != 0)
 	  || ((optc = getopt_long (argc, argv,
-				   "-0123456789Dcdf:is:uw:", longopts, NULL))
+				   "-0123456789Dcdf:is:uw:z", longopts, NULL))
 	      == -1))
 	{
 	  if (argc <= optind)
@@ -530,6 +533,10 @@ main (int argc, char **argv)
 				  N_("invalid number of bytes to compare"));
 	  break;
 
+	case 'z':
+	  delimiter = '\0';
+	  break;
+
 	case_GETOPT_HELP_CHAR;
 
 	case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
@@ -546,7 +553,7 @@ main (int argc, char **argv)
       usage (EXIT_FAILURE);
     }
 
-  check_file (file[0], file[1]);
+  check_file (file[0], file[1], delimiter);
 
   exit (EXIT_SUCCESS);
 }