From e06252480ac023b8c5f03ee1a8ab43d386fefd46 Mon Sep 17 00:00:00 2001 From: James Youngman Date: Sun, 13 May 2007 11:02:43 +0200 Subject: Add -z option to uniq. Originally proposed by Egmont Koblinger. * NEWS: Mention uniq's new option: --zero-terminated (-z). * src/uniq.c: Add new option, --zero-terminated (-z), to make uniq use the NUL byte as separator/delimiter rather than newline. (check_file): Add a parameter: delimiter. Update caller. Use readlinebuffer_delim in place of readlinebuffer everywhere. (main): Handle the new option. (usage): Describe new option the same way sort does. * doc/coreutils.texi (uniq invocation): Describe the new option. --- ChangeLog | 12 ++++++++++++ NEWS | 4 ++++ doc/coreutils.texi | 19 ++++++++++++++++++- src/uniq.c | 23 +++++++++++++++-------- 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index d9950c625..ed23b65cb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2007-05-12 James Youngman + + Add -z option to uniq. Originally proposed by Egmont Koblinger. + * NEWS: Mention uniq's new option: --zero-terminated (-z). + * src/uniq.c: Add new option, --zero-terminated (-z), to make + uniq use the NUL byte as separator/delimiter rather than newline. + (check_file): Add a parameter: delimiter. Update caller. + Use readlinebuffer_delim in place of readlinebuffer everywhere. + (main): Handle the new option. + (usage): Describe new option the same way sort does. + * doc/coreutils.texi (uniq invocation): Describe the new option. + 2007-05-07 Jim Meyering * NEWS: Mention that last week's tr bug dates back to 1992. diff --git a/NEWS b/NEWS index ea3aafb35..2a2310892 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,10 @@ GNU coreutils NEWS -*- outline -*- Add SELinux support (FIXME: add details here) + uniq accepts a new option: --zero-terminated (-z). As with the sort + option of the same name, this makes uniq consume and produce + NUL-terminated lines rather than newline-terminated lines. 
+ ** Bug fixes ls -x DIR would sometimes output the wrong string in place of the diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 6fc72ec2f..cc4d0b853 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -4229,11 +4229,15 @@ This is equivalent to @option{--all-repeated} (@option{-D}). @item prepend Output a newline before each group of repeated lines. +With @option{--zero-terminated} (@option{-z}), use +an @acronym{ASCII} @sc{nul} (zero) byte instead of a newline. @item separate Separate groups of repeated lines with a single newline. +With @option{--zero-terminated} (@option{-z}), use +an @acronym{ASCII} @sc{nul} (zero) byte instead of a newline. This is the same as using @samp{prepend}, except that -there is no newline before the first group, and hence +no delimiter is inserted before the first group, and hence may be better suited for output direct to users. @end table @@ -4261,6 +4265,19 @@ Compare at most @var{n} characters on each line (after skipping any specified fields and characters). By default the entire rest of the lines are compared. +@item -z +@itemx --zero-terminated +@opindex -z +@opindex --zero-terminated +@cindex uniq zero-terminated lines +Treat the input as a set of lines, each terminated by a null character +(@acronym{ASCII} @sc{nul}) instead of a line feed +(@acronym{ASCII} @sc{lf}). +This option can be useful in conjunction with @samp{sort -z}, @samp{perl -0} or +@samp{find -print0} and @samp{xargs -0} which do the same in order to +reliably handle arbitrary file names (even those containing blanks +or other special characters). + @end table @exitstatus diff --git a/src/uniq.c b/src/uniq.c index ac0840b03..36e2ea366 100644 --- a/src/uniq.c +++ b/src/uniq.c @@ -1,5 +1,5 @@ /* uniq -- remove duplicate lines from a sorted file - Copyright (C) 86, 91, 1995-2006 Free Software Foundation, Inc. + Copyright (C) 86, 91, 1995-2007 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -119,6 +119,7 @@ static struct option const longopts[] = {"skip-fields", required_argument, NULL, 'f'}, {"skip-chars", required_argument, NULL, 's'}, {"check-chars", required_argument, NULL, 'w'}, + {"zero-terminated", no_argument, NULL, 'z'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} @@ -156,6 +157,7 @@ Mandatory arguments to long options are mandatory for short options too.\n\ -i, --ignore-case ignore differences in case when comparing\n\ -s, --skip-chars=N avoid comparing the first N characters\n\ -u, --unique only print unique lines\n\ + -z, --zero-terminated end lines with 0 byte, not newline\n\ "), stdout); fputs (_("\ -w, --check-chars=N compare no more than N characters in lines\n\ @@ -268,7 +270,7 @@ writeline (struct linebuffer const *line, If either is "-", use the standard I/O stream for it instead. */ static void -check_file (const char *infile, const char *outfile) +check_file (const char *infile, const char *outfile, char delimiter) { struct linebuffer lb1, lb2; struct linebuffer *thisline, *prevline; @@ -300,7 +302,7 @@ check_file (const char *infile, const char *outfile) { char *thisfield; size_t thislen; - if (readlinebuffer (thisline, stdin) == 0) + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) break; thisfield = find_field (thisline); thislen = thisline->length - 1 - (thisfield - thisline->buffer); @@ -323,7 +325,7 @@ check_file (const char *infile, const char *outfile) uintmax_t match_count = 0; bool first_delimiter = true; - if (readlinebuffer (prevline, stdin) == 0) + if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) goto closefiles; prevfield = find_field (prevline); prevlen = prevline->length - 1 - (prevfield - prevline->buffer); @@ -333,7 +335,7 @@ check_file (const char *infile, const char *outfile) bool match; char *thisfield; size_t thislen; 
- if (readlinebuffer (thisline, stdin) == 0) + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) { if (ferror (stdin)) goto closefiles; @@ -363,7 +365,7 @@ check_file (const char *infile, const char *outfile) if ((delimit_groups == DM_PREPEND) || (delimit_groups == DM_SEPARATE && !first_delimiter)) - putchar ('\n'); + putchar (delimiter); } } @@ -406,6 +408,7 @@ main (int argc, char **argv) enum Skip_field_option_type skip_field_option_type = SFO_NONE; int nfiles = 0; char const *file[2]; + char delimiter = '\n'; /* change with --zero-terminated, -z */ file[0] = file[1] = "-"; initialize_main (&argc, &argv); @@ -434,7 +437,7 @@ main (int argc, char **argv) if (optc == -1 || (posixly_correct && nfiles != 0) || ((optc = getopt_long (argc, argv, - "-0123456789Dcdf:is:uw:", longopts, NULL)) + "-0123456789Dcdf:is:uw:z", longopts, NULL)) == -1)) { if (argc <= optind) @@ -530,6 +533,10 @@ main (int argc, char **argv) N_("invalid number of bytes to compare")); break; + case 'z': + delimiter = '\0'; + break; + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); @@ -546,7 +553,7 @@ main (int argc, char **argv) usage (EXIT_FAILURE); } - check_file (file[0], file[1]); + check_file (file[0], file[1], delimiter); exit (EXIT_SUCCESS); } -- cgit v1.2.3-54-g00ecf