diff options
author | Assaf Gordon <assafgordon@gmail.com> | 2013-02-14 15:29:08 -0500 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2013-02-28 01:49:56 +0000 |
commit | 6eb51ce29e962bc13586261bd0e1de4780c269ff (patch) | |
tree | 737687e0f2bb181a1eff21700ffbe848be1f1da7 | |
parent | 551128ebd71d1216ccc807eccee91190d6b21462 (diff) | |
download | coreutils-6eb51ce29e962bc13586261bd0e1de4780c269ff.tar.xz |
join: Add the -z, --zero-terminated option
* NEWS: Mention join's new option: --zero-terminated (-z).
* src/join.c: Add new option, --zero-terminated (-z), to make
join use the NUL byte as separator/delimiter rather than newline.
(get_line): Use readlinebuffer_delim in place of readlinebuffer.
(main): Handle the new option.
(usage): Describe new option the same way sort does.
* doc/coreutils.texi (join invocation): Describe the new option.
* tests/misc/join.pl: add tests for -z option.
-rw-r--r-- | NEWS | 6 | ||||
-rw-r--r-- | doc/coreutils.texi | 2 | ||||
-rw-r--r-- | src/join.c | 19 | ||||
-rwxr-xr-x | tests/misc/join.pl | 20 |
4 files changed, 43 insertions, 4 deletions
@@ -9,6 +9,12 @@ GNU coreutils NEWS -*- outline -*- permissions. [This bug was present in "the beginning".] +** New features + + join accepts a new option: --zero-terminated (-z). As with the sort,uniq + option of the same name, this makes join consume and produce NUL-terminated + lines rather than newline-terminated lines. + * Noteworthy changes in release 8.21 (2013-02-14) [stable] diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 2c16dc48f..19ef4651c 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -6181,6 +6181,8 @@ character is used to delimit the fields. Print a line for each unpairable line in file @var{file-number} (either @samp{1} or @samp{2}), instead of the normal output. +@zeroTerminatedOption + @end table @exitstatus diff --git a/src/join.c b/src/join.c index 11e647cfd..1da618dc1 100644 --- a/src/join.c +++ b/src/join.c @@ -161,6 +161,7 @@ static struct option const longopts[] = {"ignore-case", no_argument, NULL, 'i'}, {"check-order", no_argument, NULL, CHECK_ORDER_OPTION}, {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION}, + {"zero-terminated", no_argument, NULL, 'z'}, {"header", no_argument, NULL, HEADER_LINE_OPTION}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, @@ -177,6 +178,9 @@ static bool ignore_case; join them without checking for ordering */ static bool join_header_lines; +/* The character marking end of line. Default to \n. */ +static char eolchar = '\n'; + void usage (int status) { @@ -213,6 +217,9 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\ --header treat the first line in each file as field headers,\n\ print them without trying to pair them\n\ "), stdout); + fputs (_("\ + -z, --zero-terminated end lines with 0 byte, not newline\n\ +"), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); fputs (_("\ @@ -445,7 +452,7 @@ get_line (FILE *fp, struct line **linep, int which) else line = init_linep (linep); - if (! readlinebuffer (&line->buf, fp)) + if (! readlinebuffer_delim (&line->buf, fp, eolchar)) { if (ferror (fp)) error (EXIT_FAILURE, errno, _("read error")); @@ -614,7 +621,7 @@ prjoin (struct line const *line1, struct line const *line2) break; putchar (output_separator); } - putchar ('\n'); + putchar (eolchar); } else { @@ -636,7 +643,7 @@ prjoin (struct line const *line1, struct line const *line2) prfields (line1, join_field_1, autocount_1); prfields (line2, join_field_2, autocount_2); - putchar ('\n'); + putchar (eolchar); } } @@ -1017,7 +1024,7 @@ main (int argc, char **argv) issued_disorder_warning[0] = issued_disorder_warning[1] = false; check_input_order = CHECK_ORDER_DEFAULT; - while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:", + while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z", longopts, NULL)) != -1) { @@ -1107,6 +1114,10 @@ main (int argc, char **argv) } break; + case 'z': + eolchar = 0; + break; + case NOCHECK_ORDER_OPTION: check_input_order = CHECK_ORDER_DISABLED; break; diff --git a/tests/misc/join.pl b/tests/misc/join.pl index 9b93794d9..7e06f1e6c 100755 --- a/tests/misc/join.pl +++ b/tests/misc/join.pl @@ -275,6 +275,26 @@ my @tv = ( [ "ID1 Name\n1 A\n", ""], "ID1 Name\n1 A\n", 0], +# Zero-terminated lines +['z1', '-z', + ["a\0c\0e\0", "a\0b\0c\0"], "a\0c\0", 0], + +# not zero-terminated, but related to the code change: +# the old readlinebuffer() auto-added '\n' to the last line. +# the new readlinebuffer_delim() does not. +# Ensure it doesn't matter. +['z2', '', + ["a\nc\ne\n", "a\nb\nc"], "a\nc\n", 0], +['z3', '', + ["a\nc\ne", "a\nb\nc"], "a\nc\n", 0], +# missing last NUL at the end of the last line (=end of file) +['z4', '-z', + ["a\0c\0e", "a\0b\0c"], "a\0c\0", 0], +# edge-case: the embedded newlines should treated as +# part of the nul-terminated line +['z5', '-z -a1 -a2', + ["a\n1\0c 3\0","b\n8\0c 9\0"], "a\n1\0b\n8\0c 3 9\0"], + ); # Convert the above old-style test vectors to the newer |