summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- NEWS 6
-rw-r--r-- doc/coreutils.texi 2
-rw-r--r-- src/join.c 19
-rwxr-xr-x tests/misc/join.pl 20
4 files changed, 43 insertions, 4 deletions
diff --git a/NEWS b/NEWS
index 5a253771a..8785bb333 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,12 @@ GNU coreutils NEWS -*- outline -*-
permissions.
[This bug was present in "the beginning".]
+** New features
+
+ join accepts a new option: --zero-terminated (-z). As with the sort and uniq
+ options of the same name, this makes join consume and produce NUL-terminated
+ lines rather than newline-terminated lines.
+
* Noteworthy changes in release 8.21 (2013-02-14) [stable]
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 2c16dc48f..19ef4651c 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -6181,6 +6181,8 @@ character is used to delimit the fields.
Print a line for each unpairable line in file @var{file-number}
(either @samp{1} or @samp{2}), instead of the normal output.
+@zeroTerminatedOption
+
@end table
@exitstatus
diff --git a/src/join.c b/src/join.c
index 11e647cfd..1da618dc1 100644
--- a/src/join.c
+++ b/src/join.c
@@ -161,6 +161,7 @@ static struct option const longopts[] =
{"ignore-case", no_argument, NULL, 'i'},
{"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
{"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
+ {"zero-terminated", no_argument, NULL, 'z'},
{"header", no_argument, NULL, HEADER_LINE_OPTION},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
@@ -177,6 +178,9 @@ static bool ignore_case;
join them without checking for ordering */
static bool join_header_lines;
+/* The character marking end of line. Defaults to \n. */
+static char eolchar = '\n';
+
void
usage (int status)
{
@@ -213,6 +217,9 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
--header treat the first line in each file as field headers,\n\
print them without trying to pair them\n\
"), stdout);
+ fputs (_("\
+ -z, --zero-terminated end lines with 0 byte, not newline\n\
+"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
fputs (_("\
@@ -445,7 +452,7 @@ get_line (FILE *fp, struct line **linep, int which)
else
line = init_linep (linep);
- if (! readlinebuffer (&line->buf, fp))
+ if (! readlinebuffer_delim (&line->buf, fp, eolchar))
{
if (ferror (fp))
error (EXIT_FAILURE, errno, _("read error"));
@@ -614,7 +621,7 @@ prjoin (struct line const *line1, struct line const *line2)
break;
putchar (output_separator);
}
- putchar ('\n');
+ putchar (eolchar);
}
else
{
@@ -636,7 +643,7 @@ prjoin (struct line const *line1, struct line const *line2)
prfields (line1, join_field_1, autocount_1);
prfields (line2, join_field_2, autocount_2);
- putchar ('\n');
+ putchar (eolchar);
}
}
@@ -1017,7 +1024,7 @@ main (int argc, char **argv)
issued_disorder_warning[0] = issued_disorder_warning[1] = false;
check_input_order = CHECK_ORDER_DEFAULT;
- while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
+ while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z",
longopts, NULL))
!= -1)
{
@@ -1107,6 +1114,10 @@ main (int argc, char **argv)
}
break;
+ case 'z':
+ eolchar = 0;
+ break;
+
case NOCHECK_ORDER_OPTION:
check_input_order = CHECK_ORDER_DISABLED;
break;
diff --git a/tests/misc/join.pl b/tests/misc/join.pl
index 9b93794d9..7e06f1e6c 100755
--- a/tests/misc/join.pl
+++ b/tests/misc/join.pl
@@ -275,6 +275,26 @@ my @tv = (
[ "ID1 Name\n1 A\n", ""],
"ID1 Name\n1 A\n", 0],
+# Zero-terminated lines
+['z1', '-z',
+ ["a\0c\0e\0", "a\0b\0c\0"], "a\0c\0", 0],
+
+# not zero-terminated, but related to the code change:
+# the old readlinebuffer() auto-added '\n' to the last line.
+# the new readlinebuffer_delim() does not.
+# Ensure it doesn't matter.
+['z2', '',
+ ["a\nc\ne\n", "a\nb\nc"], "a\nc\n", 0],
+['z3', '',
+ ["a\nc\ne", "a\nb\nc"], "a\nc\n", 0],
+# missing last NUL at the end of the last line (=end of file)
+['z4', '-z',
+ ["a\0c\0e", "a\0b\0c"], "a\0c\0", 0],
+# edge-case: the embedded newlines should be treated as
+# part of the NUL-terminated line
+['z5', '-z -a1 -a2',
+ ["a\n1\0c 3\0","b\n8\0c 9\0"], "a\n1\0b\n8\0c 3 9\0"],
+
);
# Convert the above old-style test vectors to the newer