diff options
author | Pádraig Brady <P@draigBrady.com> | 2010-02-01 15:19:08 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2010-02-01 15:36:56 +0000 |
commit | f86bb6967dba1e2b6026997963a90e00cd641490 (patch) | |
tree | 0b4451e967924bfba4c42f9c819c0e33b1b53c95 | |
parent | 819aa9eba741c36bb522cbc2c7f10e24d190f945 (diff) | |
download | coreutils-f86bb6967dba1e2b6026997963a90e00cd641490.tar.xz |

join: make -t '' operate on the whole line

Previously passing an empty parameter to -t would
raise an error, but now it means to treat each line
as a single field for matching. This matches the
default operation of `sort` which is usually used
in conjunction with join.
* src/join.c (main): Set the field delimiter to '\n' if
an empty parameter is passed to -t.
(usage): Mention the operation of -t ''.
* tests/misc/join: Add 2 new tests, for the existing -t '\0'
and the new -t '' functionality.
* doc/coreutils.texi (join invocation): Mention that
join -t '' always operates on the whole line, while
join -t '\0' usually does.
* NEWS: Mention the change in behavior.
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | doc/coreutils.texi | 7 | ||||
-rw-r--r-- | src/join.c | 7 | ||||
-rwxr-xr-x | tests/misc/join | 5 |
4 files changed, 18 insertions, 4 deletions
```diff
diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -22,6 +22,9 @@ GNU coreutils NEWS -*- outline -*-
   ls --color no longer emits the final 3-byte color-resetting escape
   sequence when it would be a no-op.
 
+  join -t '' no longer emits an error and instead operates on
+  each line as a whole (even if they contains NUL characters).
+
 
 * Noteworthy changes in release 8.4 (2010-01-13) [stable]
 
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 2b3d32b94..e3e95f5b6 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -5462,6 +5462,8 @@ locales and options if the output of @command{sort} is fed to
 sort a file on its default join field, but if you select a non-default
 locale, join field, separator, or comparison options, then you should
 do so consistently between @command{join} and @command{sort}.
+If @samp{join -t ''} is specified then the whole line is considered which
+matches the default operation of sort.
 
 If the input has no unpairable lines, a @acronym{GNU} extension is
 available; the sort order can be any order that considers two fields
@@ -5572,7 +5574,10 @@ option---are subject to the specified @var{field-list}.
 Use character @var{char} as the input and output field separator.
 Treat as significant each occurrence of @var{char} in the input file.
 Use @samp{sort -t @var{char}}, without the @option{-b} option of
-@samp{sort}, to produce this ordering.
+@samp{sort}, to produce this ordering. If @samp{join -t ''} is specified,
+the whole line is considered, matching the default operation of sort.
+If @samp{-t '\0'} is specified then the @acronym{ASCII} @sc{nul}
+character is used to delimit the fields.
 
 @item -v @var{file-number}
 Print a line for each unpairable line in file @var{file-number}
diff --git a/src/join.c b/src/join.c
index 6030a01b3..b1f3310ec 100644
--- a/src/join.c
+++ b/src/join.c
@@ -212,7 +212,8 @@ the remaining fields from FILE1, the remaining fields from FILE2, all\n\
 separated by CHAR.\n\
 \n\
 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
-E.g., use `sort -k 1b,1' if `join' has no options.\n\
+E.g., use ` sort -k 1b,1 ' if `join' has no options,\n\
+or use ` join -t '' ' if `sort' has no options.\n\
 Note, comparisons honor the rules specified by `LC_COLLATE'.\n\
 If the input is not sorted and some lines cannot be joined, a\n\
 warning message will be given.\n\
@@ -1040,8 +1041,8 @@ main (int argc, char **argv)
           {
             unsigned char newtab = optarg[0];
             if (! newtab)
-              error (EXIT_FAILURE, 0, _("empty tab"));
-            if (optarg[1])
+              newtab = '\n'; /* '' => process the whole line. */
+            else if (optarg[1])
               {
                 if (STREQ (optarg, "\\0"))
                   newtab = '\0';
diff --git a/tests/misc/join b/tests/misc/join
index 4e7798fd9..cef813773 100755
--- a/tests/misc/join
+++ b/tests/misc/join
@@ -51,6 +51,11 @@ my @tv = (
 ['3a', '-t:', ["a:1\nb:1\n", "a:2:\nb:2:\n"],
  "a:1:2:\nb:1:2:\n", 0],
 
+# operate on whole line (as sort does by default)
+['3b', '-t ""', ["a 1\nb 1\n", "a 1\nb 2\n"], "a 1\n", 0],
+# use NUL as the field delimiter
+['3c', '-t "\\0"', ["a\0a\n", "a\0b\n"], "a\0a\0b\n", 0],
+
 # Just like -a1 and -a2 when there are no pairable lines
 ['4a', '-v 1', ["a 1\n", "b\n"], "a 1\n", 0],
 ['4b', '-v 2', ["a 1\n", "b\n"], "b\n", 0],
```