diff options
author | Pádraig Brady <P@draigBrady.com> | 2016-01-12 16:29:32 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2016-01-13 10:59:18 +0000 |
commit | b2eadd109c3a508011705761dfe24a35180d925d (patch) | |
tree | 7efae6e5d2ba65a74b2e22e1160f0dd6a4013ce6 /tests/misc | |
parent | d44ae88199ebf44fe721c06621a7ffc442fa34be (diff) | |
download | coreutils-b2eadd109c3a508011705761dfe24a35180d925d.tar.xz |
join,sort,uniq: with -z, treat '\n' as a field separator
* NEWS: Mention the change in behavior.
* doc/coreutils.texi (newlineFieldSeparator): A new description,
referenced from ({join,sort,uniq} invocation).
* src/system.h (field_sep): A new inline function to determine
if a character is a field separator.
* src/join.c (usage): s/whitespace/blank/ to be more accurate
wrt which characters are field separators.
(xfields): s/isblank/field_sep/.
* src/sort.c (inittables): Likewise.
* src/uniq.c (find_field): Likewise.
* tests/misc/join.pl: Adjust -z test, and add a test/example
for processing the whole record with field processing.
* tests/misc/sort.pl: Add -z test cases, including case with '\n'.
* tests/misc/uniq.pl: Add -z -f test case with \n.
Diffstat (limited to 'tests/misc')
-rwxr-xr-x | tests/misc/join.pl | 9 | ||||
-rwxr-xr-x | tests/misc/sort.pl | 5 | ||||
-rwxr-xr-x | tests/misc/uniq.pl | 1 |
3 files changed, 12 insertions, 3 deletions
```diff
diff --git a/tests/misc/join.pl b/tests/misc/join.pl
index 2a40f0095..4d399d8ae 100755
--- a/tests/misc/join.pl
+++ b/tests/misc/join.pl
@@ -290,10 +290,13 @@ my @tv = (
 # missing last NUL at the end of the last line (=end of file)
 ['z4', '-z', ["a\0c\0e", "a\0b\0c"], "a\0c\0", 0],
-# edge-case: the embedded newlines should treated as
-# part of the nul-terminated line
+# With -z, embedded newlines are treated as field separators.
+# Note '\n' are converted to ' ' in this case.
 ['z5', '-z -a1 -a2',
- ["a\n1\0c 3\0","b\n8\0c 9\0"], "a\n1\0b\n8\0c 3 9\0"],
+ ["a\n\n1\0c 3\0", "a 2\0b\n8\0c 9\0"], "a 1 2\0b 8\0c 3 9\0"],
+# One can avoid field processing like:
+['z6', '-z -t ""',
+ ["a\n1\n\0", "a\n1\n\0"], "a\n1\n\0"],
 );
diff --git a/tests/misc/sort.pl b/tests/misc/sort.pl
index c9bcce194..c3e7f8e48 100755
--- a/tests/misc/sort.pl
+++ b/tests/misc/sort.pl
@@ -406,6 +406,11 @@ my @Tests =
 ["output-is-input-3", '-m -o f', {OUT=>''}, {IN=> {g=> "a\n"}}, {IN=> {h=> "b\n"}}, {IN=> {f=> "c\n"}}, {CMP=> ["a\nb\nc\n", {'f'=> undef}]} ],
+
+# --zero-terminated
+['zero-1', '-z', {IN=>"2\0001\000"}, {OUT=>"1\0002\000"}],
+['zero-2', '-z -k2,2', {IN=>"1\n2\0002\n1\000"}, {OUT=>"2\n1\0001\n2\000"}],
+['zero-3', '-zb -k2,2', {IN=>"1\n\n2\0002\n1\0"}, {OUT=>"2\n1\0001\n\n2\0"}],
 );
 # Add _POSIX2_VERSION=199209 to the environment of each test
diff --git a/tests/misc/uniq.pl b/tests/misc/uniq.pl
index 2bc06b9d6..f028036be 100755
--- a/tests/misc/uniq.pl
+++ b/tests/misc/uniq.pl
@@ -95,6 +95,7 @@ my @Tests =
  ['3z', '-z', {IN=>"a\na"}, {OUT=>"a\na\0"}],
  ['4z', '-z', {IN=>"a\nb"}, {OUT=>"a\nb\0"}],
  ['5z', '-z', {IN=>"a\na\nb"}, {OUT=>"a\na\nb\0"}],
+ ['10z', '-z -f1', {IN=>"a\nb\n\0c\nb\n\0"}, {OUT=>"a\nb\n\0"}],
  ['20z', '-dz', {IN=>"a\na\n"}, {OUT=>""}],
  # Make sure that eight bit characters work
```