diff options
author | Pádraig Brady <P@draigBrady.com> | 2016-01-12 16:29:32 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2016-01-13 10:59:18 +0000 |
commit | b2eadd109c3a508011705761dfe24a35180d925d (patch) | |
tree | 7efae6e5d2ba65a74b2e22e1160f0dd6a4013ce6 /src | |
parent | d44ae88199ebf44fe721c06621a7ffc442fa34be (diff) | |
download | coreutils-b2eadd109c3a508011705761dfe24a35180d925d.tar.xz |
join,sort,uniq: with -z, treat '\n' as a field separator
* NEWS: Mention the change in behavior.
* doc/coreutils.texi (newlineFieldSeparator): A new description,
referenced from ({join,sort,uniq} invocation).
* src/system.h (field_sep): A new inline function to determine
if a character is a field separator.
* src/join.c (usage): s/whitespace/blank/ to be more accurate
wrt which characters are field separators.
(xfields): s/isblank/field_sep/.
* src/sort.c (inittables): Likewise.
* src/uniq.c (find_field): Likewise.
* tests/misc/join.pl: Adjust -z test, and add a test/example
for processing the whole record with field processing.
* tests/misc/sort.pl: Add -z test cases, including case with '\n'.
* tests/misc/uniq.pl: Add -z -f test case with \n.
Diffstat (limited to 'src')
-rw-r--r-- | src/join.c | 8 |
-rw-r--r-- | src/sort.c | 4 |
-rw-r--r-- | src/system.h | 7 |
-rw-r--r-- | src/uniq.c | 4 |
4 files changed, 15 insertions, 8 deletions
```diff
diff --git a/src/join.c b/src/join.c
index 8686428fb..9b25da667 100644
--- a/src/join.c
+++ b/src/join.c
@@ -194,7 +194,7 @@ Usage: %s [OPTION]... FILE1 FILE2\n\
               program_name);
       fputs (_("\
 For each pair of input lines with identical join fields, write a line to\n\
-standard output. The default join field is the first, delimited by whitespace.\
+standard output. The default join field is the first, delimited by blanks.\
 \n\
 "), stdout);
       fputs (_("\
@@ -284,19 +284,19 @@ xfields (struct line *line)
   else if (tab < 0)
     {
       /* Skip leading blanks before the first field. */
-      while (isblank (to_uchar (*ptr)))
+      while (field_sep (*ptr))
         if (++ptr == lim)
           return;
 
       do
         {
           char *sep;
-          for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
+          for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++)
             continue;
           extract_field (line, ptr, sep - ptr);
           if (sep == lim)
             return;
-          for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
+          for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++)
             continue;
         }
       while (ptr != lim);
diff --git a/src/sort.c b/src/sort.c
index aca3b4231..575877d22 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -1275,9 +1275,9 @@ inittables (void)
 
   for (i = 0; i < UCHAR_LIM; ++i)
     {
-      blanks[i] = !! isblank (i);
+      blanks[i] = field_sep (i);
       nonprinting[i] = ! isprint (i);
-      nondictionary[i] = ! isalnum (i) && ! isblank (i);
+      nondictionary[i] = ! isalnum (i) && ! field_sep (i);
       fold_toupper[i] = toupper (i);
     }
 
diff --git a/src/system.h b/src/system.h
index c1c4a18a3..9898bc79c 100644
--- a/src/system.h
+++ b/src/system.h
@@ -155,6 +155,13 @@ enum
    errors that the cast doesn't. */
 static inline unsigned char to_uchar (char ch) { return ch; }
 
+/* '\n' is considered a field separator with --zero-terminated. */
+static inline bool
+field_sep (unsigned char ch)
+{
+  return isblank (ch) || ch == '\n';
+}
+
 #include <locale.h>
 
 /* Take care of NLS matters. */
diff --git a/src/uniq.c b/src/uniq.c
index 6f8cd4a70..0e118da9d 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -261,9 +261,9 @@ find_field (struct linebuffer const *line)
 
   for (count = 0; count < skip_fields && i < size; count++)
     {
-      while (i < size && isblank (to_uchar (lp[i])))
+      while (i < size && field_sep (lp[i]))
         i++;
-      while (i < size && !isblank (to_uchar (lp[i])))
+      while (i < size && !field_sep (lp[i]))
         i++;
     }
 
```