summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com>, 2016-01-12 16:29:32 +0000
committer: Pádraig Brady <P@draigBrady.com>, 2016-01-13 10:59:18 +0000
commit: b2eadd109c3a508011705761dfe24a35180d925d (patch)
tree: 7efae6e5d2ba65a74b2e22e1160f0dd6a4013ce6 /src
parent: d44ae88199ebf44fe721c06621a7ffc442fa34be (diff)
download: coreutils-b2eadd109c3a508011705761dfe24a35180d925d.tar.xz
join,sort,uniq: with -z, treat '\n' as a field separator
* NEWS: Mention the change in behavior.
* doc/coreutils.texi (newlineFieldSeparator): A new description, referenced from ({join,sort,uniq} invocation).
* src/system.h (field_sep): A new inline function to determine if a character is a field separator.
* src/join.c (usage): s/whitespace/blank/ to be more accurate wrt which characters are field separators.
(xfields): s/isblank/field_sep/.
* src/sort.c (inittables): Likewise.
* src/uniq.c (find_field): Likewise.
* tests/misc/join.pl: Adjust -z test, and add a test/example for processing the whole record with field processing.
* tests/misc/sort.pl: Add -z test cases, including case with '\n'.
* tests/misc/uniq.pl: Add -z -f test case with \n.
Diffstat (limited to 'src')
-rw-r--r-- src/join.c 8
-rw-r--r-- src/sort.c 4
-rw-r--r-- src/system.h 7
-rw-r--r-- src/uniq.c 4
4 files changed, 15 insertions, 8 deletions
diff --git a/src/join.c b/src/join.c
index 8686428fb..9b25da667 100644
--- a/src/join.c
+++ b/src/join.c
@@ -194,7 +194,7 @@ Usage: %s [OPTION]... FILE1 FILE2\n\
program_name);
fputs (_("\
For each pair of input lines with identical join fields, write a line to\n\
-standard output. The default join field is the first, delimited by whitespace.\
+standard output. The default join field is the first, delimited by blanks.\
\n\
"), stdout);
fputs (_("\
@@ -284,19 +284,19 @@ xfields (struct line *line)
else if (tab < 0)
{
/* Skip leading blanks before the first field. */
- while (isblank (to_uchar (*ptr)))
+ while (field_sep (*ptr))
if (++ptr == lim)
return;
do
{
char *sep;
- for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
+ for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++)
continue;
extract_field (line, ptr, sep - ptr);
if (sep == lim)
return;
- for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
+ for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++)
continue;
}
while (ptr != lim);
diff --git a/src/sort.c b/src/sort.c
index aca3b4231..575877d22 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -1275,9 +1275,9 @@ inittables (void)
for (i = 0; i < UCHAR_LIM; ++i)
{
- blanks[i] = !! isblank (i);
+ blanks[i] = field_sep (i);
nonprinting[i] = ! isprint (i);
- nondictionary[i] = ! isalnum (i) && ! isblank (i);
+ nondictionary[i] = ! isalnum (i) && ! field_sep (i);
fold_toupper[i] = toupper (i);
}
diff --git a/src/system.h b/src/system.h
index c1c4a18a3..9898bc79c 100644
--- a/src/system.h
+++ b/src/system.h
@@ -155,6 +155,13 @@ enum
errors that the cast doesn't. */
static inline unsigned char to_uchar (char ch) { return ch; }
+/* Return whether CH is a blank or '\n'; '\n' is a field separator with -z.  */
+static inline bool
+field_sep (unsigned char ch)
+{
+ return isblank (ch) || ch == '\n';
+}
+
#include <locale.h>
/* Take care of NLS matters. */
diff --git a/src/uniq.c b/src/uniq.c
index 6f8cd4a70..0e118da9d 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -261,9 +261,9 @@ find_field (struct linebuffer const *line)
for (count = 0; count < skip_fields && i < size; count++)
{
- while (i < size && isblank (to_uchar (lp[i])))
+ while (i < size && field_sep (lp[i]))
i++;
- while (i < size && !isblank (to_uchar (lp[i])))
+ while (i < size && !field_sep (lp[i]))
i++;
}