summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Meyering <meyering@redhat.com>2008-04-26 09:28:48 +0200
committerJim Meyering <meyering@redhat.com>2008-05-06 08:21:28 +0200
commit209850fd7e1e89cf8937310878bd22d70e3588a5 (patch)
tree4f124f7243ad25afb4ca6a2f2fab081a927a3016
parent4d3bf408b9a308a4702ac93546446927c12cc101 (diff)
downloadcoreutils-209850fd7e1e89cf8937310878bd22d70e3588a5.tar.xz
avoid problems with sign-extended "char" operand to is* functions
* src/cut.c (set_fields): Apply to_uchar to isblank operands. * src/uniq.c (find_field): Likewise. * src/seq.c (scan_arg): Likewise, for isspace. * tests/misc/uniq: New file. Test for the above, but only when isspace(0240). * tests/Makefile.am (TESTS): Add misc/uniq. * configure.ac: Use gt_LOCALE_FR. * tests/check.mk (TESTS_ENVIRONMENT): Propagate LOCALE_FR to scripts. * NEWS: Mention the bug fixes. Before this patch, on FreeBSD 6: $ printf 'x y z\nx \xa0 y z\n' > in $ LC_ALL=fr_FR.UTF-8 uniq -f2 in|tr ' ' . x.y.z x. .y.z With the patch: $ LC_ALL=fr_FR.UTF-8 uniq -f2 in|tr ' ' . x.y.z This also affected many other locales: for i in $(locale -a); do test $(LC_ALL=$i ./uniq -f1 in|wc -l) = $(LC_ALL=$i uniq -f1 in|wc -l) || echo $i ; done ... en_GB.ISO8859-1 en_GB.ISO8859-15 en_GB.UTF-8 en_IE.UTF-8 en_NZ.ISO8859-1 en_NZ.ISO8859-15 en_NZ.UTF-8 en_US.ISO8859-1 en_US.ISO8859-15 en_US.UTF-8 ...
-rw-r--r--NEWS5
-rw-r--r--configure.ac3
-rw-r--r--src/cut.c5
-rw-r--r--src/seq.c2
-rw-r--r--src/uniq.c6
-rw-r--r--tests/Makefile.am1
-rw-r--r--tests/check.mk1
-rwxr-xr-xtests/misc/uniq58
8 files changed, 75 insertions, 6 deletions
diff --git a/NEWS b/NEWS
index d7ec1b2f1..8cee7f5fa 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,11 @@ GNU coreutils NEWS -*- outline -*-
Printing of such large-numbered, kernel-only (not in /etc/group) group-IDs
was suppressed in 6.11 due to ignorance that they are useful.
+ uniq: avoid subtle field-skipping malfunction due to isblank misuse.
+ In some locales on some systems, isblank(240) (aka &nbsp) is nonzero.
+ On such systems, uniq --skip-fields=N would fail to skip the proper
+ number of fields for some inputs.
+
tac: avoid segfault with --regex (-r) and multiple files, e.g.,
"echo > x; tac -r x x". [bug present at least in textutils-1.8b, from 1992]
diff --git a/configure.ac b/configure.ac
index 6a7c5a8df..7e5546a45 100644
--- a/configure.ac
+++ b/configure.ac
@@ -335,6 +335,9 @@ AC_SUBST([CONFIG_STATUS_DEPENDENCIES])
AM_GNU_GETTEXT([external], [need-formatstring-macros])
AM_GNU_GETTEXT_VERSION([0.15])
+# For a test of uniq: it uses the $LOCALE_FR envvar.
+gt_LOCALE_FR
+
AC_CONFIG_FILES(
Makefile
doc/Makefile
diff --git a/src/cut.c b/src/cut.c
index e0ce42a6b..f6254e45b 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -1,5 +1,5 @@
/* cut - remove parts of lines of files
- Copyright (C) 1997-2007 Free Software Foundation, Inc.
+ Copyright (C) 1997-2008 Free Software Foundation, Inc.
Copyright (C) 1984 David M. Ihnat
This program is free software: you can redistribute it and/or modify
@@ -372,7 +372,8 @@ set_fields (const char *fieldstr)
initial = (lhs_specified ? value : 1);
value = 0;
}
- else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
+ else if (*fieldstr == ',' ||
+ isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
{
in_digits = false;
/* Ending the string, or this field/byte sublist. */
diff --git a/src/seq.c b/src/seq.c
index 7fc89ad5d..efc0c7243 100644
--- a/src/seq.c
+++ b/src/seq.c
@@ -145,7 +145,7 @@ scan_arg (const char *arg)
}
/* We don't output spaces or '+' so don't include in width */
- while (isspace (*arg) || *arg == '+')
+ while (isspace (to_uchar (*arg)) || *arg == '+')
arg++;
ret.width = strlen (arg);
diff --git a/src/uniq.c b/src/uniq.c
index 1a54055b3..b167bbb08 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -1,5 +1,5 @@
/* uniq -- remove duplicate lines from a sorted file
- Copyright (C) 86, 91, 1995-2007 Free Software Foundation, Inc.
+ Copyright (C) 86, 91, 1995-2008 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -213,9 +213,9 @@ find_field (const struct linebuffer *line)
for (count = 0; count < skip_fields && i < size; count++)
{
- while (i < size && isblank (lp[i]))
+ while (i < size && isblank (to_uchar (lp[i])))
i++;
- while (i < size && !isblank (lp[i]))
+ while (i < size && !isblank (to_uchar (lp[i])))
i++;
}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 343d71948..515cfe6fe 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -211,6 +211,7 @@ TESTS = \
misc/tsort \
misc/tty-eof \
misc/unexpand \
+ misc/uniq \
chmod/c-option \
chmod/equal-x \
chmod/equals \
diff --git a/tests/check.mk b/tests/check.mk
index f62661ca0..abe03afcf 100644
--- a/tests/check.mk
+++ b/tests/check.mk
@@ -45,6 +45,7 @@ built_programs = \
# Append this, because automake does the same.
TESTS_ENVIRONMENT = \
+ LOCALE_FR='$(LOCALE_FR)' \
abs_top_builddir='$(abs_top_builddir)' \
abs_top_srcdir='$(abs_top_srcdir)' \
built_programs="`$(built_programs)`" \
diff --git a/tests/misc/uniq b/tests/misc/uniq
new file mode 100755
index 000000000..6587844ce
--- /dev/null
+++ b/tests/misc/uniq
@@ -0,0 +1,58 @@
+#!/bin/sh
+# Test for a subtle, system-and-locale-dependent bug in uniq.
+
+# Copyright (C) 2008 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+case $LOCALE_FR in
+''|none) echo "$0: skipping this test -- no appropriate locale" 1>&2; exit 77;;
+esac
+
+: ${srcdir=.}
+. $top_srcdir/tests/require-perl
+
+me=`echo $0|sed 's,.*/,,'`
+exec $PERL -w -I$top_srcdir/tests -MCoreutils -M"CuTmpdir qw($me)" -- - <<\EOF
+require 5.003;
+use strict;
+
+my $prog = 'uniq';
+
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+# I've only ever triggered the problem in a non-C locale.
+my $locale = $ENV{LOCALE_FR};
+
+# See if isblank returns true for nbsp.
+my $x = `env printf '\xa0'| LC_ALL=$locale tr '[:blank:]' x`;
+# If so, expect just one line of output in the schar test.
+# Otherwise, expect two.
+my $in = " y z\n\xa0 y z\n";
+my $schar_exp = $x eq 'x' ? " y z\n" : $in;
+
+my @Tests =
+ (
+ ['schar', '-f1', {IN => $in}, {OUT => $schar_exp},
+ {ENV => "LC_ALL=$locale"},
+ ],
+ );
+
+my $save_temps = $ENV{DEBUG};
+my $verbose = $ENV{VERBOSE};
+
+my $fail = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose);
+exit $fail;
+EOF