From 59e2e55d0f154a388adc9bac37d2b45f2ba971f8 Mon Sep 17 00:00:00 2001 From: Pádraig Brady Date: Fri, 26 Feb 2010 15:33:16 +0000 Subject: sort: fix issues with month sorting in some locales MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * src/sort.c (char fold_toupper[]): Change to unsigned so that the correct comparisons are made in getmonth(). This fixes unibyte locales where abbreviated months have characters that are > 0x7F, but it also works for multibyte locales with the caveat that multibyte characters are matched case sensitively. With this change, the following example sorts correctly: $ echo -e "1 márta\n2 Feabhra" | LANG=ga_IE.utf8 sort -k2,2M 2 Feabhra 1 márta * src/sort.c (inittables): Since we ignore blanks around months in the input, don't include them when they're present in the locale. With this change, the following example sorts correctly: $ echo -e "1 2月\n2 1月" | LANG=ja_JP.utf8 sort -k2,2M 2 1月 1 2月 * tests/misc/sort-month: A new test to exercise the above cases. * tests/Makefile.am: Reference the new test. * NEWS: Mention the fix. --- NEWS | 5 +++++ src/sort.c | 11 ++++++----- tests/Makefile.am | 1 + tests/misc/sort-month | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 5 deletions(-) create mode 100755 tests/misc/sort-month diff --git a/NEWS b/NEWS index 595a8cf79..2a3ca63be 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,11 @@ GNU coreutils NEWS -*- outline -*- ls --color once again honors the 'NORMAL' dircolors directive. [bug introduced in coreutils-6.11] + sort -M now handles abbreviated months that are aligned using blanks + in the locale database. Also locales with 8 bit characters are + handled correctly, including multi byte locales with the caveat + that multi byte characters are matched case sensitively. 
+ ** New features join now accepts the --header option, to treat the first line of each diff --git a/src/sort.c b/src/sort.c index 481fdb8bb..39cb6d6f2 100644 --- a/src/sort.c +++ b/src/sort.c @@ -209,7 +209,7 @@ static bool nonprinting[UCHAR_LIM]; static bool nondictionary[UCHAR_LIM]; /* Translation table folding lower case to upper. */ -static char fold_toupper[UCHAR_LIM]; +static unsigned char fold_toupper[UCHAR_LIM]; #define MONTHS_PER_YEAR 12 @@ -1129,7 +1129,7 @@ inittables (void) { char const *s; size_t s_len; - size_t j; + size_t j, k; char *name; s = (char *) nl_langinfo (ABMON_1 + i); @@ -1137,9 +1137,10 @@ inittables (void) monthtab[i].name = name = xmalloc (s_len + 1); monthtab[i].val = i + 1; - for (j = 0; j < s_len; j++) - name[j] = fold_toupper[to_uchar (s[j])]; - name[j] = '\0'; + for (j = k = 0; j < s_len; j++) + if (! isblank (to_uchar (s[j]))) + name[k++] = fold_toupper[to_uchar (s[j])]; + name[k] = '\0'; } qsort ((void *) monthtab, MONTHS_PER_YEAR, sizeof *monthtab, struct_month_cmp); diff --git a/tests/Makefile.am b/tests/Makefile.am index 68b3f71b7..db1610d6b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -225,6 +225,7 @@ TESTS = \ misc/sort-files0-from \ misc/sort-merge \ misc/sort-merge-fdlimit \ + misc/sort-month \ misc/sort-rand \ misc/sort-version \ misc/split-a \ diff --git a/tests/misc/sort-month b/tests/misc/sort-month new file mode 100755 index 000000000..aee5215d5 --- /dev/null +++ b/tests/misc/sort-month @@ -0,0 +1,38 @@ +#!/bin/sh +# Test sorting of abbreviated months from the locale + +# Copyright (C) 2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +if test "$VERBOSE" = yes; then + set -x + sort --version +fi + +. $srcdir/test-lib.sh + +locale --version >/dev/null 2>&1 || + skip_test_ 'The locale utility is not present' + +# C will be used if the locale is not present +for LOC in "$LOCALE_FR" "$LOCALE_FR_UTF8" "ja_JP.utf8"; do + mon="$(LC_ALL="$LOC" locale abmon 2>/dev/null);" + smon=$(LC_ALL="$LOC" locale abmon 2>/dev/null | + tr ';' '\n' | shuf | nl | LC_ALL="$LOC" sort -b -k2,2M | + cut -f2 | tr '\n' ';') + test "$mon" = "$smon" || { fail=1; break; } +done + +Exit $fail -- cgit v1.2.3-70-g09d2