From 0f9fc7b3b9aee1d001ca60de1125e6e714e70ac7 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Mon, 18 Jul 2016 19:04:45 +0200 Subject: sort: with -h, disallow thousands separator between number and unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * src/sort.c (traverse_raw_number): Accept thousands separator only if it is immediately followed by a digit. * tests/misc/sort-h-thousands-sep.sh: Cover the fix for this bug. Suggested by Pádraig Brady in http://bugs.gnu.org/24015 --- src/sort.c | 11 ++++++++++- tests/misc/sort-h-thousands-sep.sh | 25 +++++++++++++------------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/sort.c b/src/sort.c index 038f6aee3..079547595 100644 --- a/src/sort.c +++ b/src/sort.c @@ -1895,6 +1895,7 @@ traverse_raw_number (char const **number) char const *p = *number; unsigned char ch; unsigned char max_digit = '\0'; + bool ends_with_thousands_sep = false; /* Scan to end of number. Decimals or separators not followed by digits stop the scan. @@ -1910,10 +1911,18 @@ traverse_raw_number (char const **number) /* Allow to skip only one occurrence of thousands_sep to avoid finding the unit in the next column in case thousands_sep matches as blank and is used as column delimiter. */ - if (*p == thousands_sep) + ends_with_thousands_sep = (*p == thousands_sep); + if (ends_with_thousands_sep) ++p; } + if (ends_with_thousands_sep) + { + /* thousands_sep not followed by digit is not allowed. */ + *number = p - 2; + return max_digit; + } + if (ch == decimal_point) while (ISDIGIT (ch = *p++)) if (max_digit < ch) diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh index 17f1b6c98..3ffa89eeb 100755 --- a/tests/misc/sort-h-thousands-sep.sh +++ b/tests/misc/sort-h-thousands-sep.sh @@ -18,28 +18,29 @@ . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src print_ver_ sort + test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \ || skip_ 'The Swedish locale with blank thousands separator is unavailable.' -tee exp1 > in << _EOF_ -1 1k 4 003 1M -2k 2M 4 002 2 -3M 3 4 001 3k +tee exp1 exp3 > in << _EOF_ +1 1k 1 M 4 003 1M +2k 2M 2 k 4 002 2 +3M 3 3 G 4 001 3k _EOF_ cat > exp2 << _EOF_ -3M 3 4 001 3k -1 1k 4 003 1M -2k 2M 4 002 2 +3M 3 3 G 4 001 3k +1 1k 1 M 4 003 1M +2k 2M 2 k 4 002 2 _EOF_ -cat > exp3 << _EOF_ -3M 3 4 001 3k -2k 2M 4 002 2 -1 1k 4 003 1M +cat > exp5 << _EOF_ +3M 3 3 G 4 001 3k +2k 2M 2 k 4 002 2 +1 1k 1 M 4 003 1M _EOF_ -for i in 1 2 3; do +for i in 1 2 3 5; do LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1 compare "exp${i}" "out${i}" || fail=1 done -- cgit v1.2.3-70-g09d2