From 03f6f32c416e4bca1cfe66fe85834a91636f499d Mon Sep 17 00:00:00 2001 From: Pádraig Brady Date: Tue, 11 May 2010 18:46:21 +0100 Subject: sort: --debug: output data independent warnings and info * src/sort.c (usage): Mention --debug can output warnings to stderr. Also split the translatable string to aid translation. (default_key_compare): A new function refactored from main(), and now also called from the new key_warnings() function. (key_to_opts): A new function refactored from incompatible_options(), and now also called from the new key_warnings() function. (key_numeric): A new function refactored to test if key is numeric. (key_warnings): A new function to output warnings to stderr, about questionable use of various options. Currently it warns about zero length keys and ineffective global options. (incompatible_options): Refactor out key_to_opts() (main): Use key_init() to initialize gkey. Refactor out default_key_compare(). Call key_warnings() in debug mode. * doc/coreutils.texi (sort invocation): Mention that warnings are output by --debug. * tests/misc/sort-debug-warn: A new test for debug warnings. * tests/Makefile.am: Reference the new test. * NEWS: Mention the new feature --- NEWS | 2 +- doc/coreutils.texi | 1 + src/sort.c | 243 +++++++++++++++++++++++++++++++++++---------- tests/Makefile.am | 1 + tests/misc/sort-debug-warn | 91 +++++++++++++++++ 5 files changed, 283 insertions(+), 55 deletions(-) create mode 100755 tests/misc/sort-debug-warn diff --git a/NEWS b/NEWS index 4c2da67f1..5d7b81f97 100644 --- a/NEWS +++ b/NEWS @@ -5,7 +5,7 @@ GNU coreutils NEWS -*- outline -*- ** New features sort now accepts the --debug option, to highlight the part of the - line significant in the sort. + line significant in the sort, and warn about questionable options. ** Changes in behavior diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 6714ada86..cd99bd0b7 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -3944,6 +3944,7 @@ of the line being used in the sort. @item --debug Highlight the portion of each line used for sorting. +Also issue warnings about questionable usage to stderr. @item --batch-size=@var{nmerge} @opindex --batch-size diff --git a/src/sort.c b/src/sort.c index 38c5ed2b2..e099640f3 100644 --- a/src/sort.c +++ b/src/sort.c @@ -186,6 +186,7 @@ struct keyfield bool month; /* Flag for comparison by month name. */ bool reverse; /* Reverse the sense of comparison. */ bool version; /* sort by version number */ + bool obsolete_used; /* obsolescent key option format is used. */ struct keyfield *next; /* Next keyfield to try. */ }; @@ -375,7 +376,10 @@ Other options:\n\ -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\ --compress-program=PROG compress temporaries with PROG;\n\ decompress them with PROG -d\n\ - --debug annotate the part of the line used to sort\n\ +"), stdout); + fputs (_("\ + --debug annotate the part of the line used to sort,\n\ + and warn about questionable usage to stderr\n\ --files0-from=F read input from the files specified by\n\ NUL-terminated names in file F;\n\ If F is - then read names from standard input\n\ @@ -2158,6 +2162,161 @@ debug_key (char const *sline, char const *sfield, char const *efield, mark_key (offset, width); } +/* Testing if a key is numeric is done in various places. */ + +static inline bool +key_numeric (struct keyfield const *key) +{ + return key->numeric || key->general_numeric || key->human_numeric; +} + +/* Return whether sorting options specified for key. */ + +static bool +default_key_compare (struct keyfield const *key) +{ + return ! (key->ignore + || key->translate + || key->skipsblanks + || key->skipeblanks + || key_numeric (key) + || key->month + || key->version + || key->random + /* || key->reverse */ + ); +} + +/* Convert a key to the short options used to specify it. */ + +static void +key_to_opts (struct keyfield const *key, char *opts) +{ + if (key->skipsblanks || key->skipeblanks) + *opts++ = 'b';/* either disables global -b */ + if (key->ignore == nondictionary) + *opts++ = 'd'; + if (key->translate) + *opts++ = 'f'; + if (key->general_numeric) + *opts++ = 'g'; + if (key->human_numeric) + *opts++ = 'h'; + if (key->ignore == nonprinting) + *opts++ = 'i'; + if (key->month) + *opts++ = 'M'; + if (key->numeric) + *opts++ = 'n'; + if (key->random) + *opts++ = 'R'; + if (key->reverse) + *opts++ = 'r'; + if (key->version) + *opts++ = 'V'; + *opts = '\0'; +} + +/* Output data independent key warnings to stderr. */ + +static void +key_warnings (struct keyfield const *gkey, bool gkey_only) +{ + struct keyfield const *key; + struct keyfield ugkey = *gkey; + unsigned long keynum = 1; + + for (key = keylist; key; key = key->next, keynum++) + { + if (key->obsolete_used) + { + /* obsolescent syntax +A.x -B.y is equivalent to: + -k A+1.x+1,B.y (when y = 0) + -k A+1.x+1,B+1.y (when y > 0) */ + char obuf[INT_BUFSIZE_BOUND (size_t) * 2 + 4]; /* +# -# */ + char nbuf[INT_BUFSIZE_BOUND (size_t) * 2 + 5]; /* -k #,# */ + + char *po = obuf; + char *pn = nbuf; + + size_t sword = key->sword; + size_t eword = key->eword; + if (sword == SIZE_MAX) + sword++; + + po += sprintf (po, "+%" PRIuMAX, (uintmax_t) sword); + pn += sprintf (pn, "-k %" PRIuMAX, (uintmax_t) sword + 1); + if (key->eword != SIZE_MAX) + { + po += sprintf (po, " -%" PRIuMAX, (uintmax_t) eword + 1); + pn += sprintf (pn, ",%" PRIuMAX, + (uintmax_t) eword + 1 + (key->echar == SIZE_MAX)); + } + error (0, 0, _("obsolescent key `%s' used; consider `%s' instead"), + obuf, nbuf); + } + + /* Warn about field specs that will never match. */ + if (key->sword != SIZE_MAX && key->eword < key->sword) + error (0, 0, _("key %lu has zero width and will be ignored"), keynum); + + /* Warn about significant leading blanks. */ + if (!gkey_only && tab == TAB_DEFAULT && !key->skipsblanks + && !key_numeric (key) && !key->month) + error (0, 0, _("leading blanks are significant in key %lu; " + "consider also specifying `b'"), keynum); + + /* Warn about numeric comparisons spanning fields, + as field delimiters could be interpreted as part + of the number (maybe only in other locales). */ + if (!gkey_only && key_numeric (key)) + { + size_t sword = key->sword + 1; + size_t eword = key->eword + 1; + if (!sword) + sword++; + if (sword != eword) + error (0, 0, _("key %lu is numeric and spans multiple fields"), + keynum); + } + + /* Flag global options not copied or specified in any key. */ + if (ugkey.ignore && (ugkey.ignore == key->ignore)) + ugkey.ignore = NULL; + if (ugkey.translate && (ugkey.translate == key->translate)) + ugkey.translate = NULL; + ugkey.skipsblanks &= !key->skipsblanks; + ugkey.skipeblanks &= !key->skipeblanks; + ugkey.month &= !key->month; + ugkey.numeric &= !key->numeric; + ugkey.general_numeric &= !key->general_numeric; + ugkey.human_numeric &= !key->human_numeric; + ugkey.random &= !key->random; + ugkey.version &= !key->version; + ugkey.reverse &= !key->reverse; + } + + /* Warn about ignored global options flagged above. + Note if gkey is the only one in the list, all flags are cleared. */ + if (!default_key_compare (&ugkey) + || (ugkey.reverse && (stable || unique) && keylist)) + { + bool ugkey_reverse = ugkey.reverse; + if (!(stable || unique)) + ugkey.reverse = false; + /* The following is too big, but guaranteed to be "big enough". */ + char opts[sizeof short_options]; + key_to_opts (&ugkey, opts); + error (0, 0, + ngettext ("option `-%s' is ignored", + "options `-%s' are ignored", + select_plural (strlen (opts))), opts); + ugkey.reverse = ugkey_reverse; + } + if (ugkey.reverse && !(stable || unique) && keylist) + error (0, 0, _("option `-r' only applies to last-resort comparison")); +} + /* Compare two lines A and B trying every key in sequence until there are no more keys or a difference is found. */ @@ -2193,7 +2352,7 @@ keycompare (const struct line *a, const struct line *b, bool show_debug) if (key->random) diff = compare_random (texta, lena, textb, lenb); - else if (key->numeric || key->general_numeric || key->human_numeric) + else if (key_numeric (key)) { char savea = *lima, saveb = *limb; char const* ea = lima; @@ -3235,35 +3394,18 @@ incompatible_options (char const *opts) static void check_ordering_compatibility (void) { - struct keyfield const *key; + struct keyfield *key; for (key = keylist; key; key = key->next) if ((1 < (key->random + key->numeric + key->general_numeric + key->month + key->version + !!key->ignore + key->human_numeric)) || (key->random && key->translate)) { - /* The following is too big, but guaranteed to be "big enough". */ + /* The following is too big, but guaranteed to be "big enough". */ char opts[sizeof short_options]; - char *p = opts; - if (key->ignore == nondictionary) - *p++ = 'd'; - if (key->translate) - *p++ = 'f'; - if (key->general_numeric) - *p++ = 'g'; - if (key->human_numeric) - *p++ = 'h'; - if (key->ignore == nonprinting) - *p++ = 'i'; - if (key->month) - *p++ = 'M'; - if (key->numeric) - *p++ = 'n'; - if (key->version) - *p++ = 'V'; - if (key->random) - *p++ = 'R'; - *p = '\0'; + /* Clear flags we're not interested in. */ + key->skipsblanks = key->skipeblanks = key->reverse = false; + key_to_opts (key, opts); incompatible_options (opts); } } @@ -3391,6 +3533,7 @@ main (int argc, char **argv) struct keyfield *key; struct keyfield key_buf; struct keyfield gkey; + bool gkey_only = false; char const *s; int c = 0; char checkonly = 0; @@ -3494,14 +3637,8 @@ main (int argc, char **argv) /* The signal mask is known, so it is safe to invoke exit_cleanup. */ atexit (exit_cleanup); - gkey.sword = gkey.eword = SIZE_MAX; - gkey.ignore = NULL; - gkey.translate = NULL; - gkey.numeric = gkey.general_numeric = gkey.human_numeric = false; - gkey.iec_present = -1; - gkey.random = gkey.version = false; - gkey.month = gkey.reverse = false; - gkey.skipsblanks = gkey.skipeblanks = false; + key_init (&gkey); + gkey.sword = SIZE_MAX; files = xnmalloc (argc, sizeof *files); @@ -3573,6 +3710,7 @@ main (int argc, char **argv) badfieldspec (optarg1, N_("stray character in field spec")); } + key->obsolete_used = true; insertkey (key); } } @@ -3836,17 +3974,7 @@ main (int argc, char **argv) /* Inheritance of global options to individual keys. */ for (key = keylist; key; key = key->next) { - if (! (key->ignore - || key->translate - || (key->skipsblanks - || key->reverse - || key->skipeblanks - || key->month - || key->numeric - || key->version - || key->general_numeric - || key->human_numeric - || key->random))) + if (default_key_compare (key) && !key->reverse) { key->ignore = gkey.ignore; key->translate = gkey.translate; @@ -3856,34 +3984,41 @@ main (int argc, char **argv) key->numeric = gkey.numeric; key->general_numeric = gkey.general_numeric; key->human_numeric = gkey.human_numeric; + key->version = gkey.version; key->random = gkey.random; key->reverse = gkey.reverse; - key->version = gkey.version; } need_random |= key->random; } - if (!keylist && (gkey.ignore - || gkey.translate - || (gkey.skipsblanks - || gkey.skipeblanks - || gkey.month - || gkey.numeric - || gkey.general_numeric - || gkey.human_numeric - || gkey.random - || gkey.version))) + if (!keylist && !default_key_compare (&gkey)) { + gkey_only = true; insertkey (&gkey); need_random |= gkey.random; } check_ordering_compatibility (); + /* Disable this combination so that users are less likely + to inadvertantly update a file with debugging enabled. + Also it simplifies the code for handling temp files. */ if (debug && outfile) error (SORT_FAILURE, 0, _("options -o and --debug are incompatible")); + if (debug) + { + /* Always output the locale in debug mode, since this + is such a common source of confusion. */ + if (hard_LC_COLLATE) + error (0, 0, _("using %s sorting rules"), + quote (setlocale (LC_COLLATE, NULL))); + else + error (0, 0, _("using simple byte comparison")); + key_warnings (&gkey, gkey_only); + } + reverse = gkey.reverse; if (need_random) diff --git a/tests/Makefile.am b/tests/Makefile.am index 46d388ada..a732c631f 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -225,6 +225,7 @@ TESTS = \ misc/sort-compress \ misc/sort-continue \ misc/sort-debug-keys \ + misc/sort-debug-warn \ misc/sort-files0-from \ misc/sort-float \ misc/sort-merge \ diff --git a/tests/misc/sort-debug-warn b/tests/misc/sort-debug-warn new file mode 100755 index 000000000..3a7b01a0c --- /dev/null +++ b/tests/misc/sort-debug-warn @@ -0,0 +1,91 @@ +#!/bin/sh +# Test warnings for sort options + +# Copyright (C) 2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +if test "$VERBOSE" = yes; then + set -x + sort --version +fi + +. $srcdir/test-lib.sh + +cat <<\EOF > exp +sort: using simple byte comparison +sort: key 1 has zero width and will be ignored +sort: leading blanks are significant in key 1; consider also specifying `b' +sort: using simple byte comparison +sort: options `-bghMRrV' are ignored +sort: using simple byte comparison +sort: options `-bghMRV' are ignored +sort: option `-r' only applies to last-resort comparison +sort: using simple byte comparison +sort: option `-r' only applies to last-resort comparison +sort: using simple byte comparison +sort: leading blanks are significant in key 2; consider also specifying `b' +sort: options `-bg' are ignored +sort: using simple byte comparison +sort: using simple byte comparison +sort: option `-b' is ignored +sort: using simple byte comparison +sort: using simple byte comparison +sort: leading blanks are significant in key 1; consider also specifying `b' +sort: using simple byte comparison +sort: leading blanks are significant in key 1; consider also specifying `b' +sort: using simple byte comparison +sort: leading blanks are significant in key 1; consider also specifying `b' +sort: option `-d' is ignored +sort: using simple byte comparison +sort: leading blanks are significant in key 1; consider also specifying `b' +sort: option `-i' is ignored +sort: using simple byte comparison +sort: using simple byte comparison +sort: using simple byte comparison +EOF + +sort -s -k2,1 --debug /dev/null 2>>out +sort -s -rRVMhgb -k1,1n --debug /dev/null 2>>out +sort -rRVMhgb -k1,1n --debug /dev/null 2>>out +sort -r -k1,1n --debug /dev/null 2>>out +sort -gbr -k1,1n -k1,1r --debug /dev/null 2>>out +sort -b -k1b,1bn --debug /dev/null 2>>out # no warning +sort -b -k1,1bn --debug /dev/null 2>>out +sort -b -k1,1bn -k2b,2 --debug /dev/null 2>>out # no warning +sort -r -k1,1r --debug /dev/null 2>>out # no warning for redundant options +sort -i -k1,1i --debug /dev/null 2>>out # no warning +sort -d -k1,1b --debug /dev/null 2>>out +sort -i -k1,1d --debug /dev/null 2>>out +sort -r --debug /dev/null 2>>out #no warning +sort -rM --debug /dev/null 2>>out #no warning +sort -rM -k1,1 --debug /dev/null 2>>out #no warning + +compare exp out || fail=1 + +cat <<\EOF > exp +sort: using simple byte comparison +sort: key 1 is numeric and spans multiple fields +sort: obsolescent key `+2 -1' used; consider `-k 3,1' instead +sort: key 2 has zero width and will be ignored +sort: leading blanks are significant in key 2; consider also specifying `b' +sort: option `-b' is ignored +sort: option `-r' only applies to last-resort comparison +EOF + +sort --debug -rb -k2n +2 -1b /dev/null 2>out + +compare exp out || fail=1 + +Exit $fail -- cgit v1.2.3-70-g09d2