summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPádraig Brady <P@draigBrady.com>2010-05-11 18:46:21 +0100
committerPádraig Brady <P@draigBrady.com>2010-05-16 01:08:33 +0100
commit03f6f32c416e4bca1cfe66fe85834a91636f499d (patch)
treeb7a5b44a0197e5b1b847d5771dad823d399cb782
parent144d6e5f53b59cec504b1194b4f420144896f9ae (diff)
downloadcoreutils-03f6f32c416e4bca1cfe66fe85834a91636f499d.tar.xz
sort: --debug: output data independent warnings and info
* src/sort.c (usage): Mention --debug can output warnings to stderr. Also split the translatable string to aid translation. (default_key_compare): A new function refactored from main(), and now also called from the new key_warnings() function. (key_to_opts): A new function refactored from incompatible_options(), and now also called from the new key_warnings() function. (key_numeric): A new function refactored to test if key is numeric. (key_warnings): A new function to output warnings to stderr, about questionable use of various options. Currently it warns about zero length keys and ineffective global options. (incompatible_options): Refactor out key_to_opts() (main): Use key_init() to initialize gkey. Refactor out default_key_compare(). Call key_warnings() in debug mode. * doc/coreutils.texi (sort invocation): Mention that warnings are output by --debug. * tests/misc/sort-debug-warn: A new test for debug warnings. * tests/Makefile.am: Reference the new test. * NEWS: Mention the new feature
-rw-r--r--NEWS2
-rw-r--r--doc/coreutils.texi1
-rw-r--r--src/sort.c243
-rw-r--r--tests/Makefile.am1
-rwxr-xr-xtests/misc/sort-debug-warn91
5 files changed, 283 insertions, 55 deletions
diff --git a/NEWS b/NEWS
index 4c2da67f1..5d7b81f97 100644
--- a/NEWS
+++ b/NEWS
@@ -5,7 +5,7 @@ GNU coreutils NEWS -*- outline -*-
** New features
sort now accepts the --debug option, to highlight the part of the
- line significant in the sort.
+ line significant in the sort, and warn about questionable options.
** Changes in behavior
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 6714ada86..cd99bd0b7 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3944,6 +3944,7 @@ of the line being used in the sort.
@item --debug
Highlight the portion of each line used for sorting.
+Also issue warnings about questionable usage to stderr.
@item --batch-size=@var{nmerge}
@opindex --batch-size
diff --git a/src/sort.c b/src/sort.c
index 38c5ed2b2..e099640f3 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -186,6 +186,7 @@ struct keyfield
bool month; /* Flag for comparison by month name. */
bool reverse; /* Reverse the sense of comparison. */
bool version; /* sort by version number */
+ bool obsolete_used; /* obsolescent key option format is used. */
struct keyfield *next; /* Next keyfield to try. */
};
@@ -375,7 +376,10 @@ Other options:\n\
-C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
--compress-program=PROG compress temporaries with PROG;\n\
decompress them with PROG -d\n\
- --debug annotate the part of the line used to sort\n\
+"), stdout);
+ fputs (_("\
+ --debug annotate the part of the line used to sort,\n\
+ and warn about questionable usage to stderr\n\
--files0-from=F read input from the files specified by\n\
NUL-terminated names in file F;\n\
If F is - then read names from standard input\n\
@@ -2158,6 +2162,161 @@ debug_key (char const *sline, char const *sfield, char const *efield,
mark_key (offset, width);
}
+/* Testing if a key is numeric is done in various places. */
+
+static inline bool
+key_numeric (struct keyfield const *key)
+{
+ return key->numeric || key->general_numeric || key->human_numeric;
+}
+
+/* Return whether sorting options specified for key. */
+
+static bool
+default_key_compare (struct keyfield const *key)
+{
+ return ! (key->ignore
+ || key->translate
+ || key->skipsblanks
+ || key->skipeblanks
+ || key_numeric (key)
+ || key->month
+ || key->version
+ || key->random
+ /* || key->reverse */
+ );
+}
+
+/* Convert a key to the short options used to specify it. */
+
+static void
+key_to_opts (struct keyfield const *key, char *opts)
+{
+ if (key->skipsblanks || key->skipeblanks)
+ *opts++ = 'b';/* either disables global -b */
+ if (key->ignore == nondictionary)
+ *opts++ = 'd';
+ if (key->translate)
+ *opts++ = 'f';
+ if (key->general_numeric)
+ *opts++ = 'g';
+ if (key->human_numeric)
+ *opts++ = 'h';
+ if (key->ignore == nonprinting)
+ *opts++ = 'i';
+ if (key->month)
+ *opts++ = 'M';
+ if (key->numeric)
+ *opts++ = 'n';
+ if (key->random)
+ *opts++ = 'R';
+ if (key->reverse)
+ *opts++ = 'r';
+ if (key->version)
+ *opts++ = 'V';
+ *opts = '\0';
+}
+
+/* Output data independent key warnings to stderr. */
+
+static void
+key_warnings (struct keyfield const *gkey, bool gkey_only)
+{
+ struct keyfield const *key;
+ struct keyfield ugkey = *gkey;
+ unsigned long keynum = 1;
+
+ for (key = keylist; key; key = key->next, keynum++)
+ {
+ if (key->obsolete_used)
+ {
+ /* obsolescent syntax +A.x -B.y is equivalent to:
+ -k A+1.x+1,B.y (when y = 0)
+ -k A+1.x+1,B+1.y (when y > 0) */
+ char obuf[INT_BUFSIZE_BOUND (size_t) * 2 + 4]; /* +# -# */
+ char nbuf[INT_BUFSIZE_BOUND (size_t) * 2 + 5]; /* -k #,# */
+
+ char *po = obuf;
+ char *pn = nbuf;
+
+ size_t sword = key->sword;
+ size_t eword = key->eword;
+ if (sword == SIZE_MAX)
+ sword++;
+
+ po += sprintf (po, "+%" PRIuMAX, (uintmax_t) sword);
+ pn += sprintf (pn, "-k %" PRIuMAX, (uintmax_t) sword + 1);
+ if (key->eword != SIZE_MAX)
+ {
+ po += sprintf (po, " -%" PRIuMAX, (uintmax_t) eword + 1);
+ pn += sprintf (pn, ",%" PRIuMAX,
+ (uintmax_t) eword + 1 + (key->echar == SIZE_MAX));
+ }
+ error (0, 0, _("obsolescent key `%s' used; consider `%s' instead"),
+ obuf, nbuf);
+ }
+
+ /* Warn about field specs that will never match. */
+ if (key->sword != SIZE_MAX && key->eword < key->sword)
+ error (0, 0, _("key %lu has zero width and will be ignored"), keynum);
+
+ /* Warn about significant leading blanks. */
+ if (!gkey_only && tab == TAB_DEFAULT && !key->skipsblanks
+ && !key_numeric (key) && !key->month)
+ error (0, 0, _("leading blanks are significant in key %lu; "
+ "consider also specifying `b'"), keynum);
+
+ /* Warn about numeric comparisons spanning fields,
+ as field delimiters could be interpreted as part
+ of the number (maybe only in other locales). */
+ if (!gkey_only && key_numeric (key))
+ {
+ size_t sword = key->sword + 1;
+ size_t eword = key->eword + 1;
+ if (!sword)
+ sword++;
+ if (sword != eword)
+ error (0, 0, _("key %lu is numeric and spans multiple fields"),
+ keynum);
+ }
+
+ /* Flag global options not copied or specified in any key. */
+ if (ugkey.ignore && (ugkey.ignore == key->ignore))
+ ugkey.ignore = NULL;
+ if (ugkey.translate && (ugkey.translate == key->translate))
+ ugkey.translate = NULL;
+ ugkey.skipsblanks &= !key->skipsblanks;
+ ugkey.skipeblanks &= !key->skipeblanks;
+ ugkey.month &= !key->month;
+ ugkey.numeric &= !key->numeric;
+ ugkey.general_numeric &= !key->general_numeric;
+ ugkey.human_numeric &= !key->human_numeric;
+ ugkey.random &= !key->random;
+ ugkey.version &= !key->version;
+ ugkey.reverse &= !key->reverse;
+ }
+
+ /* Warn about ignored global options flagged above.
+ Note if gkey is the only one in the list, all flags are cleared. */
+ if (!default_key_compare (&ugkey)
+ || (ugkey.reverse && (stable || unique) && keylist))
+ {
+ bool ugkey_reverse = ugkey.reverse;
+ if (!(stable || unique))
+ ugkey.reverse = false;
+ /* The following is too big, but guaranteed to be "big enough". */
+ char opts[sizeof short_options];
+ key_to_opts (&ugkey, opts);
+ error (0, 0,
+ ngettext ("option `-%s' is ignored",
+ "options `-%s' are ignored",
+ select_plural (strlen (opts))), opts);
+ ugkey.reverse = ugkey_reverse;
+ }
+ if (ugkey.reverse && !(stable || unique) && keylist)
+ error (0, 0, _("option `-r' only applies to last-resort comparison"));
+}
+
/* Compare two lines A and B trying every key in sequence until there
are no more keys or a difference is found. */
@@ -2193,7 +2352,7 @@ keycompare (const struct line *a, const struct line *b, bool show_debug)
if (key->random)
diff = compare_random (texta, lena, textb, lenb);
- else if (key->numeric || key->general_numeric || key->human_numeric)
+ else if (key_numeric (key))
{
char savea = *lima, saveb = *limb;
char const* ea = lima;
@@ -3235,35 +3394,18 @@ incompatible_options (char const *opts)
static void
check_ordering_compatibility (void)
{
- struct keyfield const *key;
+ struct keyfield *key;
for (key = keylist; key; key = key->next)
if ((1 < (key->random + key->numeric + key->general_numeric + key->month
+ key->version + !!key->ignore + key->human_numeric))
|| (key->random && key->translate))
{
- /* The following is too big, but guaranteed to be "big enough". */
+ /* The following is too big, but guaranteed to be "big enough". */
char opts[sizeof short_options];
- char *p = opts;
- if (key->ignore == nondictionary)
- *p++ = 'd';
- if (key->translate)
- *p++ = 'f';
- if (key->general_numeric)
- *p++ = 'g';
- if (key->human_numeric)
- *p++ = 'h';
- if (key->ignore == nonprinting)
- *p++ = 'i';
- if (key->month)
- *p++ = 'M';
- if (key->numeric)
- *p++ = 'n';
- if (key->version)
- *p++ = 'V';
- if (key->random)
- *p++ = 'R';
- *p = '\0';
+ /* Clear flags we're not interested in. */
+ key->skipsblanks = key->skipeblanks = key->reverse = false;
+ key_to_opts (key, opts);
incompatible_options (opts);
}
}
@@ -3391,6 +3533,7 @@ main (int argc, char **argv)
struct keyfield *key;
struct keyfield key_buf;
struct keyfield gkey;
+ bool gkey_only = false;
char const *s;
int c = 0;
char checkonly = 0;
@@ -3494,14 +3637,8 @@ main (int argc, char **argv)
/* The signal mask is known, so it is safe to invoke exit_cleanup. */
atexit (exit_cleanup);
- gkey.sword = gkey.eword = SIZE_MAX;
- gkey.ignore = NULL;
- gkey.translate = NULL;
- gkey.numeric = gkey.general_numeric = gkey.human_numeric = false;
- gkey.iec_present = -1;
- gkey.random = gkey.version = false;
- gkey.month = gkey.reverse = false;
- gkey.skipsblanks = gkey.skipeblanks = false;
+ key_init (&gkey);
+ gkey.sword = SIZE_MAX;
files = xnmalloc (argc, sizeof *files);
@@ -3573,6 +3710,7 @@ main (int argc, char **argv)
badfieldspec (optarg1,
N_("stray character in field spec"));
}
+ key->obsolete_used = true;
insertkey (key);
}
}
@@ -3836,17 +3974,7 @@ main (int argc, char **argv)
/* Inheritance of global options to individual keys. */
for (key = keylist; key; key = key->next)
{
- if (! (key->ignore
- || key->translate
- || (key->skipsblanks
- || key->reverse
- || key->skipeblanks
- || key->month
- || key->numeric
- || key->version
- || key->general_numeric
- || key->human_numeric
- || key->random)))
+ if (default_key_compare (key) && !key->reverse)
{
key->ignore = gkey.ignore;
key->translate = gkey.translate;
@@ -3856,34 +3984,41 @@ main (int argc, char **argv)
key->numeric = gkey.numeric;
key->general_numeric = gkey.general_numeric;
key->human_numeric = gkey.human_numeric;
+ key->version = gkey.version;
key->random = gkey.random;
key->reverse = gkey.reverse;
- key->version = gkey.version;
}
need_random |= key->random;
}
- if (!keylist && (gkey.ignore
- || gkey.translate
- || (gkey.skipsblanks
- || gkey.skipeblanks
- || gkey.month
- || gkey.numeric
- || gkey.general_numeric
- || gkey.human_numeric
- || gkey.random
- || gkey.version)))
+ if (!keylist && !default_key_compare (&gkey))
{
+ gkey_only = true;
insertkey (&gkey);
need_random |= gkey.random;
}
check_ordering_compatibility ();
+ /* Disable this combination so that users are less likely
+ to inadvertantly update a file with debugging enabled.
+ Also it simplifies the code for handling temp files. */
if (debug && outfile)
error (SORT_FAILURE, 0, _("options -o and --debug are incompatible"));
+ if (debug)
+ {
+ /* Always output the locale in debug mode, since this
+ is such a common source of confusion. */
+ if (hard_LC_COLLATE)
+ error (0, 0, _("using %s sorting rules"),
+ quote (setlocale (LC_COLLATE, NULL)));
+ else
+ error (0, 0, _("using simple byte comparison"));
+ key_warnings (&gkey, gkey_only);
+ }
+
reverse = gkey.reverse;
if (need_random)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 46d388ada..a732c631f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -225,6 +225,7 @@ TESTS = \
misc/sort-compress \
misc/sort-continue \
misc/sort-debug-keys \
+ misc/sort-debug-warn \
misc/sort-files0-from \
misc/sort-float \
misc/sort-merge \
diff --git a/tests/misc/sort-debug-warn b/tests/misc/sort-debug-warn
new file mode 100755
index 000000000..3a7b01a0c
--- /dev/null
+++ b/tests/misc/sort-debug-warn
@@ -0,0 +1,91 @@
+#!/bin/sh
+# Test warnings for sort options
+
+# Copyright (C) 2010 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+if test "$VERBOSE" = yes; then
+ set -x
+ sort --version
+fi
+
+. $srcdir/test-lib.sh
+
+cat <<\EOF > exp
+sort: using simple byte comparison
+sort: key 1 has zero width and will be ignored
+sort: leading blanks are significant in key 1; consider also specifying `b'
+sort: using simple byte comparison
+sort: options `-bghMRrV' are ignored
+sort: using simple byte comparison
+sort: options `-bghMRV' are ignored
+sort: option `-r' only applies to last-resort comparison
+sort: using simple byte comparison
+sort: option `-r' only applies to last-resort comparison
+sort: using simple byte comparison
+sort: leading blanks are significant in key 2; consider also specifying `b'
+sort: options `-bg' are ignored
+sort: using simple byte comparison
+sort: using simple byte comparison
+sort: option `-b' is ignored
+sort: using simple byte comparison
+sort: using simple byte comparison
+sort: leading blanks are significant in key 1; consider also specifying `b'
+sort: using simple byte comparison
+sort: leading blanks are significant in key 1; consider also specifying `b'
+sort: using simple byte comparison
+sort: leading blanks are significant in key 1; consider also specifying `b'
+sort: option `-d' is ignored
+sort: using simple byte comparison
+sort: leading blanks are significant in key 1; consider also specifying `b'
+sort: option `-i' is ignored
+sort: using simple byte comparison
+sort: using simple byte comparison
+sort: using simple byte comparison
+EOF
+
+sort -s -k2,1 --debug /dev/null 2>>out
+sort -s -rRVMhgb -k1,1n --debug /dev/null 2>>out
+sort -rRVMhgb -k1,1n --debug /dev/null 2>>out
+sort -r -k1,1n --debug /dev/null 2>>out
+sort -gbr -k1,1n -k1,1r --debug /dev/null 2>>out
+sort -b -k1b,1bn --debug /dev/null 2>>out # no warning
+sort -b -k1,1bn --debug /dev/null 2>>out
+sort -b -k1,1bn -k2b,2 --debug /dev/null 2>>out # no warning
+sort -r -k1,1r --debug /dev/null 2>>out # no warning for redundant options
+sort -i -k1,1i --debug /dev/null 2>>out # no warning
+sort -d -k1,1b --debug /dev/null 2>>out
+sort -i -k1,1d --debug /dev/null 2>>out
+sort -r --debug /dev/null 2>>out #no warning
+sort -rM --debug /dev/null 2>>out #no warning
+sort -rM -k1,1 --debug /dev/null 2>>out #no warning
+
+compare exp out || fail=1
+
+cat <<\EOF > exp
+sort: using simple byte comparison
+sort: key 1 is numeric and spans multiple fields
+sort: obsolescent key `+2 -1' used; consider `-k 3,1' instead
+sort: key 2 has zero width and will be ignored
+sort: leading blanks are significant in key 2; consider also specifying `b'
+sort: option `-b' is ignored
+sort: option `-r' only applies to last-resort comparison
+EOF
+
+sort --debug -rb -k2n +2 -1b /dev/null 2>out
+
+compare exp out || fail=1
+
+Exit $fail