summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/sort.c316
1 files changed, 165 insertions, 151 deletions
diff --git a/src/sort.c b/src/sort.c
index 54382631e..a5e5c07b9 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -91,6 +91,14 @@ struct rlimit { size_t rlim_cur; };
#define UCHAR_LIM (UCHAR_MAX + 1)
+#if HAVE_C99_STRTOLD
+# define long_double long double
+#else
+# define long_double double
+# undef strtold
+# define strtold strtod
+#endif
+
#ifndef DEFAULT_TMPDIR
# define DEFAULT_TMPDIR "/tmp"
#endif
@@ -1791,43 +1799,43 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
}
}
+/* Table that maps characters to order-of-magnitude values. */
+static char const unit_order[UCHAR_LIM] =
+ {
+#if ! ('K' == 75 && 'M' == 77 && 'G' == 71 && 'T' == 84 && 'P' == 80 \
+ && 'E' == 69 && 'Z' == 90 && 'Y' == 89 && 'k' == 107)
+ /* This initializer syntax works on all C99 hosts. For now, use
+ it only on non-ASCII hosts, to ease the pain of porting to
+ pre-C99 ASCII hosts. */
+ ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
+ ['k']=1,
+#else
+ /* Generate the following table with this command:
+ perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
+ foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
+ |fmt */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
+ 0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif
+ };
+
/* Return an integer that represents the order of magnitude of the
unit following the number. The number may contain thousands
separators and a decimal point, but it may not contain leading blanks.
- Negative numbers get negative orders; zero numbers have a zero order.
- Store the address of the end of the number into *ENDPTR. */
+ Negative numbers get negative orders; zero numbers have a zero order. */
static int
-find_unit_order (char const *number, char **endptr)
+find_unit_order (char const *number)
{
- static char const orders[UCHAR_LIM] =
- {
-#if ! ('K' == 75 && 'M' == 77 && 'G' == 71 && 'T' == 84 && 'P' == 80 \
- && 'E' == 69 && 'Z' == 90 && 'Y' == 89 && 'k' == 107)
- /* This initializer syntax works on all C99 hosts. For now, use
- it only on non-ASCII hosts, to ease the pain of porting to
- pre-C99 ASCII hosts. */
- ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
- ['k']=1,
-#else
- /* Generate the following table with this command:
- perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
- foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
- |fmt */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
- 0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-#endif
- };
-
bool minus_sign = (*number == '-');
char const *p = number + minus_sign;
int nonzero = 0;
@@ -1850,27 +1858,27 @@ find_unit_order (char const *number, char **endptr)
while (ISDIGIT (ch = *p++))
nonzero |= ch - '0';
- int order = (nonzero ? orders[ch] : 0);
- *endptr = (char *) p - !order;
- return (minus_sign ? -order : order);
+ if (nonzero)
+ {
+ int order = unit_order[ch];
+ return (minus_sign ? -order : order);
+ }
+ else
+ return 0;
}
/* Compare numbers A and B ending in units with SI or IEC prefixes
- <none/unknown> < K/k < M < G < T < P < E < Z < Y
- This may temporarily modify the strings. Store into *EA the end
- of the string A. */
+ <none/unknown> < K/k < M < G < T < P < E < Z < Y */
static int
-human_numcompare (char *a, char *b, char **ea)
+human_numcompare (char const *a, char const *b)
{
- char *endb;
-
while (blanks[to_uchar (*a)])
a++;
while (blanks[to_uchar (*b)])
b++;
- int diff = find_unit_order (a, ea) - find_unit_order (b, &endb);
+ int diff = find_unit_order (a) - find_unit_order (b);
return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep));
}
@@ -1879,43 +1887,29 @@ human_numcompare (char *a, char *b, char **ea)
hideously fast. */
static int
-numcompare (char const *a, char const *b, char **ea)
+numcompare (char const *a, char const *b)
{
while (blanks[to_uchar (*a)])
a++;
while (blanks[to_uchar (*b)])
b++;
- if (debug)
- {
- /* Approximate strnumcmp extents with find_unit_order. */
- int order = find_unit_order (a, ea);
- *ea -= !!order;
- }
-
return strnumcmp (a, b, decimal_point, thousands_sep);
}
static int
-general_numcompare (char const *sa, char const *sb, char **ea)
+general_numcompare (char const *sa, char const *sb)
{
/* FIXME: maybe add option to try expensive FP conversion
only if A and B can't be compared more cheaply/accurately. */
-#if HAVE_C99_STRTOLD
-# define long_double long double
-#else
-# define long_double double
-# undef strtold
-# define strtold strtod
-#endif
-
+ char *ea;
char *eb;
- long_double a = strtold (sa, ea);
+ long_double a = strtold (sa, &ea);
long_double b = strtold (sb, &eb);
/* Put conversion errors at the start of the collating sequence. */
- if (sa == *ea)
+ if (sa == ea)
return sb == eb ? 0 : -1;
if (sb == eb)
return 1;
@@ -1935,7 +1929,7 @@ general_numcompare (char const *sa, char const *sb, char **ea)
Return 0 if the name in S is not recognized. */
static int
-getmonth (char const *month, size_t len, char const **ea)
+getmonth (char const *month, size_t len, char **ea)
{
size_t lo = 0;
size_t hi = MONTHS_PER_YEAR;
@@ -1961,7 +1955,7 @@ getmonth (char const *month, size_t len, char const **ea)
if (!*n)
{
if (ea)
- *ea = m;
+ *ea = (char *) m;
return monthtab[ix].val;
}
if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
@@ -2201,36 +2195,90 @@ mark_key (size_t offset, size_t width)
}
}
-/* For debug mode, determine the screen offset and width
- to highlight for a key, and then output the highlight. */
+/* Return true if KEY is a numeric key. */
+
+static inline bool
+key_numeric (struct keyfield const *key)
+{
+ return key->numeric || key->general_numeric || key->human_numeric;
+}
+
+/* For LINE, output a debugging line that underlines KEY in LINE.
+ If KEY is null, underline the whole line. */
static void
-debug_key (char const *sline, char const *sfield, char const *efield,
- size_t flen, bool skipb)
+debug_key (struct line const *line, struct keyfield const *key)
{
- char const *sa = sfield;
+ char *text = line->text;
+ char *beg = text;
+ char *lim = text + line->length - 1;
- if (skipb) /* This key type implicitly skips leading blanks. */
+ if (key)
{
- while (sa < efield && blanks[to_uchar (*sa)])
+ if (key->sword != SIZE_MAX)
+ beg = begfield (line, key);
+ if (key->eword != SIZE_MAX)
+ lim = limfield (line, key);
+
+ if (key->skipsblanks || key->month || key_numeric (key))
+ while (beg < lim && blanks[to_uchar (*beg)])
+ beg++;
+
+ if (key_numeric (key))
+ {
+ char *numlim;
+ char saved = *lim; *lim = '\0';
+
+ if (key->general_numeric)
+ strtold (beg, &numlim);
+ else
+ {
+ char *p = beg + (beg < lim && *beg == '-');
+ bool found_digit = false;
+ unsigned char ch;
+
+ do
+ {
+ while (ISDIGIT (ch = *p++))
+ found_digit = true;
+ }
+ while (ch == thousands_sep);
+
+ if (ch == decimal_point)
+ while (ISDIGIT (ch = *p++))
+ found_digit = true;
+
+ numlim = (found_digit
+ ? p - ! (key->human_numeric && unit_order[ch])
+ : beg);
+ }
+
+ *lim = saved;
+ lim = numlim;
+ }
+ else if (key->month)
{
- sa++;
- if (flen)
- flen--; /* This assumes TABs same width as SPACEs. */
+ char *monthlim = beg;
+ getmonth (beg, lim - beg, &monthlim);
+ lim = monthlim;
}
}
- size_t offset = debug_width (sline, sfield) + (sa - sfield);
- size_t width = debug_width (sa, sa + flen);
+ size_t offset = debug_width (text, beg);
+ size_t width = debug_width (beg, lim);
mark_key (offset, width);
}
-/* Testing if a key is numeric is done in various places. */
+/* Debug LINE by underlining its keys. */
-static inline bool
-key_numeric (struct keyfield const *key)
+static void
+debug_line (struct line const *line)
{
- return key->numeric || key->general_numeric || key->human_numeric;
+ struct keyfield const *key = keylist;
+
+ do
+ debug_key (line, key);
+ while (key && ((key = key->next) || ! (unique || stable)));
}
/* Return whether sorting options specified for key. */
@@ -2391,7 +2439,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)
are no more keys or a difference is found. */
static int
-keycompare (struct line const *a, struct line const *b, bool show_debug)
+keycompare (struct line const *a, struct line const *b)
{
struct keyfield *key = keylist;
@@ -2408,7 +2456,6 @@ keycompare (struct line const *a, struct line const *b, bool show_debug)
{
char const *translate = key->translate;
bool const *ignore = key->ignore;
- bool skipb = false; /* Whether key type auto skips leading blanks. */
/* Treat field ends before field starts as empty fields. */
lima = MAX (texta, lima);
@@ -2425,35 +2472,17 @@ keycompare (struct line const *a, struct line const *b, bool show_debug)
else if (key_numeric (key))
{
char savea = *lima, saveb = *limb;
- char *ea = lima;
*lima = *limb = '\0';
- diff = (key->numeric ? numcompare (texta, textb, &ea)
- : key->general_numeric ? general_numcompare (texta, textb,
- &ea)
- : human_numcompare (texta, textb, &ea));
- if (show_debug)
- {
- lena = ea - texta;
- skipb = true;
- }
+ diff = (key->numeric ? numcompare (texta, textb)
+ : key->general_numeric ? general_numcompare (texta, textb)
+ : human_numcompare (texta, textb));
*lima = savea, *limb = saveb;
}
else if (key->version)
diff = compare_version (texta, lena, textb, lenb);
else if (key->month)
- {
- char const *ea = lima;
-
- int amon = getmonth (texta, lena, &ea);
- diff = amon - getmonth (textb, lenb, NULL);
-
- if (show_debug)
- {
- lena = amon ? ea - texta : 0;
- skipb = true;
- }
- }
+ diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL);
/* Sorting like this may become slow, so in a simple locale the user
can select a faster sort that is similar to ascii sort. */
else if (hard_LC_COLLATE)
@@ -2506,8 +2535,6 @@ keycompare (struct line const *a, struct line const *b, bool show_debug)
}
else if (ignore)
{
- char *savea = texta;
-
#define CMP_WITH_IGNORE(A, B) \
do \
{ \
@@ -2535,10 +2562,6 @@ keycompare (struct line const *a, struct line const *b, bool show_debug)
translate[to_uchar (*textb)]);
else
CMP_WITH_IGNORE (*texta, *textb);
-
- /* We only need to restore this for debug_key
- in which case the keys being compared are equal. */
- texta = savea;
}
else if (lena == 0)
diff = - NONZERO (lenb);
@@ -2548,8 +2571,6 @@ keycompare (struct line const *a, struct line const *b, bool show_debug)
{
if (translate)
{
- char *savea = texta;
-
while (texta < lima && textb < limb)
{
diff = (to_uchar (translate[to_uchar (*texta++)])
@@ -2557,10 +2578,6 @@ keycompare (struct line const *a, struct line const *b, bool show_debug)
if (diff)
goto not_equal;
}
-
- /* We only need to restore this for debug_key
- in which case the keys being compared are equal. */
- texta = savea;
}
else
{
@@ -2574,9 +2591,6 @@ keycompare (struct line const *a, struct line const *b, bool show_debug)
if (diff)
goto not_equal;
- if (show_debug)
- debug_key (a->text, texta, lima, lena, skipb);
-
key = key->next;
if (! key)
break;
@@ -2614,7 +2628,7 @@ keycompare (struct line const *a, struct line const *b, bool show_debug)
depending on whether A compares less than, equal to, or greater than B. */
static int
-compare (struct line const *a, struct line const *b, bool show_debug)
+compare (struct line const *a, struct line const *b)
{
int diff;
size_t alen, blen;
@@ -2624,7 +2638,7 @@ compare (struct line const *a, struct line const *b, bool show_debug)
and unadorned sort -r. */
if (keylist)
{
- diff = keycompare (a, b, show_debug);
+ diff = keycompare (a, b);
if (diff || unique || stable)
return diff;
}
@@ -2633,9 +2647,6 @@ compare (struct line const *a, struct line const *b, bool show_debug)
fall through to the default comparison. */
alen = a->length - 1, blen = b->length - 1;
- if (show_debug)
- debug_key (a->text, a->text, a->text + alen, alen, false);
-
if (alen == 0)
diff = - NONZERO (blen);
else if (blen == 0)
@@ -2654,16 +2665,21 @@ compare (struct line const *a, struct line const *b, bool show_debug)
return reverse ? -diff : diff;
}
+/* Write LINE to output stream FP; the output file's name is
+ OUTPUT_FILE if OUTPUT_FILE is nonnull, and is the standard output
+ otherwise. If debugging is enabled and FP is standard output,
+ append some debugging information. */
+
static void
-write_bytes (struct line const *line, FILE *fp, char const *output_file)
+write_line (struct line const *line, FILE *fp, char const *output_file)
{
char *buf = line->text;
size_t n_bytes = line->length;
char *ebuf = buf + n_bytes;
- /* Convert TABs to '>' and \0 to \n when -z specified. */
- if (debug && fp == stdout)
+ if (!output_file && debug)
{
+ /* Convert TAB to '>' and EOL to \n, and then output debugging info. */
char const *c = buf;
while (c < ebuf)
@@ -2677,7 +2693,7 @@ write_bytes (struct line const *line, FILE *fp, char const *output_file)
die (_("write failed"), output_file);
}
- compare (line, line, true);
+ debug_line (line);
}
else
{
@@ -2716,7 +2732,7 @@ check (char const *file_name, char checkonly)
/* Make sure the line saved from the old buffer contents is
less than or equal to the first line of the new buffer. */
- if (alloc && nonunique <= compare (&temp, line - 1, false))
+ if (alloc && nonunique <= compare (&temp, line - 1))
{
found_disorder:
{
@@ -2729,10 +2745,7 @@ check (char const *file_name, char checkonly)
fprintf (stderr, _("%s: %s:%s: disorder: "),
program_name, file_name,
umaxtostr (disorder_line_number, hr_buf));
- if (debug)
- fputc ('\n', stderr);
- write_bytes (disorder_line, debug ? stdout : stderr,
- debug ? _("standard out") : _("standard error"));
+ write_line (disorder_line, stderr, _("standard error"));
}
ordered = false;
@@ -2742,7 +2755,7 @@ check (char const *file_name, char checkonly)
/* Compare each line in the buffer with its successor. */
while (linebase < --line)
- if (nonunique <= compare (line, line - 1, false))
+ if (nonunique <= compare (line, line - 1))
goto found_disorder;
line_number += buf.nlines;
@@ -2871,7 +2884,7 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
for (i = 0; i < nfiles; ++i)
ord[i] = i;
for (i = 1; i < nfiles; ++i)
- if (0 < compare (cur[ord[i - 1]], cur[ord[i]], false))
+ if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
/* Repeatedly output the smallest line until no input remains. */
@@ -2883,10 +2896,10 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
an identical series of lines. */
if (unique)
{
- if (savedline && compare (savedline, smallest, false))
+ if (savedline && compare (savedline, smallest))
{
savedline = NULL;
- write_bytes (&saved, ofp, output_file);
+ write_line (&saved, ofp, output_file);
}
if (!savedline)
{
@@ -2915,7 +2928,7 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
}
}
else
- write_bytes (smallest, ofp, output_file);
+ write_line (smallest, ofp, output_file);
/* Check if we need to read more lines into core. */
if (base[ord[0]] < smallest)
@@ -2969,7 +2982,7 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
while (lo < hi)
{
- int cmp = compare (cur[ord0], cur[ord[probe]], false);
+ int cmp = compare (cur[ord0], cur[ord[probe]]);
if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
hi = probe;
else
@@ -2990,7 +3003,7 @@ mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
if (unique && savedline)
{
- write_bytes (&saved, ofp, output_file);
+ write_line (&saved, ofp, output_file);
free (saved.text);
}
@@ -3039,7 +3052,7 @@ mergelines (struct line *restrict t, size_t nlines,
struct line *hi = t - nlo;
while (true)
- if (compare (lo - 1, hi - 1, false) <= 0)
+ if (compare (lo - 1, hi - 1) <= 0)
{
*--t = *--lo;
if (! --nlo)
@@ -3084,7 +3097,7 @@ sequential_sort (struct line *restrict lines, size_t nlines,
/* Declare `swap' as int, not bool, to work around a bug
<http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
- int swap = (0 < compare (&lines[-1], &lines[-2], false));
+ int swap = (0 < compare (&lines[-1], &lines[-2]));
if (to_temp)
{
temp[-1] = lines[-1 - swap];
@@ -3221,11 +3234,11 @@ write_unique (struct line const *line, FILE *tfp, char const *temp_output)
static struct line const *saved = NULL;
if (!unique)
- write_bytes (line, tfp, temp_output);
- else if (!saved || compare (line, saved, false))
+ write_line (line, tfp, temp_output);
+ else if (!saved || compare (line, saved))
{
saved = line;
- write_bytes (line, tfp, temp_output);
+ write_line (line, tfp, temp_output);
}
}
@@ -3247,7 +3260,7 @@ mergelines_node (struct merge_node *restrict node, size_t total_lines,
/* Merge to destination buffer. */
struct line *dest = *node->dest;
while (node->lo != node->end_lo && node->hi != node->end_hi && to_merge--)
- if (compare (node->lo - 1, node->hi - 1, false) <= 0)
+ if (compare (node->lo - 1, node->hi - 1) <= 0)
*--dest = *--node->lo;
else
*--dest = *--node->hi;
@@ -3267,7 +3280,7 @@ mergelines_node (struct merge_node *restrict node, size_t total_lines,
/* Merge directly to output. */
while (node->lo != node->end_lo && node->hi != node->end_hi && to_merge--)
{
- if (compare (node->lo - 1, node->hi - 1, false) <= 0)
+ if (compare (node->lo - 1, node->hi - 1) <= 0)
write_unique (--node->lo, tfp, temp_output);
else
write_unique (--node->hi, tfp, temp_output);
@@ -4444,14 +4457,15 @@ main (int argc, char **argv)
check_ordering_compatibility ();
- /* Disable this combination so that users are less likely
- to inadvertantly update a file with debugging enabled.
- Also it simplifies the code for handling temp files. */
- if (debug && outfile)
- error (SORT_FAILURE, 0, _("options -o and --debug are incompatible"));
-
if (debug)
{
+ if (checkonly || outfile)
+ {
+ static char opts[] = "X --debug";
+ opts[0] = (checkonly ? checkonly : 'o');
+ incompatible_options (opts);
+ }
+
/* Always output the locale in debug mode, since this
is such a common source of confusion. */
if (hard_LC_COLLATE)