1 files changed, 1091 insertions, 0 deletions
diff --git a/src/sort-uniq-keyfuncs.c b/src/sort-uniq-keyfuncs.c
new file mode 100644
index 000000000..196045bc0
--- /dev/null
+++ b/src/sort-uniq-keyfuncs.c
@@ -0,0 +1,1091 @@
+#include "filevercmp.h"
+#include "md5.h"
+#include "strnumcmp.h"
+#define UCHAR_LIM (UCHAR_MAX + 1)
+
+#if HAVE_C99_STRTOLD
+# define long_double long double
+#else
+# define long_double double
+# undef strtold
+# define strtold strtod
+#endif
+
+/* Exit statuses.  */
+enum
+  {
+    /* POSIX says to exit with status 1 if invoked with -c and the
+       input is not properly sorted.  */
+    SORT_OUT_OF_ORDER = 1,
+
+    /* POSIX says any other irregular exit must exit with a status
+       code greater than 1.  */
+    SORT_FAILURE = 2
+  };
+
+/* The representation of the decimal point in the current locale.  */
+static int decimal_point;
+
+/* Thousands separator; if -1, then there isn't one.  */
+static int thousands_sep;
+
+/* Nonzero if the corresponding locales are hard.  */
+static bool hard_LC_COLLATE;
+
+#define NONZERO(x) ((x) != 0)
+
+/* The kind of blanks for '-b' to skip in various options. */
+enum blanktype { bl_start, bl_end, bl_both };
+
+/* Lines are held in core as counted strings. */
+struct line
+{
+  char *text;			/* Text of the line. */
+  size_t length;		/* Length including final newline. */
+  char *keybeg;			/* Start of first key. */
+  char *keylim;			/* Limit of first key. */
+};
+
+/* Sort key.  */
+struct keyfield
+{
+  size_t sword;			/* Zero-origin 'word' to start at. */
+  size_t schar;			/* Additional characters to skip. */
+  size_t eword;			/* Zero-origin last 'word' of key. */
+  size_t echar;			/* Additional characters in field. */
+  bool const *ignore;		/* Boolean array of characters to ignore. */
+  char const *translate;	/* Translation applied to characters. */
+  bool skipsblanks;		/* Skip leading blanks when finding start.  */
+  bool skipeblanks;		/* Skip leading blanks when finding end.  */
+  bool numeric;			/* Flag for numeric comparison.  Handle
+                                   strings of digits with optional decimal
+                                   point, but no exponential notation. */
+  bool random;			/* Sort by random hash of key.  */
+  bool general_numeric;		/* Flag for general, numeric comparison.
+                                   Handle numbers in exponential notation. */
+  bool human_numeric;		/* Flag for sorting by human readable
+                                   units with either SI or IEC prefixes. */
+  bool month;			/* Flag for comparison by month name. */
+  bool reverse;			/* Reverse the sense of comparison. */
+  bool version;			/* sort by version number */
+  bool traditional_used;	/* Traditional key option format is used. */
+  struct keyfield *next;	/* Next keyfield to try. */
+};
+
+struct month
+{
+  char const *name;
+  int val;
+};
+
+/* FIXME: None of these tables work with multibyte character sets.
+   Also, there are many other bugs when handling multibyte characters.
+   One way to fix this is to rewrite 'sort' to use wide characters
+   internally, but doing this with good performance is a bit
+   tricky.  */
+
+/* Table of blanks.  */
+static bool blanks[UCHAR_LIM];
+
+/* Table of non-printing characters. */
+static bool nonprinting[UCHAR_LIM];
+
+/* Table of non-dictionary characters (not letters, digits, or blanks). */
+static bool nondictionary[UCHAR_LIM];
+
+/* Translation table folding lower case to upper.  */
+static char fold_toupper[UCHAR_LIM];
+
+#define MONTHS_PER_YEAR 12
+
+/* Table mapping month names to integers.
+   Alphabetic order allows binary search. */
+static struct month monthtab[] =
+{
+  {"APR", 4},
+  {"AUG", 8},
+  {"DEC", 12},
+  {"FEB", 2},
+  {"JAN", 1},
+  {"JUL", 7},
+  {"JUN", 6},
+  {"MAR", 3},
+  {"MAY", 5},
+  {"NOV", 11},
+  {"OCT", 10},
+  {"SEP", 9}
+};
+
+/* Flag to reverse the order of all comparisons. */
+static bool reverse;
+
+/* Flag for stable sort.  This turns off the last ditch bytewise
+   comparison of lines, and instead leaves lines in the same order
+   they were read if all keys compare equal.  */
+static bool stable;
+
+/* If TAB has this value, blanks separate fields.  */
+enum { TAB_DEFAULT = CHAR_MAX + 1 };
+
+/* Tab character separating fields.  If TAB_DEFAULT, then fields are
+   separated by the empty string between a non-blank character and a blank
+   character. */
+static int tab = TAB_DEFAULT;
+
+/* Flag to remove consecutive duplicate lines from the output.
+   Only the last of a sequence of equal lines will be output. */
+static bool unique;
+
+/* List of key field comparisons to be tried.  */
+static struct keyfield *keylist;
+
+/* Return a pointer to the first character of the field specified
+   by KEY in LINE. */
+
+static char *
+begfield (struct line const *line, struct keyfield const *key)
+{
+  char *ptr = line->text, *lim = ptr + line->length - 1;
+  size_t sword = key->sword;
+  size_t schar = key->schar;
+
+  /* The leading field separator itself is included in a field when -t
+     is absent.  */
+
+  if (tab != TAB_DEFAULT)
+    while (ptr < lim && sword--)
+      {
+        while (ptr < lim && *ptr != tab)
+          ++ptr;
+        if (ptr < lim)
+          ++ptr;
+      }
+  else
+    while (ptr < lim && sword--)
+      {
+        while (ptr < lim && blanks[to_uchar (*ptr)])
+          ++ptr;
+        while (ptr < lim && !blanks[to_uchar (*ptr)])
+          ++ptr;
+      }
+
+  /* If we're ignoring leading blanks when computing the Start
+     of the field, skip past them here.  */
+  if (key->skipsblanks)
+    while (ptr < lim && blanks[to_uchar (*ptr)])
+      ++ptr;
+
+  /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
+  ptr = MIN (lim, ptr + schar);
+
+  return ptr;
+}
+
+/* Return the limit of (a pointer to the first character after) the field
+   in LINE specified by KEY. */
+
+static char *
+limfield (struct line const *line, struct keyfield const *key)
+{
+  char *ptr = line->text, *lim = ptr + line->length - 1;
+  size_t eword = key->eword, echar = key->echar;
+
+  if (echar == 0)
+    eword++; /* Skip all of end field.  */
+
+  /* Move PTR past EWORD fields or to one past the last byte on LINE,
+     whichever comes first.  If there are more than EWORD fields, leave
+     PTR pointing at the beginning of the field having zero-based index,
+     EWORD.  If a delimiter character was specified (via -t), then that
+     'beginning' is the first character following the delimiting TAB.
+     Otherwise, leave PTR pointing at the first 'blank' character after
+     the preceding field.  */
+  if (tab != TAB_DEFAULT)
+    while (ptr < lim && eword--)
+      {
+        while (ptr < lim && *ptr != tab)
+          ++ptr;
+        if (ptr < lim && (eword || echar))
+          ++ptr;
+      }
+  else
+    while (ptr < lim && eword--)
+      {
+        while (ptr < lim && blanks[to_uchar (*ptr)])
+          ++ptr;
+        while (ptr < lim && !blanks[to_uchar (*ptr)])
+          ++ptr;
+      }
+
+#ifdef POSIX_UNSPECIFIED
+  /* The following block of code makes GNU sort incompatible with
+     standard Unix sort, so it's ifdef'd out for now.
+     The POSIX spec isn't clear on how to interpret this.
+     FIXME: request clarification.
+
+     From: kwzh@gnu.ai.mit.edu (Karl Heuer)
+     Date: Thu, 30 May 96 12:20:41 -0400
+     [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
+
+     [...]I believe I've found another bug in 'sort'.
+
+     $ cat /tmp/sort.in
+     a b c 2 d
+     pq rs 1 t
+     $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
+     a b c 2 d
+     pq rs 1 t
+     $ /bin/sort -k1.7,1.7 </tmp/sort.in
+     pq rs 1 t
+     a b c 2 d
+
+     Unix sort produced the answer I expected: sort on the single character
+     in column 7.  GNU sort produced different results, because it disagrees
+     on the interpretation of the key-end spec "M.N".  Unix sort reads this
+     as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
+     "skip M-1 fields, then either N-1 characters or the rest of the current
+     field, whichever comes first".  This extra clause applies only to
+     key-ends, not key-starts.
+     */
+
+  /* Make LIM point to the end of (one byte past) the current field.  */
+  if (tab != TAB_DEFAULT)
+    {
+      char *newlim;
+      newlim = memchr (ptr, tab, lim - ptr);
+      if (newlim)
+        lim = newlim;
+    }
+  else
+    {
+      char *newlim;
+      newlim = ptr;
+      while (newlim < lim && blanks[to_uchar (*newlim)])
+        ++newlim;
+      while (newlim < lim && !blanks[to_uchar (*newlim)])
+        ++newlim;
+      lim = newlim;
+    }
+#endif
+
+  if (echar != 0) /* We need to skip over a portion of the end field.  */
+    {
+      /* If we're ignoring leading blanks when computing the End
+         of the field, skip past them here.  */
+      if (key->skipeblanks)
+        while (ptr < lim && blanks[to_uchar (*ptr)])
+          ++ptr;
+
+      /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
+      ptr = MIN (lim, ptr + echar);
+    }
+
+  return ptr;
+}
+
+/* Table that maps characters to order-of-magnitude values.  */
+static char const unit_order[UCHAR_LIM] =
+  {
+#if ! ('K' == 75 && 'M' == 77 && 'G' == 71 && 'T' == 84 && 'P' == 80 \
+     && 'E' == 69 && 'Z' == 90 && 'Y' == 89 && 'k' == 107)
+    /* This initializer syntax works on all C99 hosts.  For now, use
+       it only on non-ASCII hosts, to ease the pain of porting to
+       pre-C99 ASCII hosts.  */
+    ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
+    ['k']=1,
+#else
+    /* Generate the following table with this command:
+       perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
+       foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
+       |fmt  */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
+    0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif
+  };
+
+/* Traverse number given as *number consisting of digits, thousands_sep, and
+   decimal_point chars only.  Returns the highest digit found in the number,
+   or '\0' if no digit has been found.  Upon return *number points at the
+   character that immediately follows after the given number.  */
+static unsigned char
+traverse_raw_number (char const **number)
+{
+  char const *p = *number;
+  unsigned char ch;
+  unsigned char max_digit = '\0';
+  bool ends_with_thousands_sep = false;
+
+  /* Scan to end of number.
+     Decimals or separators not followed by digits stop the scan.
+     Numbers ending in decimals or separators are thus considered
+     to be lacking in units.
+     FIXME: add support for multibyte thousands_sep and decimal_point.  */
+
+  while (ISDIGIT (ch = *p++))
+    {
+      if (max_digit < ch)
+        max_digit = ch;
+
+      /* Allow to skip only one occurrence of thousands_sep to avoid finding
+         the unit in the next column in case thousands_sep matches as blank
+         and is used as column delimiter.  */
+      ends_with_thousands_sep = (*p == thousands_sep);
+      if (ends_with_thousands_sep)
+        ++p;
+    }
+
+  if (ends_with_thousands_sep)
+    {
+      /* thousands_sep not followed by digit is not allowed.  */
+      *number = p - 2;
+      return max_digit;
+    }
+
+  if (ch == decimal_point)
+    while (ISDIGIT (ch = *p++))
+      if (max_digit < ch)
+        max_digit = ch;
+
+  *number = p - 1;
+  return max_digit;
+}
+
+/* Return an integer that represents the order of magnitude of the
+   unit following the number.  The number may contain thousands
+   separators and a decimal point, but it may not contain leading blanks.
+   Negative numbers get negative orders; zero numbers have a zero order.  */
+
+static int _GL_ATTRIBUTE_PURE
+find_unit_order (char const *number)
+{
+  bool minus_sign = (*number == '-');
+  char const *p = number + minus_sign;
+  unsigned char max_digit = traverse_raw_number (&p);
+  if ('0' < max_digit)
+    {
+      unsigned char ch = *p;
+      int order = unit_order[ch];
+      return (minus_sign ? -order : order);
+    }
+  else
+    return 0;
+}
+
+/* Compare numbers A and B ending in units with SI or IEC prefixes
+       <none/unknown> < K/k < M < G < T < P < E < Z < Y  */
+
+static int
+human_numcompare (char const *a, char const *b)
+{
+  while (blanks[to_uchar (*a)])
+    a++;
+  while (blanks[to_uchar (*b)])
+    b++;
+
+  int diff = find_unit_order (a) - find_unit_order (b);
+  return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep));
+}
+
+/* Compare strings A and B as numbers without explicitly converting them to
+   machine numbers.  Comparatively slow for short strings, but asymptotically
+   hideously fast. */
+
+static int
+numcompare (char const *a, char const *b)
+{
+  while (blanks[to_uchar (*a)])
+    a++;
+  while (blanks[to_uchar (*b)])
+    b++;
+
+  return strnumcmp (a, b, decimal_point, thousands_sep);
+}
+
+/* Work around a problem whereby the long double value returned by glibc's
+   strtold ("NaN", ...) contains uninitialized bits: clear all bytes of
+   A and B before calling strtold.  FIXME: remove this function once
+   gnulib guarantees that strtold's result is always well defined.  */
+static int
+nan_compare (char const *sa, char const *sb)
+{
+  long_double a;
+  memset (&a, 0, sizeof a);
+  a = strtold (sa, NULL);
+
+  long_double b;
+  memset (&b, 0, sizeof b);
+  b = strtold (sb, NULL);
+
+  return memcmp (&a, &b, sizeof a);
+}
+
+static int
+general_numcompare (char const *sa, char const *sb)
+{
+  /* FIXME: maybe add option to try expensive FP conversion
+     only if A and B can't be compared more cheaply/accurately.  */
+
+  char *ea;
+  char *eb;
+  long_double a = strtold (sa, &ea);
+  long_double b = strtold (sb, &eb);
+
+  /* Put conversion errors at the start of the collating sequence.  */
+  if (sa == ea)
+    return sb == eb ? 0 : -1;
+  if (sb == eb)
+    return 1;
+
+  /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
+     conversion errors but before numbers; sort them by internal
+     bit-pattern, for lack of a more portable alternative.  */
+  return (a < b ? -1
+          : a > b ? 1
+          : a == b ? 0
+          : b == b ? -1
+          : a == a ? 1
+          : nan_compare (sa, sb));
+}
+
+/* Return an integer in 1..12 of the month name MONTH.
+   Return 0 if the name in S is not recognized.  */
+
+static int
+getmonth (char const *month, char **ea)
+{
+  size_t lo = 0;
+  size_t hi = MONTHS_PER_YEAR;
+
+  while (blanks[to_uchar (*month)])
+    month++;
+
+  do
+    {
+      size_t ix = (lo + hi) / 2;
+      char const *m = month;
+      char const *n = monthtab[ix].name;
+
+      for (;; m++, n++)
+        {
+          if (!*n)
+            {
+              if (ea)
+                *ea = (char *) m;
+              return monthtab[ix].val;
+            }
+          if (to_uchar (fold_toupper[to_uchar (*m)]) < to_uchar (*n))
+            {
+              hi = ix;
+              break;
+            }
+          else if (to_uchar (fold_toupper[to_uchar (*m)]) > to_uchar (*n))
+            {
+              lo = ix + 1;
+              break;
+            }
+        }
+    }
+  while (lo < hi);
+
+  return 0;
+}
+
+/* A randomly chosen MD5 state, used for random comparison.  */
+static struct md5_ctx random_md5_state;
+
+/* This is like strxfrm, except it reports any error and exits.  */
+
+static size_t
+xstrxfrm (char *restrict dest, char const *restrict src, size_t destsize)
+{
+  errno = 0;
+  size_t translated_size = strxfrm (dest, src, destsize);
+
+  if (errno)
+    {
+      error (0, errno, _("string transformation failed"));
+      error (0, 0, _("set LC_ALL='C' to work around the problem"));
+      die (SORT_FAILURE, 0,
+           _("the untransformed string was %s"),
+           quotearg_n_style (0, locale_quoting_style, src));
+    }
+
+  return translated_size;
+}
+
+/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
+   using one or more random hash functions.  TEXTA[LENA] and
+   TEXTB[LENB] must be zero.  */
+
+static int
+compare_random (char *restrict texta, size_t lena,
+                char *restrict textb, size_t lenb)
+{
+  /* XFRM_DIFF records the equivalent of memcmp on the transformed
+     data.  This is used to break ties if there is a checksum
+     collision, and this is good enough given the astronomically low
+     probability of a collision.  */
+  int xfrm_diff = 0;
+
+  char stackbuf[4000];
+  char *buf = stackbuf;
+  size_t bufsize = sizeof stackbuf;
+  void *allocated = NULL;
+  uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
+  struct md5_ctx s[2];
+  s[0] = s[1] = random_md5_state;
+
+  if (hard_LC_COLLATE)
+    {
+      char const *lima = texta + lena;
+      char const *limb = textb + lenb;
+
+      while (true)
+        {
+          /* Transform the text into the basis of comparison, so that byte
+             strings that would otherwise considered to be equal are
+             considered equal here even if their bytes differ.
+
+             Each time through this loop, transform one
+             null-terminated string's worth from TEXTA or from TEXTB
+             or both.  That way, there's no need to store the
+             transformation of the whole line, if it contains many
+             null-terminated strings.  */
+
+          /* Store the transformed data into a big-enough buffer.  */
+
+          /* A 3X size guess avoids the overhead of calling strxfrm
+             twice on typical implementations.  Don't worry about
+             size_t overflow, as the guess need not be correct.  */
+          size_t guess_bufsize = 3 * (lena + lenb) + 2;
+          if (bufsize < guess_bufsize)
+            {
+              bufsize = MAX (guess_bufsize, bufsize * 3 / 2);
+              free (allocated);
+              buf = allocated = malloc (bufsize);
+              if (! buf)
+                {
+                  buf = stackbuf;
+                  bufsize = sizeof stackbuf;
+                }
+            }
+
+          size_t sizea =
+            (texta < lima ? xstrxfrm (buf, texta, bufsize) + 1 : 0);
+          bool a_fits = sizea <= bufsize;
+          size_t sizeb =
+            (textb < limb
+             ? (xstrxfrm ((a_fits ? buf + sizea : NULL), textb,
+                          (a_fits ? bufsize - sizea : 0))
+                + 1)
+             : 0);
+
+          if (! (a_fits && sizea + sizeb <= bufsize))
+            {
+              bufsize = sizea + sizeb;
+              if (bufsize < SIZE_MAX / 3)
+                bufsize = bufsize * 3 / 2;
+              free (allocated);
+              buf = allocated = xmalloc (bufsize);
+              if (texta < lima)
+                strxfrm (buf, texta, sizea);
+              if (textb < limb)
+                strxfrm (buf + sizea, textb, sizeb);
+            }
+
+          /* Advance past NULs to the next part of each input string,
+             exiting the loop if both strings are exhausted.  When
+             exiting the loop, prepare to finish off the tiebreaker
+             comparison properly.  */
+          if (texta < lima)
+            texta += strlen (texta) + 1;
+          if (textb < limb)
+            textb += strlen (textb) + 1;
+          if (! (texta < lima || textb < limb))
+            {
+              lena = sizea; texta = buf;
+              lenb = sizeb; textb = buf + sizea;
+              break;
+            }
+
+          /* Accumulate the transformed data in the corresponding
+             checksums.  */
+          md5_process_bytes (buf, sizea, &s[0]);
+          md5_process_bytes (buf + sizea, sizeb, &s[1]);
+
+          /* Update the tiebreaker comparison of the transformed data.  */
+          if (! xfrm_diff)
+            {
+              xfrm_diff = memcmp (buf, buf + sizea, MIN (sizea, sizeb));
+              if (! xfrm_diff)
+                xfrm_diff = (sizea > sizeb) - (sizea < sizeb);
+            }
+        }
+    }
+
+  /* Compute and compare the checksums.  */
+  md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
+  md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
+  int diff = memcmp (dig[0], dig[1], sizeof dig[0]);
+
+  /* Fall back on the tiebreaker if the checksums collide.  */
+  if (! diff)
+    {
+      if (! xfrm_diff)
+        {
+          xfrm_diff = memcmp (texta, textb, MIN (lena, lenb));
+          if (! xfrm_diff)
+            xfrm_diff = (lena > lenb) - (lena < lenb);
+        }
+
+      diff = xfrm_diff;
+    }
+
+  free (allocated);
+
+  return diff;
+}
+
+/* Return true if KEY is a numeric key.  */
+
+static inline bool
+key_numeric (struct keyfield const *key)
+{
+  return key->numeric || key->general_numeric || key->human_numeric;
+}
+
+/* Compare two lines A and B trying every key in sequence until there
+   are no more keys or a difference is found. */
+
+static int
+keycompare (struct line const *a, struct line const *b)
+{
+  struct keyfield *key = keylist;
+
+  /* For the first iteration only, the key positions have been
+     precomputed for us. */
+  char *texta = a->keybeg;
+  char *textb = b->keybeg;
+  char *lima = a->keylim;
+  char *limb = b->keylim;
+
+  int diff;
+
+  while (true)
+    {
+      char const *translate = key->translate;
+      bool const *ignore = key->ignore;
+
+      /* Treat field ends before field starts as empty fields.  */
+      lima = MAX (texta, lima);
+      limb = MAX (textb, limb);
+
+      /* Find the lengths. */
+      size_t lena = lima - texta;
+      size_t lenb = limb - textb;
+
+      if (hard_LC_COLLATE || key_numeric (key)
+          || key->month || key->random || key->version)
+        {
+          char *ta;
+          char *tb;
+          size_t tlena;
+          size_t tlenb;
+
+          char enda IF_LINT (= 0);
+          char endb IF_LINT (= 0);
+          void *allocated IF_LINT (= NULL);
+          char stackbuf[4000];
+
+          if (ignore || translate)
+            {
+              /* Compute with copies of the keys, which are the result of
+                 translating or ignoring characters, and which need their
+                 own storage.  */
+
+              size_t i;
+
+              /* Allocate space for copies.  */
+              size_t size = lena + 1 + lenb + 1;
+              if (size <= sizeof stackbuf)
+                ta = stackbuf, allocated = NULL;
+              else
+                ta = allocated = xmalloc (size);
+              tb = ta + lena + 1;
+
+              /* Put into each copy a version of the key in which the
+                 requested characters are ignored or translated.  */
+              for (tlena = i = 0; i < lena; i++)
+                if (! (ignore && ignore[to_uchar (texta[i])]))
+                  ta[tlena++] = (translate
+                                 ? translate[to_uchar (texta[i])]
+                                 : texta[i]);
+              ta[tlena] = '\0';
+
+              for (tlenb = i = 0; i < lenb; i++)
+                if (! (ignore && ignore[to_uchar (textb[i])]))
+                  tb[tlenb++] = (translate
+                                 ? translate[to_uchar (textb[i])]
+                                 : textb[i]);
+              tb[tlenb] = '\0';
+            }
+          else
+            {
+              /* Use the keys in-place, temporarily null-terminated.  */
+              ta = texta; tlena = lena; enda = ta[tlena]; ta[tlena] = '\0';
+              tb = textb; tlenb = lenb; endb = tb[tlenb]; tb[tlenb] = '\0';
+            }
+
+          if (key->numeric)
+            diff = numcompare (ta, tb);
+          else if (key->general_numeric)
+            diff = general_numcompare (ta, tb);
+          else if (key->human_numeric)
+            diff = human_numcompare (ta, tb);
+          else if (key->month)
+            diff = getmonth (ta, NULL) - getmonth (tb, NULL);
+          else if (key->random)
+            diff = compare_random (ta, tlena, tb, tlenb);
+          else if (key->version)
+            diff = filevercmp (ta, tb);
+          else
+            {
+              /* Locale-dependent string sorting.  This is slower than
+                 C-locale sorting, which is implemented below.  */
+              if (tlena == 0)
+                diff = - NONZERO (tlenb);
+              else if (tlenb == 0)
+                diff = 1;
+              else
+                diff = xmemcoll0 (ta, tlena + 1, tb, tlenb + 1);
+            }
+
+          if (ignore || translate)
+            free (allocated);
+          else
+            {
+              ta[tlena] = enda;
+              tb[tlenb] = endb;
+            }
+        }
+      else if (ignore)
+        {
+#define CMP_WITH_IGNORE(A, B)						\
+  do									\
+    {									\
+          while (true)							\
+            {								\
+              while (texta < lima && ignore[to_uchar (*texta)])		\
+                ++texta;						\
+              while (textb < limb && ignore[to_uchar (*textb)])		\
+                ++textb;						\
+              if (! (texta < lima && textb < limb))			\
+                break;							\
+              diff = to_uchar (A) - to_uchar (B);			\
+              if (diff)							\
+                goto not_equal;						\
+              ++texta;							\
+              ++textb;							\
+            }								\
+                                                                        \
+          diff = (texta < lima) - (textb < limb);			\
+    }									\
+  while (0)
+
+          if (translate)
+            CMP_WITH_IGNORE (translate[to_uchar (*texta)],
+                             translate[to_uchar (*textb)]);
+          else
+            CMP_WITH_IGNORE (*texta, *textb);
+        }
+      else if (lena == 0)
+        diff = - NONZERO (lenb);
+      else if (lenb == 0)
+        goto greater;
+      else
+        {
+          if (translate)
+            {
+              while (texta < lima && textb < limb)
+                {
+                  diff = (to_uchar (translate[to_uchar (*texta++)])
+                          - to_uchar (translate[to_uchar (*textb++)]));
+                  if (diff)
+                    goto not_equal;
+                }
+            }
+          else
+            {
+              diff = memcmp (texta, textb, MIN (lena, lenb));
+              if (diff)
+                goto not_equal;
+            }
+          diff = lena < lenb ? -1 : lena != lenb;
+        }
+
+      if (diff)
+        goto not_equal;
+
+      key = key->next;
+      if (! key)
+        break;
+
+      /* Find the beginning and limit of the next field.  */
+      if (key->eword != SIZE_MAX)
+        lima = limfield (a, key), limb = limfield (b, key);
+      else
+        lima = a->text + a->length - 1, limb = b->text + b->length - 1;
+
+      if (key->sword != SIZE_MAX)
+        texta = begfield (a, key), textb = begfield (b, key);
+      else
+        {
+          texta = a->text, textb = b->text;
+          if (key->skipsblanks)
+            {
+              while (texta < lima && blanks[to_uchar (*texta)])
+                ++texta;
+              while (textb < limb && blanks[to_uchar (*textb)])
+                ++textb;
+            }
+        }
+    }
+
+  return 0;
+
+ greater:
+  diff = 1;
+ not_equal:
+  return key->reverse ? -diff : diff;
+}
+
+/* Compare two lines A and B, returning negative, zero, or positive
+   depending on whether A compares less than, equal to, or greater than B. */
+
+static int
+compare (struct line const *a, struct line const *b)
+{
+  int diff;
+  size_t alen, blen;
+
+  /* First try to compare on the specified keys (if any).
+     The only two cases with no key at all are unadorned sort,
+     and unadorned sort -r. */
+  if (keylist)
+    {
+      diff = keycompare (a, b);
+      if (diff || unique || stable)
+        return diff;
+    }
+
+  /* If the keys all compare equal (or no keys were specified)
+     fall through to the default comparison.  */
+  alen = a->length - 1, blen = b->length - 1;
+
+  if (alen == 0)
+    diff = - NONZERO (blen);
+  else if (blen == 0)
+    diff = 1;
+  else if (hard_LC_COLLATE)
+    {
+      /* Note xmemcoll0 is a performance enhancement as
+         it will not unconditionally write '\0' after the
+         passed in buffers, which was seen to give around
+         a 3% increase in performance for short lines.  */
+      diff = xmemcoll0 (a->text, alen + 1, b->text, blen + 1);
+    }
+  else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
+    diff = alen < blen ? -1 : alen != blen;
+
+  return reverse ? -diff : diff;
+}
+
+static void
+insertkey (struct keyfield *key_arg)
+{
+  struct keyfield **p;
+  struct keyfield *key = xmemdup (key_arg, sizeof *key);
+
+  for (p = &keylist; *p; p = &(*p)->next)
+    continue;
+  *p = key;
+  key->next = NULL;
+}
+
+/* Report a bad field specification SPEC, with extra info MSGID.  */
+
+static void badfieldspec (char const *, char const *)
+     ATTRIBUTE_NORETURN;
+static void
+badfieldspec (char const *spec, char const *msgid)
+{
+  die (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
+       _(msgid), quote (spec));
+}
+
+/* Parse the leading integer in STRING and store the resulting value
+   (which must fit into size_t) into *VAL.  Return the address of the
+   suffix after the integer.  If the value is too large, silently
+   substitute SIZE_MAX.  If MSGID is NULL, return NULL after
+   failure; otherwise, report MSGID and exit on failure.  */
+
+static char const *
+parse_field_count (char const *string, size_t *val, char const *msgid)
+{
+  char *suffix;
+  uintmax_t n;
+
+  switch (xstrtoumax (string, &suffix, 10, &n, ""))
+    {
+    case LONGINT_OK:
+    case LONGINT_INVALID_SUFFIX_CHAR:
+      *val = n;
+      if (*val == n)
+        break;
+      /* Fall through.  */
+    case LONGINT_OVERFLOW:
+    case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
+      *val = SIZE_MAX;
+      break;
+
+    case LONGINT_INVALID:
+      if (msgid)
+        die (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
+             _(msgid), quote (string));
+      return NULL;
+    }
+
+  return suffix;
+}
+
+/* Set the ordering options for KEY specified in S.
+   Return the address of the first character in S that
+   is not a valid ordering option.
+   BLANKTYPE is the kind of blanks that 'b' should skip. */
+
+static char *
+set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype)
+{
+  while (*s)
+    {
+      switch (*s)
+        {
+        case 'b':
+          if (blanktype == bl_start || blanktype == bl_both)
+            key->skipsblanks = true;
+          if (blanktype == bl_end || blanktype == bl_both)
+            key->skipeblanks = true;
+          break;
+        case 'd':
+          key->ignore = nondictionary;
+          break;
+        case 'f':
+          key->translate = fold_toupper;
+          break;
+        case 'g':
+          key->general_numeric = true;
+          break;
+        case 'h':
+          key->human_numeric = true;
+          break;
+        case 'i':
+          /* Option order should not matter, so don't let -i override
+             -d.  -d implies -i, but -i does not imply -d.  */
+          if (! key->ignore)
+            key->ignore = nonprinting;
+          break;
+        case 'M':
+          key->month = true;
+          break;
+        case 'n':
+          key->numeric = true;
+          break;
+        case 'R':
+          key->random = true;
+          break;
+        case 'r':
+          key->reverse = true;
+          break;
+        case 'V':
+          key->version = true;
+          break;
+        default:
+          return (char *) s;
+        }
+      ++s;
+    }
+  return (char *) s;
+}
+
+/* Initialize KEY.  */
+
+static struct keyfield *
+key_init (struct keyfield *key)
+{
+  memset (key, 0, sizeof *key);
+  key->eword = SIZE_MAX;
+  return key;
+}
+
+static void add_key(void)
+{
+  struct keyfield *key;
+  struct keyfield key_buf;  char const *s;
+  key = key_init (&key_buf);
+
+  /* Get POS1. */
+  s = parse_field_count (optarg, &key->sword,
+                         N_("invalid number at field start"));
+  if (! key->sword--)
+    {
+      /* Provoke with 'sort -k0' */
+      badfieldspec (optarg, N_("field number is zero"));
+    }
+  if (*s == '.')
+    {
+      s = parse_field_count (s + 1, &key->schar,
+                             N_("invalid number after '.'"));
+      if (! key->schar--)
+        {
+          /* Provoke with 'sort -k1.0' */
+          badfieldspec (optarg, N_("character offset is zero"));
+        }
+    }
+  if (! (key->sword || key->schar))
+    key->sword = SIZE_MAX;
+  s = set_ordering (s, key, bl_start);
+  if (*s != ',')
+    {
+      key->eword = SIZE_MAX;
+      key->echar = 0;
+    }
+  else
+    {
+      /* Get POS2. */
+      s = parse_field_count (s + 1, &key->eword,
+                             N_("invalid number after ','"));
+      if (! key->eword--)
+        {
+          /* Provoke with 'sort -k1,0' */
+          badfieldspec (optarg, N_("field number is zero"));
+        }
+      if (*s == '.')
+        {
+          s = parse_field_count (s + 1, &key->echar,
+                                 N_("invalid number after '.'"));
+        }
+      s = set_ordering (s, key, bl_end);
+    }
+  if (*s)
+    badfieldspec (optarg, N_("stray character in field spec"));
+  insertkey (key);
+}