sort: split off source lines useful for uniq, too

author: Erich Eckner <git@eckner.net> 2016-11-28 14:03:15 +0100
committer: Erich Eckner <git@eckner.net> 2017-02-16 13:23:48 +0100
commit: e74f109de104c1966d2d4863aedfb25eefe0358f (patch)
tree: 738ec2f7ae295019d346175a86ea85f7e13782e2 /src
parent: 1c27b56095b4a82be7d072baabc09262cb4227e5 (diff)
download: coreutils-e74f109de104c1966d2d4863aedfb25eefe0358f.tar.xz
2 files changed, 1094 insertions, 1086 deletions
diff --git a/src/sort-uniq-keyfuncs.c b/src/sort-uniq-keyfuncs.c
new file mode 100644
index 000000000..196045bc0
--- /dev/null
+++ b/src/sort-uniq-keyfuncs.c
@@ -0,0 +1,1091 @@
+#include "filevercmp.h"
+#include "md5.h"
+#include "strnumcmp.h"
+#define UCHAR_LIM (UCHAR_MAX + 1)
+
+#if HAVE_C99_STRTOLD
+# define long_double long double
+#else
+# define long_double double
+# undef strtold
+# define strtold strtod
+#endif
+
+/* Exit statuses.  */
+enum
+  {
+    /* POSIX says to exit with status 1 if invoked with -c and the
+       input is not properly sorted.  */
+    SORT_OUT_OF_ORDER = 1,
+
+    /* POSIX says any other irregular exit must exit with a status
+       code greater than 1.  */
+    SORT_FAILURE = 2
+  };
+
+/* The representation of the decimal point in the current locale.  */
+static int decimal_point;
+
+/* Thousands separator; if -1, then there isn't one.  */
+static int thousands_sep;
+
+/* Nonzero if the corresponding locales are hard.  */
+static bool hard_LC_COLLATE;
+
+#define NONZERO(x) ((x) != 0)
+
+/* The kind of blanks for '-b' to skip in various options. */
+enum blanktype { bl_start, bl_end, bl_both };
+
+/* Lines are held in core as counted strings. */
+struct line
+{
+  char *text;			/* Text of the line. */
+  size_t length;		/* Length including final newline. */
+  char *keybeg;			/* Start of first key. */
+  char *keylim;			/* Limit of first key. */
+};
+
+/* Sort key.  */
+struct keyfield
+{
+  size_t sword;			/* Zero-origin 'word' to start at. */
+  size_t schar;			/* Additional characters to skip. */
+  size_t eword;			/* Zero-origin last 'word' of key. */
+  size_t echar;			/* Additional characters in field. */
+  bool const *ignore;		/* Boolean array of characters to ignore. */
+  char const *translate;	/* Translation applied to characters. */
+  bool skipsblanks;		/* Skip leading blanks when finding start.  */
+  bool skipeblanks;		/* Skip leading blanks when finding end.  */
+  bool numeric;			/* Flag for numeric comparison.  Handle
+                                   strings of digits with optional decimal
+                                   point, but no exponential notation. */
+  bool random;			/* Sort by random hash of key.  */
+  bool general_numeric;		/* Flag for general, numeric comparison.
+                                   Handle numbers in exponential notation. */
+  bool human_numeric;		/* Flag for sorting by human readable
+                                   units with either SI or IEC prefixes. */
+  bool month;			/* Flag for comparison by month name. */
+  bool reverse;			/* Reverse the sense of comparison. */
+  bool version;			/* sort by version number */
+  bool traditional_used;	/* Traditional key option format is used. */
+  struct keyfield *next;	/* Next keyfield to try. */
+};
+
+struct month
+{
+  char const *name;
+  int val;
+};
+
+/* FIXME: None of these tables work with multibyte character sets.
+   Also, there are many other bugs when handling multibyte characters.
+   One way to fix this is to rewrite 'sort' to use wide characters
+   internally, but doing this with good performance is a bit
+   tricky.  */
+
+/* Table of blanks.  */
+static bool blanks[UCHAR_LIM];
+
+/* Table of non-printing characters. */
+static bool nonprinting[UCHAR_LIM];
+
+/* Table of non-dictionary characters (not letters, digits, or blanks). */
+static bool nondictionary[UCHAR_LIM];
+
+/* Translation table folding lower case to upper.  */
+static char fold_toupper[UCHAR_LIM];
+
+#define MONTHS_PER_YEAR 12
+
+/* Table mapping month names to integers.
+   Alphabetic order allows binary search. */
+static struct month monthtab[] =
+{
+  {"APR", 4},
+  {"AUG", 8},
+  {"DEC", 12},
+  {"FEB", 2},
+  {"JAN", 1},
+  {"JUL", 7},
+  {"JUN", 6},
+  {"MAR", 3},
+  {"MAY", 5},
+  {"NOV", 11},
+  {"OCT", 10},
+  {"SEP", 9}
+};
+
+/* Flag to reverse the order of all comparisons. */
+static bool reverse;
+
+/* Flag for stable sort.  This turns off the last ditch bytewise
+   comparison of lines, and instead leaves lines in the same order
+   they were read if all keys compare equal.  */
+static bool stable;
+
+/* If TAB has this value, blanks separate fields.  */
+enum { TAB_DEFAULT = CHAR_MAX + 1 };
+
+/* Tab character separating fields.  If TAB_DEFAULT, then fields are
+   separated by the empty string between a non-blank character and a blank
+   character. */
+static int tab = TAB_DEFAULT;
+
+/* Flag to remove consecutive duplicate lines from the output.
+   Only the last of a sequence of equal lines will be output. */
+static bool unique;
+
+/* List of key field comparisons to be tried.  */
+static struct keyfield *keylist;
+
+/* Return a pointer to the first character of the field specified
+   by KEY in LINE. */
+
+static char *
+begfield (struct line const *line, struct keyfield const *key)
+{
+  char *ptr = line->text, *lim = ptr + line->length - 1;
+  size_t sword = key->sword;
+  size_t schar = key->schar;
+
+  /* The leading field separator itself is included in a field when -t
+     is absent.  */
+
+  if (tab != TAB_DEFAULT)
+    while (ptr < lim && sword--)
+      {
+        while (ptr < lim && *ptr != tab)
+          ++ptr;
+        if (ptr < lim)
+          ++ptr;
+      }
+  else
+    while (ptr < lim && sword--)
+      {
+        while (ptr < lim && blanks[to_uchar (*ptr)])
+          ++ptr;
+        while (ptr < lim && !blanks[to_uchar (*ptr)])
+          ++ptr;
+      }
+
+  /* If we're ignoring leading blanks when computing the Start
+     of the field, skip past them here.  */
+  if (key->skipsblanks)
+    while (ptr < lim && blanks[to_uchar (*ptr)])
+      ++ptr;
+
+  /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
+  ptr = MIN (lim, ptr + schar);
+
+  return ptr;
+}
+
+/* Return the limit of (a pointer to the first character after) the field
+   in LINE specified by KEY. */
+
+static char *
+limfield (struct line const *line, struct keyfield const *key)
+{
+  char *ptr = line->text, *lim = ptr + line->length - 1;
+  size_t eword = key->eword, echar = key->echar;
+
+  if (echar == 0)
+    eword++; /* Skip all of end field.  */
+
+  /* Move PTR past EWORD fields or to one past the last byte on LINE,
+     whichever comes first.  If there are more than EWORD fields, leave
+     PTR pointing at the beginning of the field having zero-based index,
+     EWORD.  If a delimiter character was specified (via -t), then that
+     'beginning' is the first character following the delimiting TAB.
+     Otherwise, leave PTR pointing at the first 'blank' character after
+     the preceding field.  */
+  if (tab != TAB_DEFAULT)
+    while (ptr < lim && eword--)
+      {
+        while (ptr < lim && *ptr != tab)
+          ++ptr;
+        if (ptr < lim && (eword || echar))
+          ++ptr;
+      }
+  else
+    while (ptr < lim && eword--)
+      {
+        while (ptr < lim && blanks[to_uchar (*ptr)])
+          ++ptr;
+        while (ptr < lim && !blanks[to_uchar (*ptr)])
+          ++ptr;
+      }
+
+#ifdef POSIX_UNSPECIFIED
+  /* The following block of code makes GNU sort incompatible with
+     standard Unix sort, so it's ifdef'd out for now.
+     The POSIX spec isn't clear on how to interpret this.
+     FIXME: request clarification.
+
+     From: kwzh@gnu.ai.mit.edu (Karl Heuer)
+     Date: Thu, 30 May 96 12:20:41 -0400
+     [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
+
+     [...]I believe I've found another bug in 'sort'.
+
+     $ cat /tmp/sort.in
+     a b c 2 d
+     pq rs 1 t
+     $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
+     a b c 2 d
+     pq rs 1 t
+     $ /bin/sort -k1.7,1.7 </tmp/sort.in
+     pq rs 1 t
+     a b c 2 d
+
+     Unix sort produced the answer I expected: sort on the single character
+     in column 7.  GNU sort produced different results, because it disagrees
+     on the interpretation of the key-end spec "M.N".  Unix sort reads this
+     as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
+     "skip M-1 fields, then either N-1 characters or the rest of the current
+     field, whichever comes first".  This extra clause applies only to
+     key-ends, not key-starts.
+     */
+
+  /* Make LIM point to the end of (one byte past) the current field.  */
+  if (tab != TAB_DEFAULT)
+    {
+      char *newlim;
+      newlim = memchr (ptr, tab, lim - ptr);
+      if (newlim)
+        lim = newlim;
+    }
+  else
+    {
+      char *newlim;
+      newlim = ptr;
+      while (newlim < lim && blanks[to_uchar (*newlim)])
+        ++newlim;
+      while (newlim < lim && !blanks[to_uchar (*newlim)])
+        ++newlim;
+      lim = newlim;
+    }
+#endif
+
+  if (echar != 0) /* We need to skip over a portion of the end field.  */
+    {
+      /* If we're ignoring leading blanks when computing the End
+         of the field, skip past them here.  */
+      if (key->skipeblanks)
+        while (ptr < lim && blanks[to_uchar (*ptr)])
+          ++ptr;
+
+      /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
+      ptr = MIN (lim, ptr + echar);
+    }
+
+  return ptr;
+}
+
+/* Table that maps characters to order-of-magnitude values.  */
+static char const unit_order[UCHAR_LIM] =
+  {
+#if ! ('K' == 75 && 'M' == 77 && 'G' == 71 && 'T' == 84 && 'P' == 80 \
+     && 'E' == 69 && 'Z' == 90 && 'Y' == 89 && 'k' == 107)
+    /* This initializer syntax works on all C99 hosts.  For now, use
+       it only on non-ASCII hosts, to ease the pain of porting to
+       pre-C99 ASCII hosts.  */
+    ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
+    ['k']=1,
+#else
+    /* Generate the following table with this command:
+       perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
+       foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
+       |fmt  */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
+    0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif
+  };
+
+/* Traverse number given as *number consisting of digits, thousands_sep, and
+   decimal_point chars only.  Returns the highest digit found in the number,
+   or '\0' if no digit has been found.  Upon return *number points at the
+   character that immediately follows after the given number.  */
+static unsigned char
+traverse_raw_number (char const **number)
+{
+  char const *p = *number;
+  unsigned char ch;
+  unsigned char max_digit = '\0';
+  bool ends_with_thousands_sep = false;
+
+  /* Scan to end of number.
+     Decimals or separators not followed by digits stop the scan.
+     Numbers ending in decimals or separators are thus considered
+     to be lacking in units.
+     FIXME: add support for multibyte thousands_sep and decimal_point.  */
+
+  while (ISDIGIT (ch = *p++))
+    {
+      if (max_digit < ch)
+        max_digit = ch;
+
+      /* Allow to skip only one occurrence of thousands_sep to avoid finding
+         the unit in the next column in case thousands_sep matches as blank
+         and is used as column delimiter.  */
+      ends_with_thousands_sep = (*p == thousands_sep);
+      if (ends_with_thousands_sep)
+        ++p;
+    }
+
+  if (ends_with_thousands_sep)
+    {
+      /* thousands_sep not followed by digit is not allowed.  */
+      *number = p - 2;
+      return max_digit;
+    }
+
+  if (ch == decimal_point)
+    while (ISDIGIT (ch = *p++))
+      if (max_digit < ch)
+        max_digit = ch;
+
+  *number = p - 1;
+  return max_digit;
+}
+
+/* Return an integer that represents the order of magnitude of the
+   unit following the number.  The number may contain thousands
+   separators and a decimal point, but it may not contain leading blanks.
+   Negative numbers get negative orders; zero numbers have a zero order.  */
+
+static int _GL_ATTRIBUTE_PURE
+find_unit_order (char const *number)
+{
+  bool minus_sign = (*number == '-');
+  char const *p = number + minus_sign;
+  unsigned char max_digit = traverse_raw_number (&p);
+  if ('0' < max_digit)
+    {
+      unsigned char ch = *p;
+      int order = unit_order[ch];
+      return (minus_sign ? -order : order);
+    }
+  else
+    return 0;
+}
+
+/* Compare numbers A and B ending in units with SI or IEC prefixes
+       <none/unknown> < K/k < M < G < T < P < E < Z < Y  */
+
+static int
+human_numcompare (char const *a, char const *b)
+{
+  while (blanks[to_uchar (*a)])
+    a++;
+  while (blanks[to_uchar (*b)])
+    b++;
+
+  int diff = find_unit_order (a) - find_unit_order (b);
+  return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep));
+}
+
+/* Compare strings A and B as numbers without explicitly converting them to
+   machine numbers.  Comparatively slow for short strings, but asymptotically
+   hideously fast. */
+
+static int
+numcompare (char const *a, char const *b)
+{
+  while (blanks[to_uchar (*a)])
+    a++;
+  while (blanks[to_uchar (*b)])
+    b++;
+
+  return strnumcmp (a, b, decimal_point, thousands_sep);
+}
+
+/* Work around a problem whereby the long double value returned by glibc's
+   strtold ("NaN", ...) contains uninitialized bits: clear all bytes of
+   A and B before calling strtold.  FIXME: remove this function once
+   gnulib guarantees that strtold's result is always well defined.  */
+static int
+nan_compare (char const *sa, char const *sb)
+{
+  long_double a;
+  memset (&a, 0, sizeof a);
+  a = strtold (sa, NULL);
+
+  long_double b;
+  memset (&b, 0, sizeof b);
+  b = strtold (sb, NULL);
+
+  return memcmp (&a, &b, sizeof a);
+}
+
+static int
+general_numcompare (char const *sa, char const *sb)
+{
+  /* FIXME: maybe add option to try expensive FP conversion
+     only if A and B can't be compared more cheaply/accurately.  */
+
+  char *ea;
+  char *eb;
+  long_double a = strtold (sa, &ea);
+  long_double b = strtold (sb, &eb);
+
+  /* Put conversion errors at the start of the collating sequence.  */
+  if (sa == ea)
+    return sb == eb ? 0 : -1;
+  if (sb == eb)
+    return 1;
+
+  /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
+     conversion errors but before numbers; sort them by internal
+     bit-pattern, for lack of a more portable alternative.  */
+  return (a < b ? -1
+          : a > b ? 1
+          : a == b ? 0
+          : b == b ? -1
+          : a == a ? 1
+          : nan_compare (sa, sb));
+}
+
+/* Return an integer in 1..12 of the month name MONTH.
+   Return 0 if the name in S is not recognized.  */
+
+static int
+getmonth (char const *month, char **ea)
+{
+  size_t lo = 0;
+  size_t hi = MONTHS_PER_YEAR;
+
+  while (blanks[to_uchar (*month)])
+    month++;
+
+  do
+    {
+      size_t ix = (lo + hi) / 2;
+      char const *m = month;
+      char const *n = monthtab[ix].name;
+
+      for (;; m++, n++)
+        {
+          if (!*n)
+            {
+              if (ea)
+                *ea = (char *) m;
+              return monthtab[ix].val;
+            }
+          if (to_uchar (fold_toupper[to_uchar (*m)]) < to_uchar (*n))
+            {
+              hi = ix;
+              break;
+            }
+          else if (to_uchar (fold_toupper[to_uchar (*m)]) > to_uchar (*n))
+            {
+              lo = ix + 1;
+              break;
+            }
+        }
+    }
+  while (lo < hi);
+
+  return 0;
+}
+
+/* A randomly chosen MD5 state, used for random comparison.  */
+static struct md5_ctx random_md5_state;
+
+/* This is like strxfrm, except it reports any error and exits.  */
+
+static size_t
+xstrxfrm (char *restrict dest, char const *restrict src, size_t destsize)
+{
+  errno = 0;
+  size_t translated_size = strxfrm (dest, src, destsize);
+
+  if (errno)
+    {
+      error (0, errno, _("string transformation failed"));
+      error (0, 0, _("set LC_ALL='C' to work around the problem"));
+      die (SORT_FAILURE, 0,
+           _("the untransformed string was %s"),
+           quotearg_n_style (0, locale_quoting_style, src));
+    }
+
+  return translated_size;
+}
+
+/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
+   using one or more random hash functions.  TEXTA[LENA] and
+   TEXTB[LENB] must be zero.  */
+
+static int
+compare_random (char *restrict texta, size_t lena,
+                char *restrict textb, size_t lenb)
+{
+  /* XFRM_DIFF records the equivalent of memcmp on the transformed
+     data.  This is used to break ties if there is a checksum
+     collision, and this is good enough given the astronomically low
+     probability of a collision.  */
+  int xfrm_diff = 0;
+
+  char stackbuf[4000];
+  char *buf = stackbuf;
+  size_t bufsize = sizeof stackbuf;
+  void *allocated = NULL;
+  uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
+  struct md5_ctx s[2];
+  s[0] = s[1] = random_md5_state;
+
+  if (hard_LC_COLLATE)
+    {
+      char const *lima = texta + lena;
+      char const *limb = textb + lenb;
+
+      while (true)
+        {
+          /* Transform the text into the basis of comparison, so that byte
+             strings that would otherwise considered to be equal are
+             considered equal here even if their bytes differ.
+
+             Each time through this loop, transform one
+             null-terminated string's worth from TEXTA or from TEXTB
+             or both.  That way, there's no need to store the
+             transformation of the whole line, if it contains many
+             null-terminated strings.  */
+
+          /* Store the transformed data into a big-enough buffer.  */
+
+          /* A 3X size guess avoids the overhead of calling strxfrm
+             twice on typical implementations.  Don't worry about
+             size_t overflow, as the guess need not be correct.  */
+          size_t guess_bufsize = 3 * (lena + lenb) + 2;
+          if (bufsize < guess_bufsize)
+            {
+              bufsize = MAX (guess_bufsize, bufsize * 3 / 2);
+              free (allocated);
+              buf = allocated = malloc (bufsize);
+              if (! buf)
+                {
+                  buf = stackbuf;
+                  bufsize = sizeof stackbuf;
+                }
+            }
+
+          size_t sizea =
+            (texta < lima ? xstrxfrm (buf, texta, bufsize) + 1 : 0);
+          bool a_fits = sizea <= bufsize;
+          size_t sizeb =
+            (textb < limb
+             ? (xstrxfrm ((a_fits ? buf + sizea : NULL), textb,
+                          (a_fits ? bufsize - sizea : 0))
+                + 1)
+             : 0);
+
+          if (! (a_fits && sizea + sizeb <= bufsize))
+            {
+              bufsize = sizea + sizeb;
+              if (bufsize < SIZE_MAX / 3)
+                bufsize = bufsize * 3 / 2;
+              free (allocated);
+              buf = allocated = xmalloc (bufsize);
+              if (texta < lima)
+                strxfrm (buf, texta, sizea);
+              if (textb < limb)
+                strxfrm (buf + sizea, textb, sizeb);
+            }
+
+          /* Advance past NULs to the next part of each input string,
+             exiting the loop if both strings are exhausted.  When
+             exiting the loop, prepare to finish off the tiebreaker
+             comparison properly.  */
+          if (texta < lima)
+            texta += strlen (texta) + 1;
+          if (textb < limb)
+            textb += strlen (textb) + 1;
+          if (! (texta < lima || textb < limb))
+            {
+              lena = sizea; texta = buf;
+              lenb = sizeb; textb = buf + sizea;
+              break;
+            }
+
+          /* Accumulate the transformed data in the corresponding
+             checksums.  */
+          md5_process_bytes (buf, sizea, &s[0]);
+          md5_process_bytes (buf + sizea, sizeb, &s[1]);
+
+          /* Update the tiebreaker comparison of the transformed data.  */
+          if (! xfrm_diff)
+            {
+              xfrm_diff = memcmp (buf, buf + sizea, MIN (sizea, sizeb));
+              if (! xfrm_diff)
+                xfrm_diff = (sizea > sizeb) - (sizea < sizeb);
+            }
+        }
+    }
+
+  /* Compute and compare the checksums.  */
+  md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
+  md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
+  int diff = memcmp (dig[0], dig[1], sizeof dig[0]);
+
+  /* Fall back on the tiebreaker if the checksums collide.  */
+  if (! diff)
+    {
+      if (! xfrm_diff)
+        {
+          xfrm_diff = memcmp (texta, textb, MIN (lena, lenb));
+          if (! xfrm_diff)
+            xfrm_diff = (lena > lenb) - (lena < lenb);
+        }
+
+      diff = xfrm_diff;
+    }
+
+  free (allocated);
+
+  return diff;
+}
+
+/* Return true if KEY is a numeric key.  */
+
+static inline bool
+key_numeric (struct keyfield const *key)
+{
+  return key->numeric || key->general_numeric || key->human_numeric;
+}
+
+/* Compare two lines A and B trying every key in sequence until there
+   are no more keys or a difference is found. */
+
+static int
+keycompare (struct line const *a, struct line const *b)
+{
+  struct keyfield *key = keylist;
+
+  /* For the first iteration only, the key positions have been
+     precomputed for us. */
+  char *texta = a->keybeg;
+  char *textb = b->keybeg;
+  char *lima = a->keylim;
+  char *limb = b->keylim;
+
+  int diff;
+
+  while (true)
+    {
+      char const *translate = key->translate;
+      bool const *ignore = key->ignore;
+
+      /* Treat field ends before field starts as empty fields.  */
+      lima = MAX (texta, lima);
+      limb = MAX (textb, limb);
+
+      /* Find the lengths. */
+      size_t lena = lima - texta;
+      size_t lenb = limb - textb;
+
+      if (hard_LC_COLLATE || key_numeric (key)
+          || key->month || key->random || key->version)
+        {
+          char *ta;
+          char *tb;
+          size_t tlena;
+          size_t tlenb;
+
+          char enda IF_LINT (= 0);
+          char endb IF_LINT (= 0);
+          void *allocated IF_LINT (= NULL);
+          char stackbuf[4000];
+
+          if (ignore || translate)
+            {
+              /* Compute with copies of the keys, which are the result of
+                 translating or ignoring characters, and which need their
+                 own storage.  */
+
+              size_t i;
+
+              /* Allocate space for copies.  */
+              size_t size = lena + 1 + lenb + 1;
+              if (size <= sizeof stackbuf)
+                ta = stackbuf, allocated = NULL;
+              else
+                ta = allocated = xmalloc (size);
+              tb = ta + lena + 1;
+
+              /* Put into each copy a version of the key in which the
+                 requested characters are ignored or translated.  */
+              for (tlena = i = 0; i < lena; i++)
+                if (! (ignore && ignore[to_uchar (texta[i])]))
+                  ta[tlena++] = (translate
+                                 ? translate[to_uchar (texta[i])]
+                                 : texta[i]);
+              ta[tlena] = '\0';
+
+              for (tlenb = i = 0; i < lenb; i++)
+                if (! (ignore && ignore[to_uchar (textb[i])]))
+                  tb[tlenb++] = (translate
+                                 ? translate[to_uchar (textb[i])]
+                                 : textb[i]);
+              tb[tlenb] = '\0';
+            }
+          else
+            {
+              /* Use the keys in-place, temporarily null-terminated.  */
+              ta = texta; tlena = lena; enda = ta[tlena]; ta[tlena] = '\0';
+              tb = textb; tlenb = lenb; endb = tb[tlenb]; tb[tlenb] = '\0';
+            }
+
+          if (key->numeric)
+            diff = numcompare (ta, tb);
+          else if (key->general_numeric)
+            diff = general_numcompare (ta, tb);
+          else if (key->human_numeric)
+            diff = human_numcompare (ta, tb);
+          else if (key->month)
+            diff = getmonth (ta, NULL) - getmonth (tb, NULL);
+          else if (key->random)
+            diff = compare_random (ta, tlena, tb, tlenb);
+          else if (key->version)
+            diff = filevercmp (ta, tb);
+          else
+            {
+              /* Locale-dependent string sorting.  This is slower than
+                 C-locale sorting, which is implemented below.  */
+              if (tlena == 0)
+                diff = - NONZERO (tlenb);
+              else if (tlenb == 0)
+                diff = 1;
+              else
+                diff = xmemcoll0 (ta, tlena + 1, tb, tlenb + 1);
+            }
+
+          if (ignore || translate)
+            free (allocated);
+          else
+            {
+              ta[tlena] = enda;
+              tb[tlenb] = endb;
+            }
+        }
+      else if (ignore)
+        {
+#define CMP_WITH_IGNORE(A, B)						\
+  do									\
+    {									\
+          while (true)							\
+            {								\
+              while (texta < lima && ignore[to_uchar (*texta)])		\
+                ++texta;						\
+              while (textb < limb && ignore[to_uchar (*textb)])		\
+                ++textb;						\
+              if (! (texta < lima && textb < limb))			\
+                break;							\
+              diff = to_uchar (A) - to_uchar (B);			\
+              if (diff)							\
+                goto not_equal;						\
+              ++texta;							\
+              ++textb;							\
+            }								\
+                                                                        \
+          diff = (texta < lima) - (textb < limb);			\
+    }									\
+  while (0)
+
+          if (translate)
+            CMP_WITH_IGNORE (translate[to_uchar (*texta)],
+                             translate[to_uchar (*textb)]);
+          else
+            CMP_WITH_IGNORE (*texta, *textb);
+        }
+      else if (lena == 0)
+        diff = - NONZERO (lenb);
+      else if (lenb == 0)
+        goto greater;
+      else
+        {
+          if (translate)
+            {
+              while (texta < lima && textb < limb)
+                {
+                  diff = (to_uchar (translate[to_uchar (*texta++)])
+                          - to_uchar (translate[to_uchar (*textb++)]));
+                  if (diff)
+                    goto not_equal;
+                }
+            }
+          else
+            {
+              diff = memcmp (texta, textb, MIN (lena, lenb));
+              if (diff)
+                goto not_equal;
+            }
+          diff = lena < lenb ? -1 : lena != lenb;
+        }
+
+      if (diff)
+        goto not_equal;
+
+      key = key->next;
+      if (! key)
+        break;
+
+      /* Find the beginning and limit of the next field.  */
+      if (key->eword != SIZE_MAX)
+        lima = limfield (a, key), limb = limfield (b, key);
+      else
+        lima = a->text + a->length - 1, limb = b->text + b->length - 1;
+
+      if (key->sword != SIZE_MAX)
+        texta = begfield (a, key), textb = begfield (b, key);
+      else
+        {
+          texta = a->text, textb = b->text;
+          if (key->skipsblanks)
+            {
+              while (texta < lima && blanks[to_uchar (*texta)])
+                ++texta;
+              while (textb < limb && blanks[to_uchar (*textb)])
+                ++textb;
+            }
+        }
+    }
+
+  return 0;
+
+ greater:
+  diff = 1;
+ not_equal:
+  return key->reverse ? -diff : diff;
+}
+
+/* Compare two lines A and B, returning negative, zero, or positive
+   depending on whether A compares less than, equal to, or greater than B. */
+
+static int
+compare (struct line const *a, struct line const *b)
+{
+  int diff;
+  size_t alen, blen;
+
+  /* First try to compare on the specified keys (if any).
+     The only two cases with no key at all are unadorned sort,
+     and unadorned sort -r. */
+  if (keylist)
+    {
+      diff = keycompare (a, b);
+      if (diff || unique || stable)
+        return diff;
+    }
+
+  /* If the keys all compare equal (or no keys were specified)
+     fall through to the default comparison.  */
+  alen = a->length - 1, blen = b->length - 1;
+
+  if (alen == 0)
+    diff = - NONZERO (blen);
+  else if (blen == 0)
+    diff = 1;
+  else if (hard_LC_COLLATE)
+    {
+      /* Note xmemcoll0 is a performance enhancement as
+         it will not unconditionally write '\0' after the
+         passed in buffers, which was seen to give around
+         a 3% increase in performance for short lines.  */
+      diff = xmemcoll0 (a->text, alen + 1, b->text, blen + 1);
+    }
+  else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
+    diff = alen < blen ? -1 : alen != blen;
+
+  return reverse ? -diff : diff;
+}
+
+static void
+insertkey (struct keyfield *key_arg)
+{
+  struct keyfield **p;
+  struct keyfield *key = xmemdup (key_arg, sizeof *key);
+
+  for (p = &keylist; *p; p = &(*p)->next)
+    continue;
+  *p = key;
+  key->next = NULL;
+}
+
+/* Report a bad field specification SPEC, with extra info MSGID.  */
+
+static void badfieldspec (char const *, char const *)
+     ATTRIBUTE_NORETURN;
+static void
+badfieldspec (char const *spec, char const *msgid)
+{
+  die (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
+       _(msgid), quote (spec));
+}
+
+/* Parse the leading integer in STRING and store the resulting value
+   (which must fit into size_t) into *VAL.  Return the address of the
+   suffix after the integer.  If the value is too large, silently
+   substitute SIZE_MAX.  If MSGID is NULL, return NULL after
+   failure; otherwise, report MSGID and exit on failure.  */
+
+static char const *
+parse_field_count (char const *string, size_t *val, char const *msgid)
+{
+  char *suffix;
+  uintmax_t n;
+
+  switch (xstrtoumax (string, &suffix, 10, &n, ""))
+    {
+    case LONGINT_OK:
+    case LONGINT_INVALID_SUFFIX_CHAR:
+      *val = n;
+      if (*val == n)
+        break;
+      /* Fall through.  */
+    case LONGINT_OVERFLOW:
+    case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
+      *val = SIZE_MAX;
+      break;
+
+    case LONGINT_INVALID:
+      if (msgid)
+        die (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
+             _(msgid), quote (string));
+      return NULL;
+    }
+
+  return suffix;
+}
+
+/* Set the ordering options for KEY specified in S.
+   Return the address of the first character in S that
+   is not a valid ordering option.
+   BLANKTYPE is the kind of blanks that 'b' should skip. */
+
+static char *
+set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype)
+{
+  while (*s)
+    {
+      switch (*s)
+        {
+        case 'b':
+          if (blanktype == bl_start || blanktype == bl_both)
+            key->skipsblanks = true;
+          if (blanktype == bl_end || blanktype == bl_both)
+            key->skipeblanks = true;
+          break;
+        case 'd':
+          key->ignore = nondictionary;
+          break;
+        case 'f':
+          key->translate = fold_toupper;
+          break;
+        case 'g':
+          key->general_numeric = true;
+          break;
+        case 'h':
+          key->human_numeric = true;
+          break;
+        case 'i':
+          /* Option order should not matter, so don't let -i override
+             -d.  -d implies -i, but -i does not imply -d.  */
+          if (! key->ignore)
+            key->ignore = nonprinting;
+          break;
+        case 'M':
+          key->month = true;
+          break;
+        case 'n':
+          key->numeric = true;
+          break;
+        case 'R':
+          key->random = true;
+          break;
+        case 'r':
+          key->reverse = true;
+          break;
+        case 'V':
+          key->version = true;
+          break;
+        default:
+          return (char *) s;
+        }
+      ++s;
+    }
+  return (char *) s;
+}
+
+/* Initialize KEY.  */
+
+static struct keyfield *
+key_init (struct keyfield *key)
+{
+  memset (key, 0, sizeof *key);
+  key->eword = SIZE_MAX;
+  return key;
+}
+
+static void add_key(void)
+{
+  struct keyfield *key;
+  struct keyfield key_buf;  char const *s;
+  key = key_init (&key_buf);
+
+  /* Get POS1. */
+  s = parse_field_count (optarg, &key->sword,
+                         N_("invalid number at field start"));
+  if (! key->sword--)
+    {
+      /* Provoke with 'sort -k0' */
+      badfieldspec (optarg, N_("field number is zero"));
+    }
+  if (*s == '.')
+    {
+      s = parse_field_count (s + 1, &key->schar,
+                             N_("invalid number after '.'"));
+      if (! key->schar--)
+        {
+          /* Provoke with 'sort -k1.0' */
+          badfieldspec (optarg, N_("character offset is zero"));
+        }
+    }
+  if (! (key->sword || key->schar))
+    key->sword = SIZE_MAX;
+  s = set_ordering (s, key, bl_start);
+  if (*s != ',')
+    {
+      key->eword = SIZE_MAX;
+      key->echar = 0;
+    }
+  else
+    {
+      /* Get POS2. */
+      s = parse_field_count (s + 1, &key->eword,
+                             N_("invalid number after ','"));
+      if (! key->eword--)
+        {
+          /* Provoke with 'sort -k1,0' */
+          badfieldspec (optarg, N_("field number is zero"));
+        }
+      if (*s == '.')
+        {
+          s = parse_field_count (s + 1, &key->echar,
+                                 N_("invalid number after '.'"));
+        }
+      s = set_ordering (s, key, bl_end);
+    }
+  if (*s)
+    badfieldspec (optarg, N_("stray character in field spec"));
+  insertkey (key);
+}
diff --git a/src/sort.c b/src/sort.c
index 94315d2a1..6f28b63ae 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -34,13 +34,11 @@
 #include "die.h"
 #include "error.h"
 #include "fadvise.h"
-#include "filevercmp.h"
 #include "flexmember.h"
 #include "hard-locale.h"
 #include "hash.h"
 #include "heap.h"
 #include "ignore-value.h"
-#include "md5.h"
 #include "mbswidth.h"
 #include "nproc.h"
 #include "physmem.h"
@@ -50,11 +48,12 @@
 #include "readtokens0.h"
 #include "stdio--.h"
 #include "stdlib--.h"
-#include "strnumcmp.h"
 #include "xmemcoll.h"
 #include "xnanosleep.h"
 #include "xstrtol.h"
 
+#include "sort-uniq-keyfuncs.c"
+
 #ifndef RLIMIT_DATA
 struct rlimit { size_t rlim_cur; };
 # define getrlimit(Resource, Rlp) (-1)
@@ -90,16 +89,6 @@ struct rlimit { size_t rlim_cur; };
 # define OPEN_MAX 20
 #endif
 
-#define UCHAR_LIM (UCHAR_MAX + 1)
-
-#if HAVE_C99_STRTOLD
-# define long_double long double
-#else
-# define long_double double
-# undef strtold
-# define strtold strtod
-#endif
-
 #ifndef DEFAULT_TMPDIR
 # define DEFAULT_TMPDIR "/tmp"
 #endif
@@ -124,18 +113,6 @@ verify (4 <= SUBTHREAD_LINES_HEURISTIC);
    diminishing performance gains.  */
 enum { DEFAULT_MAX_THREADS = 8 };
 
-/* Exit statuses.  */
-enum
-  {
-    /* POSIX says to exit with status 1 if invoked with -c and the
-       input is not properly sorted.  */
-    SORT_OUT_OF_ORDER = 1,
-
-    /* POSIX says any other irregular exit must exit with a status
-       code greater than 1.  */
-    SORT_FAILURE = 2
-  };
-
 enum
   {
     /* The number of times we should try to fork a compression process
@@ -159,35 +136,12 @@ enum
     MERGE_ROOT = 1
   };
 
-/* The representation of the decimal point in the current locale.  */
-static int decimal_point;
-
-/* Thousands separator; if -1, then there isn't one.  */
-static int thousands_sep;
-
-/* Nonzero if the corresponding locales are hard.  */
-static bool hard_LC_COLLATE;
 #if HAVE_NL_LANGINFO
 static bool hard_LC_TIME;
 #endif
-
-#define NONZERO(x) ((x) != 0)
-
-/* The kind of blanks for '-b' to skip in various options. */
-enum blanktype { bl_start, bl_end, bl_both };
-
 /* The character marking end of line. Default to \n. */
 static char eolchar = '\n';
 
-/* Lines are held in core as counted strings. */
-struct line
-{
-  char *text;			/* Text of the line. */
-  size_t length;		/* Length including final newline. */
-  char *keybeg;			/* Start of first key. */
-  char *keylim;			/* Limit of first key. */
-};
-
 /* Input buffers. */
 struct buffer
 {
@@ -204,38 +158,6 @@ struct buffer
   bool eof;			/* An EOF has been read.  */
 };
 
-/* Sort key.  */
-struct keyfield
-{
-  size_t sword;			/* Zero-origin 'word' to start at. */
-  size_t schar;			/* Additional characters to skip. */
-  size_t eword;			/* Zero-origin last 'word' of key. */
-  size_t echar;			/* Additional characters in field. */
-  bool const *ignore;		/* Boolean array of characters to ignore. */
-  char const *translate;	/* Translation applied to characters. */
-  bool skipsblanks;		/* Skip leading blanks when finding start.  */
-  bool skipeblanks;		/* Skip leading blanks when finding end.  */
-  bool numeric;			/* Flag for numeric comparison.  Handle
-                                   strings of digits with optional decimal
-                                   point, but no exponential notation. */
-  bool random;			/* Sort by random hash of key.  */
-  bool general_numeric;		/* Flag for general, numeric comparison.
-                                   Handle numbers in exponential notation. */
-  bool human_numeric;		/* Flag for sorting by human readable
-                                   units with either SI or IEC prefixes. */
-  bool month;			/* Flag for comparison by month name. */
-  bool reverse;			/* Reverse the sense of comparison. */
-  bool version;			/* sort by version number */
-  bool traditional_used;	/* Traditional key option format is used. */
-  struct keyfield *next;	/* Next keyfield to try. */
-};
-
-struct month
-{
-  char const *name;
-  int val;
-};
-
 /* Binary merge tree node. */
 struct merge_node
 {
@@ -266,44 +188,6 @@ struct merge_node_queue
 /* Used to implement --unique (-u).  */
 static struct line saved_line;
 
-/* FIXME: None of these tables work with multibyte character sets.
-   Also, there are many other bugs when handling multibyte characters.
-   One way to fix this is to rewrite 'sort' to use wide characters
-   internally, but doing this with good performance is a bit
-   tricky.  */
-
-/* Table of blanks.  */
-static bool blanks[UCHAR_LIM];
-
-/* Table of non-printing characters. */
-static bool nonprinting[UCHAR_LIM];
-
-/* Table of non-dictionary characters (not letters, digits, or blanks). */
-static bool nondictionary[UCHAR_LIM];
-
-/* Translation table folding lower case to upper.  */
-static char fold_toupper[UCHAR_LIM];
-
-#define MONTHS_PER_YEAR 12
-
-/* Table mapping month names to integers.
-   Alphabetic order allows binary search. */
-static struct month monthtab[] =
-{
-  {"APR", 4},
-  {"AUG", 8},
-  {"DEC", 12},
-  {"FEB", 2},
-  {"JAN", 1},
-  {"JUL", 7},
-  {"JUN", 6},
-  {"MAR", 3},
-  {"MAY", 5},
-  {"NOV", 11},
-  {"OCT", 10},
-  {"SEP", 9}
-};
-
 /* During the merge phase, the number of files to merge at once. */
 #define NMERGE_DEFAULT 16
 
@@ -338,32 +222,9 @@ static size_t temp_dir_count;
 /* Number of allocated slots in temp_dirs.  */
 static size_t temp_dir_alloc;
 
-/* Flag to reverse the order of all comparisons. */
-static bool reverse;
-
-/* Flag for stable sort.  This turns off the last ditch bytewise
-   comparison of lines, and instead leaves lines in the same order
-   they were read if all keys compare equal.  */
-static bool stable;
-
-/* If TAB has this value, blanks separate fields.  */
-enum { TAB_DEFAULT = CHAR_MAX + 1 };
-
-/* Tab character separating fields.  If TAB_DEFAULT, then fields are
-   separated by the empty string between a non-blank character and a blank
-   character. */
-static int tab = TAB_DEFAULT;
-
-/* Flag to remove consecutive duplicate lines from the output.
-   Only the last of a sequence of equal lines will be output. */
-static bool unique;
-
 /* Nonzero if any of the input files are the standard input. */
 static bool have_read_stdin;
 
-/* List of key field comparisons to be tried.  */
-static struct keyfield *keylist;
-
 /* Program used to (de)compress temp files.  Must accept -d.  */
 static char const *compress_program;
 
@@ -1593,150 +1454,6 @@ buffer_linelim (struct buffer const *buf)
   return linelim;
 }
 
-/* Return a pointer to the first character of the field specified
-   by KEY in LINE. */
-
-static char *
-begfield (struct line const *line, struct keyfield const *key)
-{
-  char *ptr = line->text, *lim = ptr + line->length - 1;
-  size_t sword = key->sword;
-  size_t schar = key->schar;
-
-  /* The leading field separator itself is included in a field when -t
-     is absent.  */
-
-  if (tab != TAB_DEFAULT)
-    while (ptr < lim && sword--)
-      {
-        while (ptr < lim && *ptr != tab)
-          ++ptr;
-        if (ptr < lim)
-          ++ptr;
-      }
-  else
-    while (ptr < lim && sword--)
-      {
-        while (ptr < lim && blanks[to_uchar (*ptr)])
-          ++ptr;
-        while (ptr < lim && !blanks[to_uchar (*ptr)])
-          ++ptr;
-      }
-
-  /* If we're ignoring leading blanks when computing the Start
-     of the field, skip past them here.  */
-  if (key->skipsblanks)
-    while (ptr < lim && blanks[to_uchar (*ptr)])
-      ++ptr;
-
-  /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
-  ptr = MIN (lim, ptr + schar);
-
-  return ptr;
-}
-
-/* Return the limit of (a pointer to the first character after) the field
-   in LINE specified by KEY. */
-
-static char *
-limfield (struct line const *line, struct keyfield const *key)
-{
-  char *ptr = line->text, *lim = ptr + line->length - 1;
-  size_t eword = key->eword, echar = key->echar;
-
-  if (echar == 0)
-    eword++; /* Skip all of end field.  */
-
-  /* Move PTR past EWORD fields or to one past the last byte on LINE,
-     whichever comes first.  If there are more than EWORD fields, leave
-     PTR pointing at the beginning of the field having zero-based index,
-     EWORD.  If a delimiter character was specified (via -t), then that
-     'beginning' is the first character following the delimiting TAB.
-     Otherwise, leave PTR pointing at the first 'blank' character after
-     the preceding field.  */
-  if (tab != TAB_DEFAULT)
-    while (ptr < lim && eword--)
-      {
-        while (ptr < lim && *ptr != tab)
-          ++ptr;
-        if (ptr < lim && (eword || echar))
-          ++ptr;
-      }
-  else
-    while (ptr < lim && eword--)
-      {
-        while (ptr < lim && blanks[to_uchar (*ptr)])
-          ++ptr;
-        while (ptr < lim && !blanks[to_uchar (*ptr)])
-          ++ptr;
-      }
-
-#ifdef POSIX_UNSPECIFIED
-  /* The following block of code makes GNU sort incompatible with
-     standard Unix sort, so it's ifdef'd out for now.
-     The POSIX spec isn't clear on how to interpret this.
-     FIXME: request clarification.
-
-     From: kwzh@gnu.ai.mit.edu (Karl Heuer)
-     Date: Thu, 30 May 96 12:20:41 -0400
-     [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
-
-     [...]I believe I've found another bug in 'sort'.
-
-     $ cat /tmp/sort.in
-     a b c 2 d
-     pq rs 1 t
-     $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
-     a b c 2 d
-     pq rs 1 t
-     $ /bin/sort -k1.7,1.7 </tmp/sort.in
-     pq rs 1 t
-     a b c 2 d
-
-     Unix sort produced the answer I expected: sort on the single character
-     in column 7.  GNU sort produced different results, because it disagrees
-     on the interpretation of the key-end spec "M.N".  Unix sort reads this
-     as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
-     "skip M-1 fields, then either N-1 characters or the rest of the current
-     field, whichever comes first".  This extra clause applies only to
-     key-ends, not key-starts.
-     */
-
-  /* Make LIM point to the end of (one byte past) the current field.  */
-  if (tab != TAB_DEFAULT)
-    {
-      char *newlim;
-      newlim = memchr (ptr, tab, lim - ptr);
-      if (newlim)
-        lim = newlim;
-    }
-  else
-    {
-      char *newlim;
-      newlim = ptr;
-      while (newlim < lim && blanks[to_uchar (*newlim)])
-        ++newlim;
-      while (newlim < lim && !blanks[to_uchar (*newlim)])
-        ++newlim;
-      lim = newlim;
-    }
-#endif
-
-  if (echar != 0) /* We need to skip over a portion of the end field.  */
-    {
-      /* If we're ignoring leading blanks when computing the End
-         of the field, skip past them here.  */
-      if (key->skipeblanks)
-        while (ptr < lim && blanks[to_uchar (*ptr)])
-          ++ptr;
-
-      /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
-      ptr = MIN (lim, ptr + echar);
-    }
-
-  return ptr;
-}
-
 /* Fill BUF reading from FP, moving buf->left bytes from the end
    of buf->buf to the beginning first.  If EOF is reached and the
    file wasn't terminated by a newline, supply one.  Set up BUF's line
@@ -1857,225 +1574,6 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
     }
 }
 
-/* Table that maps characters to order-of-magnitude values.  */
-static char const unit_order[UCHAR_LIM] =
-  {
-#if ! ('K' == 75 && 'M' == 77 && 'G' == 71 && 'T' == 84 && 'P' == 80 \
-     && 'E' == 69 && 'Z' == 90 && 'Y' == 89 && 'k' == 107)
-    /* This initializer syntax works on all C99 hosts.  For now, use
-       it only on non-ASCII hosts, to ease the pain of porting to
-       pre-C99 ASCII hosts.  */
-    ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
-    ['k']=1,
-#else
-    /* Generate the following table with this command:
-       perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
-       foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
-       |fmt  */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
-    0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-#endif
-  };
-
-/* Traverse number given as *number consisting of digits, thousands_sep, and
-   decimal_point chars only.  Returns the highest digit found in the number,
-   or '\0' if no digit has been found.  Upon return *number points at the
-   character that immediately follows after the given number.  */
-static unsigned char
-traverse_raw_number (char const **number)
-{
-  char const *p = *number;
-  unsigned char ch;
-  unsigned char max_digit = '\0';
-  bool ends_with_thousands_sep = false;
-
-  /* Scan to end of number.
-     Decimals or separators not followed by digits stop the scan.
-     Numbers ending in decimals or separators are thus considered
-     to be lacking in units.
-     FIXME: add support for multibyte thousands_sep and decimal_point.  */
-
-  while (ISDIGIT (ch = *p++))
-    {
-      if (max_digit < ch)
-        max_digit = ch;
-
-      /* Allow to skip only one occurrence of thousands_sep to avoid finding
-         the unit in the next column in case thousands_sep matches as blank
-         and is used as column delimiter.  */
-      ends_with_thousands_sep = (*p == thousands_sep);
-      if (ends_with_thousands_sep)
-        ++p;
-    }
-
-  if (ends_with_thousands_sep)
-    {
-      /* thousands_sep not followed by digit is not allowed.  */
-      *number = p - 2;
-      return max_digit;
-    }
-
-  if (ch == decimal_point)
-    while (ISDIGIT (ch = *p++))
-      if (max_digit < ch)
-        max_digit = ch;
-
-  *number = p - 1;
-  return max_digit;
-}
-
-/* Return an integer that represents the order of magnitude of the
-   unit following the number.  The number may contain thousands
-   separators and a decimal point, but it may not contain leading blanks.
-   Negative numbers get negative orders; zero numbers have a zero order.  */
-
-static int _GL_ATTRIBUTE_PURE
-find_unit_order (char const *number)
-{
-  bool minus_sign = (*number == '-');
-  char const *p = number + minus_sign;
-  unsigned char max_digit = traverse_raw_number (&p);
-  if ('0' < max_digit)
-    {
-      unsigned char ch = *p;
-      int order = unit_order[ch];
-      return (minus_sign ? -order : order);
-    }
-  else
-    return 0;
-}
-
-/* Compare numbers A and B ending in units with SI or IEC prefixes
-       <none/unknown> < K/k < M < G < T < P < E < Z < Y  */
-
-static int
-human_numcompare (char const *a, char const *b)
-{
-  while (blanks[to_uchar (*a)])
-    a++;
-  while (blanks[to_uchar (*b)])
-    b++;
-
-  int diff = find_unit_order (a) - find_unit_order (b);
-  return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep));
-}
-
-/* Compare strings A and B as numbers without explicitly converting them to
-   machine numbers.  Comparatively slow for short strings, but asymptotically
-   hideously fast. */
-
-static int
-numcompare (char const *a, char const *b)
-{
-  while (blanks[to_uchar (*a)])
-    a++;
-  while (blanks[to_uchar (*b)])
-    b++;
-
-  return strnumcmp (a, b, decimal_point, thousands_sep);
-}
-
-/* Work around a problem whereby the long double value returned by glibc's
-   strtold ("NaN", ...) contains uninitialized bits: clear all bytes of
-   A and B before calling strtold.  FIXME: remove this function once
-   gnulib guarantees that strtold's result is always well defined.  */
-static int
-nan_compare (char const *sa, char const *sb)
-{
-  long_double a;
-  memset (&a, 0, sizeof a);
-  a = strtold (sa, NULL);
-
-  long_double b;
-  memset (&b, 0, sizeof b);
-  b = strtold (sb, NULL);
-
-  return memcmp (&a, &b, sizeof a);
-}
-
-static int
-general_numcompare (char const *sa, char const *sb)
-{
-  /* FIXME: maybe add option to try expensive FP conversion
-     only if A and B can't be compared more cheaply/accurately.  */
-
-  char *ea;
-  char *eb;
-  long_double a = strtold (sa, &ea);
-  long_double b = strtold (sb, &eb);
-
-  /* Put conversion errors at the start of the collating sequence.  */
-  if (sa == ea)
-    return sb == eb ? 0 : -1;
-  if (sb == eb)
-    return 1;
-
-  /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
-     conversion errors but before numbers; sort them by internal
-     bit-pattern, for lack of a more portable alternative.  */
-  return (a < b ? -1
-          : a > b ? 1
-          : a == b ? 0
-          : b == b ? -1
-          : a == a ? 1
-          : nan_compare (sa, sb));
-}
-
-/* Return an integer in 1..12 of the month name MONTH.
-   Return 0 if the name in S is not recognized.  */
-
-static int
-getmonth (char const *month, char **ea)
-{
-  size_t lo = 0;
-  size_t hi = MONTHS_PER_YEAR;
-
-  while (blanks[to_uchar (*month)])
-    month++;
-
-  do
-    {
-      size_t ix = (lo + hi) / 2;
-      char const *m = month;
-      char const *n = monthtab[ix].name;
-
-      for (;; m++, n++)
-        {
-          if (!*n)
-            {
-              if (ea)
-                *ea = (char *) m;
-              return monthtab[ix].val;
-            }
-          if (to_uchar (fold_toupper[to_uchar (*m)]) < to_uchar (*n))
-            {
-              hi = ix;
-              break;
-            }
-          else if (to_uchar (fold_toupper[to_uchar (*m)]) > to_uchar (*n))
-            {
-              lo = ix + 1;
-              break;
-            }
-        }
-    }
-  while (lo < hi);
-
-  return 0;
-}
-
-/* A randomly chosen MD5 state, used for random comparison.  */
-static struct md5_ctx random_md5_state;
-
 /* Initialize the randomly chosen MD5 state.  */
 
 static void
@@ -2092,159 +1590,6 @@ random_md5_state_init (char const *random_source)
   md5_process_bytes (buf, sizeof buf, &random_md5_state);
 }
 
-/* This is like strxfrm, except it reports any error and exits.  */
-
-static size_t
-xstrxfrm (char *restrict dest, char const *restrict src, size_t destsize)
-{
-  errno = 0;
-  size_t translated_size = strxfrm (dest, src, destsize);
-
-  if (errno)
-    {
-      error (0, errno, _("string transformation failed"));
-      error (0, 0, _("set LC_ALL='C' to work around the problem"));
-      die (SORT_FAILURE, 0,
-           _("the untransformed string was %s"),
-           quotearg_n_style (0, locale_quoting_style, src));
-    }
-
-  return translated_size;
-}
-
-/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
-   using one or more random hash functions.  TEXTA[LENA] and
-   TEXTB[LENB] must be zero.  */
-
-static int
-compare_random (char *restrict texta, size_t lena,
-                char *restrict textb, size_t lenb)
-{
-  /* XFRM_DIFF records the equivalent of memcmp on the transformed
-     data.  This is used to break ties if there is a checksum
-     collision, and this is good enough given the astronomically low
-     probability of a collision.  */
-  int xfrm_diff = 0;
-
-  char stackbuf[4000];
-  char *buf = stackbuf;
-  size_t bufsize = sizeof stackbuf;
-  void *allocated = NULL;
-  uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
-  struct md5_ctx s[2];
-  s[0] = s[1] = random_md5_state;
-
-  if (hard_LC_COLLATE)
-    {
-      char const *lima = texta + lena;
-      char const *limb = textb + lenb;
-
-      while (true)
-        {
-          /* Transform the text into the basis of comparison, so that byte
-             strings that would otherwise considered to be equal are
-             considered equal here even if their bytes differ.
-
-             Each time through this loop, transform one
-             null-terminated string's worth from TEXTA or from TEXTB
-             or both.  That way, there's no need to store the
-             transformation of the whole line, if it contains many
-             null-terminated strings.  */
-
-          /* Store the transformed data into a big-enough buffer.  */
-
-          /* A 3X size guess avoids the overhead of calling strxfrm
-             twice on typical implementations.  Don't worry about
-             size_t overflow, as the guess need not be correct.  */
-          size_t guess_bufsize = 3 * (lena + lenb) + 2;
-          if (bufsize < guess_bufsize)
-            {
-              bufsize = MAX (guess_bufsize, bufsize * 3 / 2);
-              free (allocated);
-              buf = allocated = malloc (bufsize);
-              if (! buf)
-                {
-                  buf = stackbuf;
-                  bufsize = sizeof stackbuf;
-                }
-            }
-
-          size_t sizea =
-            (texta < lima ? xstrxfrm (buf, texta, bufsize) + 1 : 0);
-          bool a_fits = sizea <= bufsize;
-          size_t sizeb =
-            (textb < limb
-             ? (xstrxfrm ((a_fits ? buf + sizea : NULL), textb,
-                          (a_fits ? bufsize - sizea : 0))
-                + 1)
-             : 0);
-
-          if (! (a_fits && sizea + sizeb <= bufsize))
-            {
-              bufsize = sizea + sizeb;
-              if (bufsize < SIZE_MAX / 3)
-                bufsize = bufsize * 3 / 2;
-              free (allocated);
-              buf = allocated = xmalloc (bufsize);
-              if (texta < lima)
-                strxfrm (buf, texta, sizea);
-              if (textb < limb)
-                strxfrm (buf + sizea, textb, sizeb);
-            }
-
-          /* Advance past NULs to the next part of each input string,
-             exiting the loop if both strings are exhausted.  When
-             exiting the loop, prepare to finish off the tiebreaker
-             comparison properly.  */
-          if (texta < lima)
-            texta += strlen (texta) + 1;
-          if (textb < limb)
-            textb += strlen (textb) + 1;
-          if (! (texta < lima || textb < limb))
-            {
-              lena = sizea; texta = buf;
-              lenb = sizeb; textb = buf + sizea;
-              break;
-            }
-
-          /* Accumulate the transformed data in the corresponding
-             checksums.  */
-          md5_process_bytes (buf, sizea, &s[0]);
-          md5_process_bytes (buf + sizea, sizeb, &s[1]);
-
-          /* Update the tiebreaker comparison of the transformed data.  */
-          if (! xfrm_diff)
-            {
-              xfrm_diff = memcmp (buf, buf + sizea, MIN (sizea, sizeb));
-              if (! xfrm_diff)
-                xfrm_diff = (sizea > sizeb) - (sizea < sizeb);
-            }
-        }
-    }
-
-  /* Compute and compare the checksums.  */
-  md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
-  md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
-  int diff = memcmp (dig[0], dig[1], sizeof dig[0]);
-
-  /* Fall back on the tiebreaker if the checksums collide.  */
-  if (! diff)
-    {
-      if (! xfrm_diff)
-        {
-          xfrm_diff = memcmp (texta, textb, MIN (lena, lenb));
-          if (! xfrm_diff)
-            xfrm_diff = (lena > lenb) - (lena < lenb);
-        }
-
-      diff = xfrm_diff;
-    }
-
-  free (allocated);
-
-  return diff;
-}
-
 /* Return the printable width of the block of memory starting at
    TEXT and ending just before LIM, counting each tab as one byte.
    FIXME: Should we generally be counting non printable chars?  */
@@ -2279,14 +1624,6 @@ mark_key (size_t offset, size_t width)
     }
 }
 
-/* Return true if KEY is a numeric key.  */
-
-static inline bool
-key_numeric (struct keyfield const *key)
-{
-  return key->numeric || key->general_numeric || key->human_numeric;
-}
-
 /* For LINE, output a debugging line that underlines KEY in LINE.
    If KEY is null, underline the whole line.  */
 
@@ -2510,252 +1847,6 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)
     error (0, 0, _("option '-r' only applies to last-resort comparison"));
 }
 
-/* Compare two lines A and B trying every key in sequence until there
-   are no more keys or a difference is found. */
-
-static int
-keycompare (struct line const *a, struct line const *b)
-{
-  struct keyfield *key = keylist;
-
-  /* For the first iteration only, the key positions have been
-     precomputed for us. */
-  char *texta = a->keybeg;
-  char *textb = b->keybeg;
-  char *lima = a->keylim;
-  char *limb = b->keylim;
-
-  int diff;
-
-  while (true)
-    {
-      char const *translate = key->translate;
-      bool const *ignore = key->ignore;
-
-      /* Treat field ends before field starts as empty fields.  */
-      lima = MAX (texta, lima);
-      limb = MAX (textb, limb);
-
-      /* Find the lengths. */
-      size_t lena = lima - texta;
-      size_t lenb = limb - textb;
-
-      if (hard_LC_COLLATE || key_numeric (key)
-          || key->month || key->random || key->version)
-        {
-          char *ta;
-          char *tb;
-          size_t tlena;
-          size_t tlenb;
-
-          char enda IF_LINT (= 0);
-          char endb IF_LINT (= 0);
-          void *allocated IF_LINT (= NULL);
-          char stackbuf[4000];
-
-          if (ignore || translate)
-            {
-              /* Compute with copies of the keys, which are the result of
-                 translating or ignoring characters, and which need their
-                 own storage.  */
-
-              size_t i;
-
-              /* Allocate space for copies.  */
-              size_t size = lena + 1 + lenb + 1;
-              if (size <= sizeof stackbuf)
-                ta = stackbuf, allocated = NULL;
-              else
-                ta = allocated = xmalloc (size);
-              tb = ta + lena + 1;
-
-              /* Put into each copy a version of the key in which the
-                 requested characters are ignored or translated.  */
-              for (tlena = i = 0; i < lena; i++)
-                if (! (ignore && ignore[to_uchar (texta[i])]))
-                  ta[tlena++] = (translate
-                                 ? translate[to_uchar (texta[i])]
-                                 : texta[i]);
-              ta[tlena] = '\0';
-
-              for (tlenb = i = 0; i < lenb; i++)
-                if (! (ignore && ignore[to_uchar (textb[i])]))
-                  tb[tlenb++] = (translate
-                                 ? translate[to_uchar (textb[i])]
-                                 : textb[i]);
-              tb[tlenb] = '\0';
-            }
-          else
-            {
-              /* Use the keys in-place, temporarily null-terminated.  */
-              ta = texta; tlena = lena; enda = ta[tlena]; ta[tlena] = '\0';
-              tb = textb; tlenb = lenb; endb = tb[tlenb]; tb[tlenb] = '\0';
-            }
-
-          if (key->numeric)
-            diff = numcompare (ta, tb);
-          else if (key->general_numeric)
-            diff = general_numcompare (ta, tb);
-          else if (key->human_numeric)
-            diff = human_numcompare (ta, tb);
-          else if (key->month)
-            diff = getmonth (ta, NULL) - getmonth (tb, NULL);
-          else if (key->random)
-            diff = compare_random (ta, tlena, tb, tlenb);
-          else if (key->version)
-            diff = filevercmp (ta, tb);
-          else
-            {
-              /* Locale-dependent string sorting.  This is slower than
-                 C-locale sorting, which is implemented below.  */
-              if (tlena == 0)
-                diff = - NONZERO (tlenb);
-              else if (tlenb == 0)
-                diff = 1;
-              else
-                diff = xmemcoll0 (ta, tlena + 1, tb, tlenb + 1);
-            }
-
-          if (ignore || translate)
-            free (allocated);
-          else
-            {
-              ta[tlena] = enda;
-              tb[tlenb] = endb;
-            }
-        }
-      else if (ignore)
-        {
-#define CMP_WITH_IGNORE(A, B)						\
-  do									\
-    {									\
-          while (true)							\
-            {								\
-              while (texta < lima && ignore[to_uchar (*texta)])		\
-                ++texta;						\
-              while (textb < limb && ignore[to_uchar (*textb)])		\
-                ++textb;						\
-              if (! (texta < lima && textb < limb))			\
-                break;							\
-              diff = to_uchar (A) - to_uchar (B);			\
-              if (diff)							\
-                goto not_equal;						\
-              ++texta;							\
-              ++textb;							\
-            }								\
-                                                                        \
-          diff = (texta < lima) - (textb < limb);			\
-    }									\
-  while (0)
-
-          if (translate)
-            CMP_WITH_IGNORE (translate[to_uchar (*texta)],
-                             translate[to_uchar (*textb)]);
-          else
-            CMP_WITH_IGNORE (*texta, *textb);
-        }
-      else if (lena == 0)
-        diff = - NONZERO (lenb);
-      else if (lenb == 0)
-        goto greater;
-      else
-        {
-          if (translate)
-            {
-              while (texta < lima && textb < limb)
-                {
-                  diff = (to_uchar (translate[to_uchar (*texta++)])
-                          - to_uchar (translate[to_uchar (*textb++)]));
-                  if (diff)
-                    goto not_equal;
-                }
-            }
-          else
-            {
-              diff = memcmp (texta, textb, MIN (lena, lenb));
-              if (diff)
-                goto not_equal;
-            }
-          diff = lena < lenb ? -1 : lena != lenb;
-        }
-
-      if (diff)
-        goto not_equal;
-
-      key = key->next;
-      if (! key)
-        break;
-
-      /* Find the beginning and limit of the next field.  */
-      if (key->eword != SIZE_MAX)
-        lima = limfield (a, key), limb = limfield (b, key);
-      else
-        lima = a->text + a->length - 1, limb = b->text + b->length - 1;
-
-      if (key->sword != SIZE_MAX)
-        texta = begfield (a, key), textb = begfield (b, key);
-      else
-        {
-          texta = a->text, textb = b->text;
-          if (key->skipsblanks)
-            {
-              while (texta < lima && blanks[to_uchar (*texta)])
-                ++texta;
-              while (textb < limb && blanks[to_uchar (*textb)])
-                ++textb;
-            }
-        }
-    }
-
-  return 0;
-
- greater:
-  diff = 1;
- not_equal:
-  return key->reverse ? -diff : diff;
-}
-
-/* Compare two lines A and B, returning negative, zero, or positive
-   depending on whether A compares less than, equal to, or greater than B. */
-
-static int
-compare (struct line const *a, struct line const *b)
-{
-  int diff;
-  size_t alen, blen;
-
-  /* First try to compare on the specified keys (if any).
-     The only two cases with no key at all are unadorned sort,
-     and unadorned sort -r. */
-  if (keylist)
-    {
-      diff = keycompare (a, b);
-      if (diff || unique || stable)
-        return diff;
-    }
-
-  /* If the keys all compare equal (or no keys were specified)
-     fall through to the default comparison.  */
-  alen = a->length - 1, blen = b->length - 1;
-
-  if (alen == 0)
-    diff = - NONZERO (blen);
-  else if (blen == 0)
-    diff = 1;
-  else if (hard_LC_COLLATE)
-    {
-      /* Note xmemcoll0 is a performance enhancement as
-         it will not unconditionally write '\0' after the
-         passed in buffers, which was seen to give around
-         a 3% increase in performance for short lines.  */
-      diff = xmemcoll0 (a->text, alen + 1, b->text, blen + 1);
-    }
-  else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
-    diff = alen < blen ? -1 : alen != blen;
-
-  return reverse ? -diff : diff;
-}
-
 /* Write LINE to output stream FP; the output file's name is
    OUTPUT_FILE if OUTPUT_FILE is non-null, and is the standard output
    otherwise.  If debugging is enabled and FP is standard output,
@@ -4015,29 +3106,6 @@ sort (char *const *files, size_t nfiles, char const *output_file,
 
 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list.  */
 
-static void
-insertkey (struct keyfield *key_arg)
-{
-  struct keyfield **p;
-  struct keyfield *key = xmemdup (key_arg, sizeof *key);
-
-  for (p = &keylist; *p; p = &(*p)->next)
-    continue;
-  *p = key;
-  key->next = NULL;
-}
-
-/* Report a bad field specification SPEC, with extra info MSGID.  */
-
-static void badfieldspec (char const *, char const *)
-     ATTRIBUTE_NORETURN;
-static void
-badfieldspec (char const *spec, char const *msgid)
-{
-  die (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
-       _(msgid), quote (spec));
-}
-
 /* Report incompatible options.  */
 
 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
@@ -4067,41 +3135,6 @@ check_ordering_compatibility (void)
       }
 }
 
-/* Parse the leading integer in STRING and store the resulting value
-   (which must fit into size_t) into *VAL.  Return the address of the
-   suffix after the integer.  If the value is too large, silently
-   substitute SIZE_MAX.  If MSGID is NULL, return NULL after
-   failure; otherwise, report MSGID and exit on failure.  */
-
-static char const *
-parse_field_count (char const *string, size_t *val, char const *msgid)
-{
-  char *suffix;
-  uintmax_t n;
-
-  switch (xstrtoumax (string, &suffix, 10, &n, ""))
-    {
-    case LONGINT_OK:
-    case LONGINT_INVALID_SUFFIX_CHAR:
-      *val = n;
-      if (*val == n)
-        break;
-      /* Fall through.  */
-    case LONGINT_OVERFLOW:
-    case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
-      *val = SIZE_MAX;
-      break;
-
-    case LONGINT_INVALID:
-      if (msgid)
-        die (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
-             _(msgid), quote (string));
-      return NULL;
-    }
-
-  return suffix;
-}
-
 /* Handle interrupts and hangups. */
 
 static void
@@ -4116,75 +3149,6 @@ sighandler (int sig)
   raise (sig);
 }
 
-/* Set the ordering options for KEY specified in S.
-   Return the address of the first character in S that
-   is not a valid ordering option.
-   BLANKTYPE is the kind of blanks that 'b' should skip. */
-
-static char *
-set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype)
-{
-  while (*s)
-    {
-      switch (*s)
-        {
-        case 'b':
-          if (blanktype == bl_start || blanktype == bl_both)
-            key->skipsblanks = true;
-          if (blanktype == bl_end || blanktype == bl_both)
-            key->skipeblanks = true;
-          break;
-        case 'd':
-          key->ignore = nondictionary;
-          break;
-        case 'f':
-          key->translate = fold_toupper;
-          break;
-        case 'g':
-          key->general_numeric = true;
-          break;
-        case 'h':
-          key->human_numeric = true;
-          break;
-        case 'i':
-          /* Option order should not matter, so don't let -i override
-             -d.  -d implies -i, but -i does not imply -d.  */
-          if (! key->ignore)
-            key->ignore = nonprinting;
-          break;
-        case 'M':
-          key->month = true;
-          break;
-        case 'n':
-          key->numeric = true;
-          break;
-        case 'R':
-          key->random = true;
-          break;
-        case 'r':
-          key->reverse = true;
-          break;
-        case 'V':
-          key->version = true;
-          break;
-        default:
-          return (char *) s;
-        }
-      ++s;
-    }
-  return (char *) s;
-}
-
-/* Initialize KEY.  */
-
-static struct keyfield *
-key_init (struct keyfield *key)
-{
-  memset (key, 0, sizeof *key);
-  key->eword = SIZE_MAX;
-  return key;
-}
-
 int
 main (int argc, char **argv)
 {
@@ -4433,54 +3397,7 @@ main (int argc, char **argv)
           break;
 
         case 'k':
-          key = key_init (&key_buf);
-
-          /* Get POS1. */
-          s = parse_field_count (optarg, &key->sword,
-                                 N_("invalid number at field start"));
-          if (! key->sword--)
-            {
-              /* Provoke with 'sort -k0' */
-              badfieldspec (optarg, N_("field number is zero"));
-            }
-          if (*s == '.')
-            {
-              s = parse_field_count (s + 1, &key->schar,
-                                     N_("invalid number after '.'"));
-              if (! key->schar--)
-                {
-                  /* Provoke with 'sort -k1.0' */
-                  badfieldspec (optarg, N_("character offset is zero"));
-                }
-            }
-          if (! (key->sword || key->schar))
-            key->sword = SIZE_MAX;
-          s = set_ordering (s, key, bl_start);
-          if (*s != ',')
-            {
-              key->eword = SIZE_MAX;
-              key->echar = 0;
-            }
-          else
-            {
-              /* Get POS2. */
-              s = parse_field_count (s + 1, &key->eword,
-                                     N_("invalid number after ','"));
-              if (! key->eword--)
-                {
-                  /* Provoke with 'sort -k1,0' */
-                  badfieldspec (optarg, N_("field number is zero"));
-                }
-              if (*s == '.')
-                {
-                  s = parse_field_count (s + 1, &key->echar,
-                                         N_("invalid number after '.'"));
-                }
-              s = set_ordering (s, key, bl_end);
-            }
-          if (*s)
-            badfieldspec (optarg, N_("stray character in field spec"));
-          insertkey (key);
+          add_key();
           break;
 
         case 'm':
author	Erich Eckner <git@eckner.net>	2016-11-28 14:03:15 +0100
committer	Erich Eckner <git@eckner.net>	2017-02-16 13:23:48 +0100
commit	e74f109de104c1966d2d4863aedfb25eefe0358f (patch)
tree	738ec2f7ae295019d346175a86ea85f7e13782e2 /src
parent	1c27b56095b4a82be7d072baabc09262cb4227e5 (diff)
download	coreutils-e74f109de104c1966d2d4863aedfb25eefe0358f.tar.xz