sort: revert recent -h changes and use a more-conservative approach

* NEWS: Document changes to sort -h, which are now minor with respect to the pre-July-30th version. * doc/coreutils.texi (sort invocation): Likewise. The documentation now describes how -h comparison is done rather than being vague with border cases. * src/sort.c (long_double, strtold): Move back to general_numcompare. (LD, compute_human): Remove. (find_unit_order): Remove THOU_SEP parameter, since thousands separators are now allowed by all callers. Revert to previous behavior of sorting by suffix, and returning the order rather than 2 * order + binary, since we no longer care whether binary powers are being used. However, treat all zeros the same, instead of sorting 0M before 0G; this is more consistent with the desired behavior of sorting -1G before -1M. * tests/misc/sort (h1, h3, h6): Adjust to match mostly-reverted behavior. However, check that all zeros sort together. * tests/misc/sort-debug-keys: Omit a "_", since the trailing "i" in "1234Gi" is no longer part of the key.
author: Paul Eggert <eggert@cs.ucla.edu> 2010-08-02 19:18:01 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2010-08-02 19:21:02 -0700
commit: 94615d2acfdccbbeb8eb6f8931d0e252b05e1484 (patch)
tree: bc4695eb84b4a2d66e2347f65a3fb9343ae658b4 /src/sort.c
parent: abd040180e210e74448c42f094aab1769ca6c636 (diff)
download: coreutils-94615d2acfdccbbeb8eb6f8931d0e252b05e1484.tar.xz
1 files changed, 31 insertions, 77 deletions
diff --git a/src/sort.c b/src/sort.c
index ac5a07983..e02e547d2 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -92,16 +92,6 @@ struct rlimit { size_t rlim_cur; };
 
 #define UCHAR_LIM (UCHAR_MAX + 1)
 
-#if HAVE_C99_STRTOLD
-# define long_double long double
-# define LD(x) x##L
-#else
-# define long_double double
-# undef strtold
-# define strtold strtod
-# define LD(x) x
-#endif
-
 #ifndef DEFAULT_TMPDIR
 # define DEFAULT_TMPDIR "/tmp"
 #endif
@@ -1803,15 +1793,15 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)
 }
 
 /* Return an integer that represents the order of magnitude of the
-   unit following the number.  If THOU_SEP is not negative, NUMBER can
-   contain thousands separators equal to THOU_SEP.  It can also
-   contain a decimal point.  But it may not contain leading blanks.
+   unit following the number.  The number may contain thousands
+   separators and a decimal point, but it may not contain leading blanks.
+   Negative numbers get negative orders; zero numbers have a zero order.
    Store the address of the end of the number into *ENDPTR.  */
 
 static int
-find_unit_order (char const *number, int thou_sep, char **endptr)
+find_unit_order (char const *number, char **endptr)
 {
-  static char const powers[UCHAR_LIM] =
+  static char const orders[UCHAR_LIM] =
     {
 #if ! ('K' == 75 && 'M' == 77 && 'G' == 71 && 'T' == 84 && 'P' == 80 \
        && 'E' == 69 && 'Z' == 90 && 'Y' == 89 && 'k' == 107)
@@ -1839,7 +1829,10 @@ find_unit_order (char const *number, int thou_sep, char **endptr)
 #endif
     };
 
-  char const *p = number + (*number == '-');
+  bool minus_sign = (*number == '-');
+  char const *p = number + minus_sign;
+  int nonzero = 0;
+  unsigned char ch;
 
   /* Scan to end of number.
      Decimals or separators not followed by digits stop the scan.
@@ -1849,50 +1842,18 @@ find_unit_order (char const *number, int thou_sep, char **endptr)
 
   do
     {
-      while (*p == thousands_sep)
-        p++;
+      while (ISDIGIT (ch = *p++))
+        nonzero |= ch - '0';
     }
-  while (ISDIGIT (*p++));
+  while (ch == thousands_sep);
 
-  if (p[-1] == decimal_point)
-    while (ISDIGIT (*p++))
-      continue;
-
-  unsigned char ch = p[-1];
-  int power = powers[ch];
-  int binary = (power ? *p == 'i': 0);
-  *endptr = (char *) p + (power ? binary : -1);
-  return 2 * power + binary;
-}
-
-/* Convert the string P (ending at ENDP) to a floating point value.
-   The string is assumed to be followed by a SI or IEC prefix of type
-   ORDER.  */
-
-static long_double
-compute_human (char const *p, char *endp, int order)
-{
-  static long_double const multiplier[] =
-    {
-      LD (1e00), LD (                        1.0),
-      LD (1e03), LD (                     1024.0),
-      LD (1e06), LD (                  1048576.0),
-      LD (1e09), LD (               1073741824.0),
-      LD (1e12), LD (            1099511627776.0),
-      LD (1e15), LD (         1125899906842624.0),
-      LD (1e18), LD (      1152921504606846976.0),
-      LD (1e21), LD (   1180591620717411303424.0),
-      LD (1e24), LD (1208925819614629174706176.0)
-    };
+  if (ch == decimal_point)
+    while (ISDIGIT (ch = *p++))
+      nonzero |= ch - '0';
 
-  char *e = endp;
-  if (order)
-    e -= 1 + (order & 1);
-  char ch = *e;
-  *e = '\0';
-  long_double v = strtold (p, NULL);
-  *e = ch;
-  return v * multiplier[order];
+  int order = (nonzero ? orders[ch] : 0);
+  *endptr = (char *) p - !order;
+  return (minus_sign ? -order : order);
 }
 
 /* Compare numbers A and B ending in units with SI or IEC prefixes
@@ -1910,24 +1871,8 @@ human_numcompare (char *a, char *b, char **ea)
   while (blanks[to_uchar (*b)])
     b++;
 
-  int order_a = find_unit_order (a, -1, ea);
-  int order_b = find_unit_order (b, -1, &endb);
-
-  if (order_a == order_b)
-    {
-      /* Use strnumcmp if the orders are the same, since it has no
-         rounding problems and is faster.  Do not allow thousands
-         separators since strtold does not.  */
-      return strnumcmp (a, b, decimal_point, -1);
-    }
-  else
-    {
-      /* Fall back on floating point, despite its rounding errors,
-         since strnumcmp can't handle mixed orders.  */
-      long_double aval = compute_human (a, *ea, order_a);
-      long_double bval = compute_human (b, endb, order_b);
-      return (aval < bval ? -1 : aval > bval);
-    }
+  int diff = find_unit_order (a, ea) - find_unit_order (b, &endb);
+  return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep));
 }
 
 /* Compare strings A and B as numbers without explicitly converting them to
@@ -1945,8 +1890,8 @@ numcompare (char const *a, char const *b, char **ea)
   if (debug)
     {
       /* Approximate strnumcmp extents with find_unit_order.  */
-      int order = find_unit_order (a, thousands_sep, ea);
-      *ea -= !!order + (order & 1);
+      int order = find_unit_order (a, ea);
+      *ea -= !!order;
     }
 
   return strnumcmp (a, b, decimal_point, thousands_sep);
@@ -1957,6 +1902,15 @@ general_numcompare (char const *sa, char const *sb, char **ea)
 {
   /* FIXME: maybe add option to try expensive FP conversion
      only if A and B can't be compared more cheaply/accurately.  */
+
+#if HAVE_C99_STRTOLD
+# define long_double long double
+#else
+# define long_double double
+# undef strtold
+# define strtold strtod
+#endif
+
   char *eb;
   long_double a = strtold (sa, ea);
   long_double b = strtold (sb, &eb);
author	Paul Eggert <eggert@cs.ucla.edu>	2010-08-02 19:18:01 -0700
committer	Paul Eggert <eggert@cs.ucla.edu>	2010-08-02 19:21:02 -0700
commit	94615d2acfdccbbeb8eb6f8931d0e252b05e1484 (patch)
tree	bc4695eb84b4a2d66e2347f65a3fb9343ae658b4 /src/sort.c
parent	abd040180e210e74448c42f094aab1769ca6c636 (diff)
download	coreutils-94615d2acfdccbbeb8eb6f8931d0e252b05e1484.tar.xz