summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2005-05-27 20:34:03 +0000
committerPaul Eggert <eggert@cs.ucla.edu>2005-05-27 20:34:03 +0000
commit606321fa327af39ab3d67fb420cd84e9afc9de5a (patch)
tree7cc12d85c1e2faa82a92550e88180efb3a67a9ce /lib
parent8cb4f7039ddbe3051ed143ab8dd4727ede58b299 (diff)
downloadcoreutils-606321fa327af39ab3d67fb420cd84e9afc9de5a.tar.xz
Break integer-in-string comparison code out from src/sort.
Diffstat (limited to 'lib')
-rw-r--r--lib/ChangeLog5
-rw-r--r--lib/strintcmp.c31
-rw-r--r--lib/strnumcmp-in.h241
-rw-r--r--lib/strnumcmp.c30
-rw-r--r--lib/strnumcmp.h2
5 files changed, 309 insertions, 0 deletions
diff --git a/lib/ChangeLog b/lib/ChangeLog
index 3c8ab513d..9ea166c97 100644
--- a/lib/ChangeLog
+++ b/lib/ChangeLog
@@ -1,3 +1,8 @@
+2005-05-27 Paul Eggert <eggert@cs.ucla.edu>
+
+ * strnumcmp.c, strnumcmp.h, strnumcmp-in.h, strintcmp.c:
+ New files.
+
2005-05-22 Paul Eggert <eggert@cs.ucla.edu>
* fts.c (fd_safer) [_LGPL_PACKAGE]: New static function,
diff --git a/lib/strintcmp.c b/lib/strintcmp.c
new file mode 100644
index 000000000..9f085d539
--- /dev/null
+++ b/lib/strintcmp.c
@@ -0,0 +1,31 @@
+/* Compare integer strings.
+
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Paul Eggert. */
+
+#include "strnumcmp-in.h"
+
+/* Compare strings A and B as integers without explicitly converting
+ them to machine numbers, to avoid overflow problems and perhaps
+ improve performance. */
+
+int
+strintcmp (char const *a, char const *b)
+{
+ return numcompare (a, b, -1, -1);
+}
diff --git a/lib/strnumcmp-in.h b/lib/strnumcmp-in.h
new file mode 100644
index 000000000..195047313
--- /dev/null
+++ b/lib/strnumcmp-in.h
@@ -0,0 +1,241 @@
+/* Compare numeric strings. This is an internal include file.
+
+ Copyright (C) 1988, 1991, 1992, 1993, 1995, 1996, 1998, 1999, 2000,
+ 2003, 2004, 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Mike Haertel. */
+
+#include "strnumcmp.h"
+
+#include <stddef.h>
+
+#define NEGATION_SIGN '-'
+#define NUMERIC_ZERO '0'
+
+/* ISDIGIT differs from isdigit, as follows:
+ - Its arg may be any int or unsigned int; it need not be an unsigned char.
+ - It's guaranteed to evaluate its argument exactly once.
+ - It's typically faster.
+ POSIX says that only '0' through '9' are digits. Prefer ISDIGIT to
+ ISDIGIT_LOCALE unless it's important to use the locale's definition
+ of `digit' even when the host does not conform to POSIX. */
+#define ISDIGIT(c) ((unsigned int) (c) - '0' <= 9)
+
+
+/* Compare strings A and B containing decimal fractions < 1.
+ DECIMAL_POINT is the decimal point. Each string
+ should begin with a decimal point followed immediately by the digits
+ of the fraction. Strings not of this form are treated as zero. */
+
+/* The goal here, is to take two numbers a and b... compare these
+ in parallel. Instead of converting each, and then comparing the
+ outcome. Most likely stopping the comparison before the conversion
+ is complete. The algorithm used, in the old "sort" utility:
+
+ Algorithm: fraccompare
+ Action : compare two decimal fractions
+ accepts : char *a, char *b
+ returns : -1 if a<b, 0 if a=b, 1 if a>b.
+ implement:
+
+ if *a == decimal_point AND *b == decimal_point
+ find first character different in a and b.
+ if both are digits, return the difference *a - *b.
+ if *a is a digit
+ skip past zeros
+ if digit return 1, else 0
+ if *b is a digit
+ skip past zeros
+ if digit return -1, else 0
+ if *a is a decimal_point
+ skip past decimal_point and zeros
+ if digit return 1, else 0
+ if *b is a decimal_point
+ skip past decimal_point and zeros
+ if digit return -1, else 0
+ return 0 */
+
+static inline int
+fraccompare (char const *a, char const *b, char decimal_point)
+{
+ if (*a == decimal_point && *b == decimal_point)
+ {
+ while (*++a == *++b)
+ if (! ISDIGIT (*a))
+ return 0;
+ if (ISDIGIT (*a) && ISDIGIT (*b))
+ return *a - *b;
+ if (ISDIGIT (*a))
+ goto a_trailing_nonzero;
+ if (ISDIGIT (*b))
+ goto b_trailing_nonzero;
+ return 0;
+ }
+ else if (*a++ == decimal_point)
+ {
+ a_trailing_nonzero:
+ while (*a == NUMERIC_ZERO)
+ a++;
+ return ISDIGIT (*a);
+ }
+ else if (*b++ == decimal_point)
+ {
+ b_trailing_nonzero:
+ while (*b == NUMERIC_ZERO)
+ b++;
+ return - ISDIGIT (*b);
+ }
+ return 0;
+}
+
+/* Compare strings A and B as numbers without explicitly converting
+ them to machine numbers, to avoid overflow problems and perhaps
+ improve performance. DECIMAL_POINT is the decimal point and
+ THOUSANDS_SEP the thousands separator. A DECIMAL_POINT of -1
+ causes comparisons to act as if there is no decimal point
+ character, and likewise for THOUSANDS_SEP. */
+
+static inline int
+numcompare (char const *a, char const *b,
+ int decimal_point, int thousands_sep)
+{
+ unsigned char tmpa = *a;
+ unsigned char tmpb = *b;
+ int tmp;
+ size_t log_a;
+ size_t log_b;
+
+ if (tmpa == NEGATION_SIGN)
+ {
+ do
+ tmpa = *++a;
+ while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep);
+ if (tmpb != NEGATION_SIGN)
+ {
+ if (tmpa == decimal_point)
+ do
+ tmpa = *++a;
+ while (tmpa == NUMERIC_ZERO);
+ if (ISDIGIT (tmpa))
+ return -1;
+ while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep)
+ tmpb = *++b;
+ if (tmpb == decimal_point)
+ do
+ tmpb = *++b;
+ while (tmpb == NUMERIC_ZERO);
+ return - ISDIGIT (tmpb);
+ }
+ do
+ tmpb = *++b;
+ while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep);
+
+ while (tmpa == tmpb && ISDIGIT (tmpa))
+ {
+ do
+ tmpa = *++a;
+ while (tmpa == thousands_sep);
+ do
+ tmpb = *++b;
+ while (tmpb == thousands_sep);
+ }
+
+ if ((tmpa == decimal_point && !ISDIGIT (tmpb))
+ || (tmpb == decimal_point && !ISDIGIT (tmpa)))
+ return fraccompare (b, a, decimal_point);
+
+ tmp = tmpb - tmpa;
+
+ for (log_a = 0; ISDIGIT (tmpa); ++log_a)
+ do
+ tmpa = *++a;
+ while (tmpa == thousands_sep);
+
+ for (log_b = 0; ISDIGIT (tmpb); ++log_b)
+ do
+ tmpb = *++b;
+ while (tmpb == thousands_sep);
+
+ if (log_a != log_b)
+ return log_a < log_b ? 1 : -1;
+
+ if (!log_a)
+ return 0;
+
+ return tmp;
+ }
+ else if (tmpb == NEGATION_SIGN)
+ {
+ do
+ tmpb = *++b;
+ while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep);
+ if (tmpb == decimal_point)
+ do
+ tmpb = *++b;
+ while (tmpb == NUMERIC_ZERO);
+ if (ISDIGIT (tmpb))
+ return 1;
+ while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep)
+ tmpa = *++a;
+ if (tmpa == decimal_point)
+ do
+ tmpa = *++a;
+ while (tmpa == NUMERIC_ZERO);
+ return ISDIGIT (tmpa);
+ }
+ else
+ {
+ while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep)
+ tmpa = *++a;
+ while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep)
+ tmpb = *++b;
+
+ while (tmpa == tmpb && ISDIGIT (tmpa))
+ {
+ do
+ tmpa = *++a;
+ while (tmpa == thousands_sep);
+ do
+ tmpb = *++b;
+ while (tmpb == thousands_sep);
+ }
+
+ if ((tmpa == decimal_point && !ISDIGIT (tmpb))
+ || (tmpb == decimal_point && !ISDIGIT (tmpa)))
+ return fraccompare (a, b, decimal_point);
+
+ tmp = tmpa - tmpb;
+
+ for (log_a = 0; ISDIGIT (tmpa); ++log_a)
+ do
+ tmpa = *++a;
+ while (tmpa == thousands_sep);
+
+ for (log_b = 0; ISDIGIT (tmpb); ++log_b)
+ do
+ tmpb = *++b;
+ while (tmpb == thousands_sep);
+
+ if (log_a != log_b)
+ return log_a < log_b ? -1 : 1;
+
+ if (!log_a)
+ return 0;
+
+ return tmp;
+ }
+}
diff --git a/lib/strnumcmp.c b/lib/strnumcmp.c
new file mode 100644
index 000000000..902270aec
--- /dev/null
+++ b/lib/strnumcmp.c
@@ -0,0 +1,30 @@
+/* Compare numeric strings.
+
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Paul Eggert. */
+
+#include "strnumcmp-in.h"
+
+/* Externally-visible name for numcompare. */
+
+int
+strnumcmp (char const *a, char const *b,
+ int decimal_point, int thousands_sep)
+{
+ return numcompare (a, b, decimal_point, thousands_sep);
+}
diff --git a/lib/strnumcmp.h b/lib/strnumcmp.h
new file mode 100644
index 000000000..91ad3519b
--- /dev/null
+++ b/lib/strnumcmp.h
@@ -0,0 +1,2 @@
+int strintcmp (char const *, char const *);
+int strnumcmp (char const *, char const *, int, int);