From 606321fa327af39ab3d67fb420cd84e9afc9de5a Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 27 May 2005 20:34:03 +0000 Subject: Break integer-in-string comparison code out from src/sort. --- lib/ChangeLog | 5 ++ lib/strintcmp.c | 31 +++++++ lib/strnumcmp-in.h | 241 +++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/strnumcmp.c | 30 +++++++ lib/strnumcmp.h | 2 + 5 files changed, 309 insertions(+) create mode 100644 lib/strintcmp.c create mode 100644 lib/strnumcmp-in.h create mode 100644 lib/strnumcmp.c create mode 100644 lib/strnumcmp.h (limited to 'lib') diff --git a/lib/ChangeLog b/lib/ChangeLog index 3c8ab513d..9ea166c97 100644 --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,3 +1,8 @@ +2005-05-27 Paul Eggert + + * strnumcmp.c, strnumcmp.h, strnumcmp-in.h, strintcmp.c: + New files. + 2005-05-22 Paul Eggert * fts.c (fd_safer) [_LGPL_PACKAGE]: New static function, diff --git a/lib/strintcmp.c b/lib/strintcmp.c new file mode 100644 index 000000000..9f085d539 --- /dev/null +++ b/lib/strintcmp.c @@ -0,0 +1,31 @@ +/* Compare integer strings. + + Copyright (C) 2005 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Written by Paul Eggert. */ + +#include "strnumcmp-in.h" + +/* Compare strings A and B as integers without explicitly converting + them to machine numbers, to avoid overflow problems and perhaps + improve performance. */ + +int +strintcmp (char const *a, char const *b) +{ + return numcompare (a, b, -1, -1); +} diff --git a/lib/strnumcmp-in.h b/lib/strnumcmp-in.h new file mode 100644 index 000000000..195047313 --- /dev/null +++ b/lib/strnumcmp-in.h @@ -0,0 +1,241 @@ +/* Compare numeric strings. This is an internal include file. + + Copyright (C) 1988, 1991, 1992, 1993, 1995, 1996, 1998, 1999, 2000, + 2003, 2004, 2005 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Written by Mike Haertel. */ + +#include "strnumcmp.h" + +#include + +#define NEGATION_SIGN '-' +#define NUMERIC_ZERO '0' + +/* ISDIGIT differs from isdigit, as follows: + - Its arg may be any int or unsigned int; it need not be an unsigned char. + - It's guaranteed to evaluate its argument exactly once. + - It's typically faster. + POSIX says that only '0' through '9' are digits. Prefer ISDIGIT to + ISDIGIT_LOCALE unless it's important to use the locale's definition + of `digit' even when the host does not conform to POSIX. */ +#define ISDIGIT(c) ((unsigned int) (c) - '0' <= 9) + + +/* Compare strings A and B containing decimal fractions < 1. + DECIMAL_POINT is the decimal point. Each string + should begin with a decimal point followed immediately by the digits + of the fraction. Strings not of this form are treated as zero. */ + +/* The goal here, is to take two numbers a and b... compare these + in parallel. Instead of converting each, and then comparing the + outcome. Most likely stopping the comparison before the conversion + is complete. The algorithm used, in the old "sort" utility: + + Algorithm: fraccompare + Action : compare two decimal fractions + accepts : char *a, char *b + returns : -1 if ab. + implement: + + if *a == decimal_point AND *b == decimal_point + find first character different in a and b. + if both are digits, return the difference *a - *b. + if *a is a digit + skip past zeros + if digit return 1, else 0 + if *b is a digit + skip past zeros + if digit return -1, else 0 + if *a is a decimal_point + skip past decimal_point and zeros + if digit return 1, else 0 + if *b is a decimal_point + skip past decimal_point and zeros + if digit return -1, else 0 + return 0 */ + +static inline int +fraccompare (char const *a, char const *b, char decimal_point) +{ + if (*a == decimal_point && *b == decimal_point) + { + while (*++a == *++b) + if (! ISDIGIT (*a)) + return 0; + if (ISDIGIT (*a) && ISDIGIT (*b)) + return *a - *b; + if (ISDIGIT (*a)) + goto a_trailing_nonzero; + if (ISDIGIT (*b)) + goto b_trailing_nonzero; + return 0; + } + else if (*a++ == decimal_point) + { + a_trailing_nonzero: + while (*a == NUMERIC_ZERO) + a++; + return ISDIGIT (*a); + } + else if (*b++ == decimal_point) + { + b_trailing_nonzero: + while (*b == NUMERIC_ZERO) + b++; + return - ISDIGIT (*b); + } + return 0; +} + +/* Compare strings A and B as numbers without explicitly converting + them to machine numbers, to avoid overflow problems and perhaps + improve performance. DECIMAL_POINT is the decimal point and + THOUSANDS_SEP the thousands separator. A DECIMAL_POINT of -1 + causes comparisons to act as if there is no decimal point + character, and likewise for THOUSANDS_SEP. */ + +static inline int +numcompare (char const *a, char const *b, + int decimal_point, int thousands_sep) +{ + unsigned char tmpa = *a; + unsigned char tmpb = *b; + int tmp; + size_t log_a; + size_t log_b; + + if (tmpa == NEGATION_SIGN) + { + do + tmpa = *++a; + while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep); + if (tmpb != NEGATION_SIGN) + { + if (tmpa == decimal_point) + do + tmpa = *++a; + while (tmpa == NUMERIC_ZERO); + if (ISDIGIT (tmpa)) + return -1; + while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep) + tmpb = *++b; + if (tmpb == decimal_point) + do + tmpb = *++b; + while (tmpb == NUMERIC_ZERO); + return - ISDIGIT (tmpb); + } + do + tmpb = *++b; + while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep); + + while (tmpa == tmpb && ISDIGIT (tmpa)) + { + do + tmpa = *++a; + while (tmpa == thousands_sep); + do + tmpb = *++b; + while (tmpb == thousands_sep); + } + + if ((tmpa == decimal_point && !ISDIGIT (tmpb)) + || (tmpb == decimal_point && !ISDIGIT (tmpa))) + return fraccompare (b, a, decimal_point); + + tmp = tmpb - tmpa; + + for (log_a = 0; ISDIGIT (tmpa); ++log_a) + do + tmpa = *++a; + while (tmpa == thousands_sep); + + for (log_b = 0; ISDIGIT (tmpb); ++log_b) + do + tmpb = *++b; + while (tmpb == thousands_sep); + + if (log_a != log_b) + return log_a < log_b ? 1 : -1; + + if (!log_a) + return 0; + + return tmp; + } + else if (tmpb == NEGATION_SIGN) + { + do + tmpb = *++b; + while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep); + if (tmpb == decimal_point) + do + tmpb = *++b; + while (tmpb == NUMERIC_ZERO); + if (ISDIGIT (tmpb)) + return 1; + while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep) + tmpa = *++a; + if (tmpa == decimal_point) + do + tmpa = *++a; + while (tmpa == NUMERIC_ZERO); + return ISDIGIT (tmpa); + } + else + { + while (tmpa == NUMERIC_ZERO || tmpa == thousands_sep) + tmpa = *++a; + while (tmpb == NUMERIC_ZERO || tmpb == thousands_sep) + tmpb = *++b; + + while (tmpa == tmpb && ISDIGIT (tmpa)) + { + do + tmpa = *++a; + while (tmpa == thousands_sep); + do + tmpb = *++b; + while (tmpb == thousands_sep); + } + + if ((tmpa == decimal_point && !ISDIGIT (tmpb)) + || (tmpb == decimal_point && !ISDIGIT (tmpa))) + return fraccompare (a, b, decimal_point); + + tmp = tmpa - tmpb; + + for (log_a = 0; ISDIGIT (tmpa); ++log_a) + do + tmpa = *++a; + while (tmpa == thousands_sep); + + for (log_b = 0; ISDIGIT (tmpb); ++log_b) + do + tmpb = *++b; + while (tmpb == thousands_sep); + + if (log_a != log_b) + return log_a < log_b ? -1 : 1; + + if (!log_a) + return 0; + + return tmp; + } +} diff --git a/lib/strnumcmp.c b/lib/strnumcmp.c new file mode 100644 index 000000000..902270aec --- /dev/null +++ b/lib/strnumcmp.c @@ -0,0 +1,30 @@ +/* Compare numeric strings. + + Copyright (C) 2005 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Written by Paul Eggert. */ + +#include "strnumcmp-in.h" + +/* Externally-visible name for numcompare. */ + +int +strnumcmp (char const *a, char const *b, + int decimal_point, int thousands_sep) +{ + return numcompare (a, b, decimal_point, thousands_sep); +} diff --git a/lib/strnumcmp.h b/lib/strnumcmp.h new file mode 100644 index 000000000..91ad3519b --- /dev/null +++ b/lib/strnumcmp.h @@ -0,0 +1,2 @@ +int strintcmp (char const *, char const *); +int strnumcmp (char const *, char const *, int, int); -- cgit v1.2.3-54-g00ecf