diff options
-rw-r--r-- | lib/Makefile.am | 3 | ||||
-rw-r--r-- | lib/memxfrm.c | 105 | ||||
-rw-r--r-- | lib/memxfrm.h | 2 | ||||
-rw-r--r-- | lib/xmemxfrm.c | 62 | ||||
-rw-r--r-- | lib/xmemxfrm.h | 2 | ||||
-rw-r--r-- | m4/memxfrm.m4 | 15 | ||||
-rw-r--r-- | m4/prereq.m4 | 3 | ||||
-rw-r--r-- | po/POTFILES.in | 1 | ||||
-rw-r--r-- | src/sort.c | 167 |
9 files changed, 118 insertions, 242 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am index c89ef751d..b4a591b3f 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -20,8 +20,7 @@ include gnulib.mk AM_CFLAGS += $(GNULIB_WARN_CFLAGS) $(WERROR_CFLAGS) libcoreutils_a_SOURCES += \ - buffer-lcm.c buffer-lcm.h \ - xmemxfrm.c xmemxfrm.h + buffer-lcm.c buffer-lcm.h libcoreutils_a_LIBADD += $(LIBOBJS) libcoreutils_a_DEPENDENCIES += $(LIBOBJS) diff --git a/lib/memxfrm.c b/lib/memxfrm.c deleted file mode 100644 index 12a1ae9e4..000000000 --- a/lib/memxfrm.c +++ /dev/null @@ -1,105 +0,0 @@ -/* Locale-specific memory transformation - - Copyright (C) 2006, 2009-2010 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -/* Written by Paul Eggert <eggert@cs.ucla.edu>. */ - -#include <config.h> - -#include "memxfrm.h" - -#include <errno.h> -#include <stdlib.h> -#include <string.h> - -/* Store into DEST (of size DESTSIZE) the text in SRC (of size SRCSIZE) - transformed so that the result of memcmp on two transformed texts - (with ties going to the longer text) is the same as the result of - memcoll on the two texts before their transformation. Perhaps - temporarily modify the byte after SRC, but restore its original - contents before returning. - - Return the size of the resulting text, or an indeterminate value if - there is an error. Set errno to an error number if there is an - error, and to zero otherwise. DEST contains an indeterminate value - if there is an error or if the resulting size is greater than - DESTSIZE. */ - -size_t -memxfrm (char *restrict dest, size_t destsize, - char *restrict src, size_t srcsize) -{ -#if HAVE_STRXFRM - - size_t di = 0; - size_t si = 0; - size_t result = 0; - - char ch = src[srcsize]; - src[srcsize] = '\0'; - - while (si < srcsize) - { - size_t slen = strlen (src + si); - - size_t result0 = result; - errno = 0; - result += strxfrm (dest + di, src + si, destsize - di) + 1; - if (errno != 0) - break; - if (result <= result0) - { - errno = ERANGE; - break; - } - - if (result == destsize + 1 && si + slen == srcsize) - { - /* The destination is exactly the right size, but strxfrm wants - room for a trailing null. Work around the problem with a - temporary buffer. */ - size_t bufsize = destsize - di + 1; - char stackbuf[4000]; - char *buf = stackbuf; - if (sizeof stackbuf < bufsize) - { - buf = malloc (bufsize); - if (! buf) - break; - } - strxfrm (buf, src + si, bufsize); - memcpy (dest + di, buf, destsize - di); - if (sizeof stackbuf < bufsize) - free (buf); - errno = 0; - } - - di = (result < destsize ? result : destsize); - si += slen + 1; - } - - src[srcsize] = ch; - return result - (si != srcsize); - -#else - - if (srcsize < destsize) - memcpy (dest, src, srcsize); - errno = 0; - return srcsize; - -#endif -} diff --git a/lib/memxfrm.h b/lib/memxfrm.h deleted file mode 100644 index 605421dc2..000000000 --- a/lib/memxfrm.h +++ /dev/null @@ -1,2 +0,0 @@ -#include <stddef.h> -size_t memxfrm (char *restrict, size_t, char *restrict, size_t); diff --git a/lib/xmemxfrm.c b/lib/xmemxfrm.c deleted file mode 100644 index 9cdda9833..000000000 --- a/lib/xmemxfrm.c +++ /dev/null @@ -1,62 +0,0 @@ -/* Locale-specific memory transformation - - Copyright (C) 2006, 2009-2010 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -/* Written by Paul Eggert <eggert@cs.ucla.edu>. */ - -#include <config.h> - -#include "xmemxfrm.h" - -#include <errno.h> -#include <stdlib.h> - -#include "gettext.h" -#define _(msgid) gettext (msgid) - -#include "error.h" -#include "exitfail.h" -#include "memxfrm.h" -#include "quotearg.h" - -/* Store into DEST (of size DESTSIZE) the text in SRC (of size - SRCSIZE) transformed so that the result of memcmp on two - transformed texts (with ties going to the longer text) is the same - as the result of memcoll on the two texts before their - transformation. Perhaps temporarily modify the byte after SRC, but - restore its original contents before returning. - - Return the size of the resulting text. DEST contains an - indeterminate value if the resulting size is greater than DESTSIZE. - Report an error and exit if there is an error. */ - -size_t -xmemxfrm (char *restrict dest, size_t destsize, - char *restrict src, size_t srcsize) -{ - size_t translated_size = memxfrm (dest, destsize, src, srcsize); - - if (errno) - { - error (0, errno, _("string transformation failed")); - error (0, 0, _("set LC_ALL='C' to work around the problem")); - error (exit_failure, 0, - _("the untransformed string was %s"), - quotearg_n_style_mem (0, locale_quoting_style, src, srcsize)); - } - - return translated_size; -} diff --git a/lib/xmemxfrm.h b/lib/xmemxfrm.h deleted file mode 100644 index 7c4b8ad7c..000000000 --- a/lib/xmemxfrm.h +++ /dev/null @@ -1,2 +0,0 @@ -#include <stddef.h> -size_t xmemxfrm (char *restrict, size_t, char *restrict, size_t); diff --git a/m4/memxfrm.m4 b/m4/memxfrm.m4 deleted file mode 100644 index 47a17d3bf..000000000 --- a/m4/memxfrm.m4 +++ /dev/null @@ -1,15 +0,0 @@ -dnl Copyright (C) 2006, 2009-2010 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. - -AC_DEFUN([gl_MEMXFRM], -[ - AC_LIBSOURCES([memxfrm.c, memxfrm.h]) - AC_LIBOBJ([memxfrm]) - - AC_REQUIRE([AC_C_RESTRICT]) - - dnl Prerequisites of lib/memcoll.c. - AC_CHECK_FUNCS_ONCE([strxfrm]) -]) diff --git a/m4/prereq.m4 b/m4/prereq.m4 index 24478cb82..8caea38cf 100644 --- a/m4/prereq.m4 +++ b/m4/prereq.m4 @@ -1,4 +1,4 @@ -#serial 76 +#serial 77 dnl We use gl_ for non Autoconf macros. m4_pattern_forbid([^gl_[ABCDEFGHIJKLMNOPQRSTUVXYZ]])dnl @@ -40,7 +40,6 @@ AC_DEFUN([gl_PREREQ], AC_REQUIRE([gl_FD_REOPEN]) AC_REQUIRE([gl_FUNC_XATTR]) AC_REQUIRE([gl_FUNC_XFTS]) - AC_REQUIRE([gl_MEMXFRM]) AC_REQUIRE([gl_STRINTCMP]) AC_REQUIRE([gl_STRNUMCMP]) ]) diff --git a/po/POTFILES.in b/po/POTFILES.in index c862877c4..be20f4c28 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -29,7 +29,6 @@ lib/version-etc.c lib/xalloc-die.c lib/xfreopen.c lib/xmemcoll.c -lib/xmemxfrm.c lib/xprintf.c lib/xstrtol-error.c diff --git a/src/sort.c b/src/sort.c index 1df711da7..54382631e 100644 --- a/src/sort.c +++ b/src/sort.c @@ -48,7 +48,6 @@ #include "stdlib--.h" #include "strnumcmp.h" #include "xmemcoll.h" -#include "xmemxfrm.h" #include "xnanosleep.h" #include "xstrtol.h" @@ -2001,34 +2000,24 @@ random_md5_state_init (char const *random_source) md5_process_bytes (buf, sizeof buf, &random_md5_state); } -/* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB - with length LENGTHB. Return negative if less, zero if equal, - positive if greater. */ +/* This is like strxfrm, except it reports any error and exits. */ -static int -cmp_hashes (char const *texta, size_t lena, - char const *textb, size_t lenb) +static size_t +xstrxfrm (char *restrict dest, char const *restrict src, size_t destsize) { - /* Try random hashes until a pair of hashes disagree. But if the - first pair of random hashes agree, check whether the keys are - identical and if so report no difference. */ - int diff; - uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)]; - struct md5_ctx s[2]; - s[0] = s[1] = random_md5_state; - md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]); - md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]); - diff = memcmp (dig[0], dig[1], sizeof dig[0]); - if (! diff) + errno = 0; + size_t translated_size = strxfrm (dest, src, destsize); + + if (errno) { - /* Break ties with memcmp. This is good enough given the - astronomically low probability of an MD5 collision. */ - diff = memcmp (texta, textb, MIN (lena, lenb)); - if (! diff) - diff = lena < lenb ? -1 : lena != lenb; + error (0, errno, _("string transformation failed")); + error (0, 0, _("set LC_ALL='C' to work around the problem")); + error (SORT_FAILURE, 0, + _("the untransformed string was %s"), + quotearg_n_style (0, locale_quoting_style, src)); } - return diff; + return translated_size; } /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB) @@ -2038,41 +2027,117 @@ static int compare_random (char *restrict texta, size_t lena, char *restrict textb, size_t lenb) { - int diff; + /* XFRM_DIFF records the equivalent of memcmp on the transformed + data. This is used to break ties if there is an checksum + collision, and this is good enough given the astronomically low + probability of a collision. */ + int xfrm_diff = 0; + + char stackbuf[4000]; + char *buf = stackbuf; + size_t bufsize = sizeof stackbuf; + uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)]; + struct md5_ctx s[2]; + s[0] = s[1] = random_md5_state; - if (! hard_LC_COLLATE) - diff = cmp_hashes (texta, lena, textb, lenb); - else + if (hard_LC_COLLATE) { - /* Transform the text into the basis of comparison, so that byte - strings that would otherwise considered to be equal are - considered equal here even if their bytes differ. */ - - char *buf = NULL; - char stackbuf[4000]; - size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena); - bool a_fits = tlena <= sizeof stackbuf; - size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL), - (a_fits ? sizeof stackbuf - tlena : 0), - textb, lenb); - - if (a_fits && tlena + tlenb <= sizeof stackbuf) - buf = stackbuf; - else + /* Null-terminate the keys so that strxfrm knows where to stop. */ + char *lima = texta + lena; char enda = *lima; *lima = '\0'; + char *limb = textb + lenb; char endb = *limb; *limb = '\0'; + + while (true) { - /* Adding 1 to the buffer size lets xmemxfrm run a bit - faster by avoiding the need for an extra buffer copy. */ - buf = xmalloc (tlena + tlenb + 1); - xmemxfrm (buf, tlena + 1, texta, lena); - xmemxfrm (buf + tlena, tlenb + 1, textb, lenb); + /* Transform the text into the basis of comparison, so that byte + strings that would otherwise considered to be equal are + considered equal here even if their bytes differ. + + Each time through this loop, transform one + null-terminated string's worth from TEXTA or from TEXTB + or both. That way, there's no need to store the + transformation of the whole line, if it contains many + null-terminated strings. */ + + /* Store the transformed data into a big-enough buffer. */ + + size_t sizea = + (texta < lima ? xstrxfrm (buf, texta, bufsize) + 1 : 0); + bool a_fits = sizea <= bufsize; + size_t sizeb = + (textb < limb + ? (xstrxfrm ((a_fits ? buf + sizea : NULL), textb, + (a_fits ? bufsize - sizea : 0)) + + 1) + : 0); + + if (! (a_fits && sizea + sizeb <= bufsize)) + { + bufsize = sizea + sizeb; + if (bufsize < SIZE_MAX / 3) + bufsize = bufsize * 3 / 2; + buf = (buf == stackbuf + ? xmalloc (bufsize) + : xrealloc (buf, bufsize)); + if (texta < lima) + strxfrm (buf, texta, sizea); + if (textb < limb) + strxfrm (buf + sizea, textb, sizeb); + } + + /* Advance past NULs to the next part of each input string, + exiting the loop if both strings are exhausted. When + exiting the loop, prepare to finish off the tiebreaker + comparison properly. */ + if (texta < lima) + texta += strlen (texta) + 1; + if (textb < limb) + textb += strlen (textb) + 1; + if (! (texta < lima || textb < limb)) + { + lena = sizea; texta = buf; + lenb = sizeb; textb = buf + sizea; + break; + } + + /* Accumulate the transformed data in the corresponding + checksums. */ + md5_process_bytes (buf, sizea, &s[0]); + md5_process_bytes (buf + sizea, sizeb, &s[1]); + + /* Update the tiebreaker comparison of the transformed data. */ + if (! xfrm_diff) + { + xfrm_diff = memcmp (buf, buf + sizea, MIN (sizea, sizeb)); + if (! xfrm_diff) + xfrm_diff = (sizea > sizeb) - (sizea < sizeb); + } } - diff = cmp_hashes (buf, tlena, buf + tlena, tlenb); + *lima = enda; + *limb = endb; + } + + /* Compute and compare the checksums. */ + md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]); + md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]); + int diff = memcmp (dig[0], dig[1], sizeof dig[0]); + + /* Fall back on the tiebreaker if the checksums collide. */ + if (! diff) + { + if (! xfrm_diff) + { + xfrm_diff = memcmp (texta, textb, MIN (lena, lenb)); + if (! xfrm_diff) + xfrm_diff = (lena > lenb) - (lena < lenb); + } - if (buf != stackbuf) - free (buf); + diff = xfrm_diff; } + if (buf != stackbuf) + free (buf); + return diff; } |