summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Makefile.am3
-rw-r--r--lib/memxfrm.c105
-rw-r--r--lib/memxfrm.h2
-rw-r--r--lib/xmemxfrm.c62
-rw-r--r--lib/xmemxfrm.h2
-rw-r--r--m4/memxfrm.m415
-rw-r--r--m4/prereq.m43
-rw-r--r--po/POTFILES.in1
-rw-r--r--src/sort.c167
9 files changed, 118 insertions, 242 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index c89ef751d..b4a591b3f 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -20,8 +20,7 @@ include gnulib.mk
AM_CFLAGS += $(GNULIB_WARN_CFLAGS) $(WERROR_CFLAGS)
libcoreutils_a_SOURCES += \
- buffer-lcm.c buffer-lcm.h \
- xmemxfrm.c xmemxfrm.h
+ buffer-lcm.c buffer-lcm.h
libcoreutils_a_LIBADD += $(LIBOBJS)
libcoreutils_a_DEPENDENCIES += $(LIBOBJS)
diff --git a/lib/memxfrm.c b/lib/memxfrm.c
deleted file mode 100644
index 12a1ae9e4..000000000
--- a/lib/memxfrm.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/* Locale-specific memory transformation
-
- Copyright (C) 2006, 2009-2010 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-/* Written by Paul Eggert <eggert@cs.ucla.edu>. */
-
-#include <config.h>
-
-#include "memxfrm.h"
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-
-/* Store into DEST (of size DESTSIZE) the text in SRC (of size SRCSIZE)
- transformed so that the result of memcmp on two transformed texts
- (with ties going to the longer text) is the same as the result of
- memcoll on the two texts before their transformation. Perhaps
- temporarily modify the byte after SRC, but restore its original
- contents before returning.
-
- Return the size of the resulting text, or an indeterminate value if
- there is an error. Set errno to an error number if there is an
- error, and to zero otherwise. DEST contains an indeterminate value
- if there is an error or if the resulting size is greater than
- DESTSIZE. */
-
-size_t
-memxfrm (char *restrict dest, size_t destsize,
- char *restrict src, size_t srcsize)
-{
-#if HAVE_STRXFRM
-
- size_t di = 0;
- size_t si = 0;
- size_t result = 0;
-
- char ch = src[srcsize];
- src[srcsize] = '\0';
-
- while (si < srcsize)
- {
- size_t slen = strlen (src + si);
-
- size_t result0 = result;
- errno = 0;
- result += strxfrm (dest + di, src + si, destsize - di) + 1;
- if (errno != 0)
- break;
- if (result <= result0)
- {
- errno = ERANGE;
- break;
- }
-
- if (result == destsize + 1 && si + slen == srcsize)
- {
- /* The destination is exactly the right size, but strxfrm wants
- room for a trailing null. Work around the problem with a
- temporary buffer. */
- size_t bufsize = destsize - di + 1;
- char stackbuf[4000];
- char *buf = stackbuf;
- if (sizeof stackbuf < bufsize)
- {
- buf = malloc (bufsize);
- if (! buf)
- break;
- }
- strxfrm (buf, src + si, bufsize);
- memcpy (dest + di, buf, destsize - di);
- if (sizeof stackbuf < bufsize)
- free (buf);
- errno = 0;
- }
-
- di = (result < destsize ? result : destsize);
- si += slen + 1;
- }
-
- src[srcsize] = ch;
- return result - (si != srcsize);
-
-#else
-
- if (srcsize < destsize)
- memcpy (dest, src, srcsize);
- errno = 0;
- return srcsize;
-
-#endif
-}
diff --git a/lib/memxfrm.h b/lib/memxfrm.h
deleted file mode 100644
index 605421dc2..000000000
--- a/lib/memxfrm.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <stddef.h>
-size_t memxfrm (char *restrict, size_t, char *restrict, size_t);
diff --git a/lib/xmemxfrm.c b/lib/xmemxfrm.c
deleted file mode 100644
index 9cdda9833..000000000
--- a/lib/xmemxfrm.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Locale-specific memory transformation
-
- Copyright (C) 2006, 2009-2010 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-/* Written by Paul Eggert <eggert@cs.ucla.edu>. */
-
-#include <config.h>
-
-#include "xmemxfrm.h"
-
-#include <errno.h>
-#include <stdlib.h>
-
-#include "gettext.h"
-#define _(msgid) gettext (msgid)
-
-#include "error.h"
-#include "exitfail.h"
-#include "memxfrm.h"
-#include "quotearg.h"
-
-/* Store into DEST (of size DESTSIZE) the text in SRC (of size
- SRCSIZE) transformed so that the result of memcmp on two
- transformed texts (with ties going to the longer text) is the same
- as the result of memcoll on the two texts before their
- transformation. Perhaps temporarily modify the byte after SRC, but
- restore its original contents before returning.
-
- Return the size of the resulting text. DEST contains an
- indeterminate value if the resulting size is greater than DESTSIZE.
- Report an error and exit if there is an error. */
-
-size_t
-xmemxfrm (char *restrict dest, size_t destsize,
- char *restrict src, size_t srcsize)
-{
- size_t translated_size = memxfrm (dest, destsize, src, srcsize);
-
- if (errno)
- {
- error (0, errno, _("string transformation failed"));
- error (0, 0, _("set LC_ALL='C' to work around the problem"));
- error (exit_failure, 0,
- _("the untransformed string was %s"),
- quotearg_n_style_mem (0, locale_quoting_style, src, srcsize));
- }
-
- return translated_size;
-}
diff --git a/lib/xmemxfrm.h b/lib/xmemxfrm.h
deleted file mode 100644
index 7c4b8ad7c..000000000
--- a/lib/xmemxfrm.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <stddef.h>
-size_t xmemxfrm (char *restrict, size_t, char *restrict, size_t);
diff --git a/m4/memxfrm.m4 b/m4/memxfrm.m4
deleted file mode 100644
index 47a17d3bf..000000000
--- a/m4/memxfrm.m4
+++ /dev/null
@@ -1,15 +0,0 @@
-dnl Copyright (C) 2006, 2009-2010 Free Software Foundation, Inc.
-dnl This file is free software; the Free Software Foundation
-dnl gives unlimited permission to copy and/or distribute it,
-dnl with or without modifications, as long as this notice is preserved.
-
-AC_DEFUN([gl_MEMXFRM],
-[
- AC_LIBSOURCES([memxfrm.c, memxfrm.h])
- AC_LIBOBJ([memxfrm])
-
- AC_REQUIRE([AC_C_RESTRICT])
-
- dnl Prerequisites of lib/memcoll.c.
- AC_CHECK_FUNCS_ONCE([strxfrm])
-])
diff --git a/m4/prereq.m4 b/m4/prereq.m4
index 24478cb82..8caea38cf 100644
--- a/m4/prereq.m4
+++ b/m4/prereq.m4
@@ -1,4 +1,4 @@
-#serial 76
+#serial 77
dnl We use gl_ for non Autoconf macros.
m4_pattern_forbid([^gl_[ABCDEFGHIJKLMNOPQRSTUVXYZ]])dnl
@@ -40,7 +40,6 @@ AC_DEFUN([gl_PREREQ],
AC_REQUIRE([gl_FD_REOPEN])
AC_REQUIRE([gl_FUNC_XATTR])
AC_REQUIRE([gl_FUNC_XFTS])
- AC_REQUIRE([gl_MEMXFRM])
AC_REQUIRE([gl_STRINTCMP])
AC_REQUIRE([gl_STRNUMCMP])
])
diff --git a/po/POTFILES.in b/po/POTFILES.in
index c862877c4..be20f4c28 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -29,7 +29,6 @@ lib/version-etc.c
lib/xalloc-die.c
lib/xfreopen.c
lib/xmemcoll.c
-lib/xmemxfrm.c
lib/xprintf.c
lib/xstrtol-error.c
diff --git a/src/sort.c b/src/sort.c
index 1df711da7..54382631e 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -48,7 +48,6 @@
#include "stdlib--.h"
#include "strnumcmp.h"
#include "xmemcoll.h"
-#include "xmemxfrm.h"
#include "xnanosleep.h"
#include "xstrtol.h"
@@ -2001,34 +2000,24 @@ random_md5_state_init (char const *random_source)
md5_process_bytes (buf, sizeof buf, &random_md5_state);
}
-/* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
- with length LENGTHB. Return negative if less, zero if equal,
- positive if greater. */
+/* This is like strxfrm, except it reports any error and exits. */
-static int
-cmp_hashes (char const *texta, size_t lena,
- char const *textb, size_t lenb)
+static size_t
+xstrxfrm (char *restrict dest, char const *restrict src, size_t destsize)
{
- /* Try random hashes until a pair of hashes disagree. But if the
- first pair of random hashes agree, check whether the keys are
- identical and if so report no difference. */
- int diff;
- uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
- struct md5_ctx s[2];
- s[0] = s[1] = random_md5_state;
- md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
- md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
- diff = memcmp (dig[0], dig[1], sizeof dig[0]);
- if (! diff)
+ errno = 0;
+ size_t translated_size = strxfrm (dest, src, destsize);
+
+ if (errno)
{
- /* Break ties with memcmp. This is good enough given the
- astronomically low probability of an MD5 collision. */
- diff = memcmp (texta, textb, MIN (lena, lenb));
- if (! diff)
- diff = lena < lenb ? -1 : lena != lenb;
+ error (0, errno, _("string transformation failed"));
+ error (0, 0, _("set LC_ALL='C' to work around the problem"));
+ error (SORT_FAILURE, 0,
+ _("the untransformed string was %s"),
+ quotearg_n_style (0, locale_quoting_style, src));
}
- return diff;
+ return translated_size;
}
/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
@@ -2038,41 +2027,117 @@ static int
compare_random (char *restrict texta, size_t lena,
char *restrict textb, size_t lenb)
{
- int diff;
+ /* XFRM_DIFF records the equivalent of memcmp on the transformed
+ data. This is used to break ties if there is an checksum
+ collision, and this is good enough given the astronomically low
+ probability of a collision. */
+ int xfrm_diff = 0;
+
+ char stackbuf[4000];
+ char *buf = stackbuf;
+ size_t bufsize = sizeof stackbuf;
+ uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
+ struct md5_ctx s[2];
+ s[0] = s[1] = random_md5_state;
- if (! hard_LC_COLLATE)
- diff = cmp_hashes (texta, lena, textb, lenb);
- else
+ if (hard_LC_COLLATE)
{
- /* Transform the text into the basis of comparison, so that byte
- strings that would otherwise considered to be equal are
- considered equal here even if their bytes differ. */
-
- char *buf = NULL;
- char stackbuf[4000];
- size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
- bool a_fits = tlena <= sizeof stackbuf;
- size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
- (a_fits ? sizeof stackbuf - tlena : 0),
- textb, lenb);
-
- if (a_fits && tlena + tlenb <= sizeof stackbuf)
- buf = stackbuf;
- else
+ /* Null-terminate the keys so that strxfrm knows where to stop. */
+ char *lima = texta + lena; char enda = *lima; *lima = '\0';
+ char *limb = textb + lenb; char endb = *limb; *limb = '\0';
+
+ while (true)
{
- /* Adding 1 to the buffer size lets xmemxfrm run a bit
- faster by avoiding the need for an extra buffer copy. */
- buf = xmalloc (tlena + tlenb + 1);
- xmemxfrm (buf, tlena + 1, texta, lena);
- xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
+ /* Transform the text into the basis of comparison, so that byte
+ strings that would otherwise considered to be equal are
+ considered equal here even if their bytes differ.
+
+ Each time through this loop, transform one
+ null-terminated string's worth from TEXTA or from TEXTB
+ or both. That way, there's no need to store the
+ transformation of the whole line, if it contains many
+ null-terminated strings. */
+
+ /* Store the transformed data into a big-enough buffer. */
+
+ size_t sizea =
+ (texta < lima ? xstrxfrm (buf, texta, bufsize) + 1 : 0);
+ bool a_fits = sizea <= bufsize;
+ size_t sizeb =
+ (textb < limb
+ ? (xstrxfrm ((a_fits ? buf + sizea : NULL), textb,
+ (a_fits ? bufsize - sizea : 0))
+ + 1)
+ : 0);
+
+ if (! (a_fits && sizea + sizeb <= bufsize))
+ {
+ bufsize = sizea + sizeb;
+ if (bufsize < SIZE_MAX / 3)
+ bufsize = bufsize * 3 / 2;
+ buf = (buf == stackbuf
+ ? xmalloc (bufsize)
+ : xrealloc (buf, bufsize));
+ if (texta < lima)
+ strxfrm (buf, texta, sizea);
+ if (textb < limb)
+ strxfrm (buf + sizea, textb, sizeb);
+ }
+
+ /* Advance past NULs to the next part of each input string,
+ exiting the loop if both strings are exhausted. When
+ exiting the loop, prepare to finish off the tiebreaker
+ comparison properly. */
+ if (texta < lima)
+ texta += strlen (texta) + 1;
+ if (textb < limb)
+ textb += strlen (textb) + 1;
+ if (! (texta < lima || textb < limb))
+ {
+ lena = sizea; texta = buf;
+ lenb = sizeb; textb = buf + sizea;
+ break;
+ }
+
+ /* Accumulate the transformed data in the corresponding
+ checksums. */
+ md5_process_bytes (buf, sizea, &s[0]);
+ md5_process_bytes (buf + sizea, sizeb, &s[1]);
+
+ /* Update the tiebreaker comparison of the transformed data. */
+ if (! xfrm_diff)
+ {
+ xfrm_diff = memcmp (buf, buf + sizea, MIN (sizea, sizeb));
+ if (! xfrm_diff)
+ xfrm_diff = (sizea > sizeb) - (sizea < sizeb);
+ }
}
- diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
+ *lima = enda;
+ *limb = endb;
+ }
+
+ /* Compute and compare the checksums. */
+ md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
+ md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
+ int diff = memcmp (dig[0], dig[1], sizeof dig[0]);
+
+ /* Fall back on the tiebreaker if the checksums collide. */
+ if (! diff)
+ {
+ if (! xfrm_diff)
+ {
+ xfrm_diff = memcmp (texta, textb, MIN (lena, lenb));
+ if (! xfrm_diff)
+ xfrm_diff = (lena > lenb) - (lena < lenb);
+ }
- if (buf != stackbuf)
- free (buf);
+ diff = xfrm_diff;
}
+ if (buf != stackbuf)
+ free (buf);
+
return diff;
}