summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul R. Eggert <eggert@cs.ucla.edu>2010-07-19 10:46:41 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2010-07-19 10:48:00 -0700
commite5444fa2a7a2ad4246e7a5e961d5c4aad2aecbe9 (patch)
treec0ec89d3e37b4de1cff1657fc7ccdaca057f7fa0
parente7523efb7d73cefcbd76b499f19fb473f8eb2d13 (diff)
downloadcoreutils-e5444fa2a7a2ad4246e7a5e961d5c4aad2aecbe9.tar.xz
sort: -R no longer disables multithreading
* src/sort.c (random_md5_state): New static var. (random_md5_state_init): New function, to initialize random_md5_state. (random_state, randread_source): Remove. (cmp_hashes): Use random_md5_state rather than random_state. Break ties using memcmp, not by getting more randomness. If MD5 collisions turn into a problem in practice, we should simply use a better checksum. (main): If -R is given, call random_md5_state_init rather than going single-threaded.
-rw-r--r--src/sort.c80
1 files changed, 27 insertions, 53 deletions
diff --git a/src/sort.c b/src/sort.c
index 960df747a..afa45a8b3 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -2028,42 +2028,23 @@ getmonth (char const *month, size_t len, char const **ea)
return 0;
}
-/* A source of random data. */
-static struct randread_source *randread_source;
+/* A randomly chosen MD5 state, used for random comparison. */
+static struct md5_ctx random_md5_state;
-/* Return the Ith randomly-generated state. The caller must invoke
- random_state (H) for all H less than I before invoking random_state
- (I). */
+/* Initialize the randomly chosen MD5 state. */
-static struct md5_ctx
-random_state (size_t i)
+static void
+random_md5_state_init (char const *random_source)
{
- /* An array of states resulting from the random data, and counts of
- its used and allocated members. */
- static struct md5_ctx *state;
- static size_t used;
- static size_t allocated;
-
- struct md5_ctx *s = &state[i];
-
- if (used <= i)
- {
- unsigned char buf[MD5_DIGEST_SIZE];
-
- used++;
-
- if (allocated <= i)
- {
- state = X2NREALLOC (state, &allocated);
- s = &state[i];
- }
-
- randread (randread_source, buf, sizeof buf);
- md5_init_ctx (s);
- md5_process_bytes (buf, sizeof buf, s);
- }
-
- return *s;
+ unsigned char buf[MD5_DIGEST_SIZE];
+ struct randread_source *r = randread_new (random_source, sizeof buf);
+ if (! r)
+ die (_("open failed"), random_source);
+ randread (r, buf, sizeof buf);
+ if (randread_free (r) != 0)
+ die (_("close failed"), random_source);
+ md5_init_ctx (&random_md5_state);
+ md5_process_bytes (buf, sizeof buf, &random_md5_state);
}
/* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
@@ -2078,19 +2059,19 @@ cmp_hashes (char const *texta, size_t lena,
first pair of random hashes agree, check whether the keys are
identical and if so report no difference. */
int diff;
- size_t i;
- for (i = 0; ; i++)
+ uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
+ struct md5_ctx s[2];
+ s[0] = s[1] = random_md5_state;
+ md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
+ md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
+ diff = memcmp (dig[0], dig[1], sizeof dig[0]);
+ if (! diff)
{
- uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
- struct md5_ctx s[2];
- s[0] = s[1] = random_state (i);
- md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
- md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
- diff = memcmp (dig[0], dig[1], sizeof dig[0]);
- if (diff != 0)
- break;
- if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
- break;
+ /* Break ties with memcmp. This is good enough given the
+ astronomically low probability of an MD5 collision. */
+ diff = memcmp (texta, textb, MIN (lena, lenb));
+ if (! diff)
+ diff = lena < lenb ? -1 : lena != lenb;
}
return diff;
@@ -4485,14 +4466,7 @@ main (int argc, char **argv)
reverse = gkey.reverse;
if (need_random)
- {
- /* Threading does not lock the randread_source structure, so
- downgrade to one thread to avoid race conditions. */
- nthreads = 1;
- randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
- if (! randread_source)
- die (_("open failed"), random_source);
- }
+ random_md5_state_init (random_source);
if (temp_dir_count == 0)
{