diff options
author | Paul R. Eggert <eggert@cs.ucla.edu> | 2010-07-19 10:46:41 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2010-07-19 10:48:00 -0700 |
commit | e5444fa2a7a2ad4246e7a5e961d5c4aad2aecbe9 (patch) | |
tree | c0ec89d3e37b4de1cff1657fc7ccdaca057f7fa0 | |
parent | e7523efb7d73cefcbd76b499f19fb473f8eb2d13 (diff) | |
download | coreutils-e5444fa2a7a2ad4246e7a5e961d5c4aad2aecbe9.tar.xz |
sort: -R no longer disables multithreading
* src/sort.c (random_md5_state): New static var.
(random_md5_state_init): New function, to initialize random_md5_state.
(random_state, randread_source): Remove.
(cmp_hashes): Use random_md5_state rather than random_state.
Break ties using memcmp, not by getting more randomness.
If MD5 collisions turn into a problem in practice, we should
simply use a better checksum.
(main): If -R is given, call random_md5_state_init rather than
going single-threaded.
-rw-r--r-- | src/sort.c | 80 |
1 files changed, 27 insertions, 53 deletions
diff --git a/src/sort.c b/src/sort.c index 960df747a..afa45a8b3 100644 --- a/src/sort.c +++ b/src/sort.c @@ -2028,42 +2028,23 @@ getmonth (char const *month, size_t len, char const **ea) return 0; } -/* A source of random data. */ -static struct randread_source *randread_source; +/* A randomly chosen MD5 state, used for random comparison. */ +static struct md5_ctx random_md5_state; -/* Return the Ith randomly-generated state. The caller must invoke - random_state (H) for all H less than I before invoking random_state - (I). */ +/* Initialize the randomly chosen MD5 state. */ -static struct md5_ctx -random_state (size_t i) +static void +random_md5_state_init (char const *random_source) { - /* An array of states resulting from the random data, and counts of - its used and allocated members. */ - static struct md5_ctx *state; - static size_t used; - static size_t allocated; - - struct md5_ctx *s = &state[i]; - - if (used <= i) - { - unsigned char buf[MD5_DIGEST_SIZE]; - - used++; - - if (allocated <= i) - { - state = X2NREALLOC (state, &allocated); - s = &state[i]; - } - - randread (randread_source, buf, sizeof buf); - md5_init_ctx (s); - md5_process_bytes (buf, sizeof buf, s); - } - - return *s; + unsigned char buf[MD5_DIGEST_SIZE]; + struct randread_source *r = randread_new (random_source, sizeof buf); + if (! r) + die (_("open failed"), random_source); + randread (r, buf, sizeof buf); + if (randread_free (r) != 0) + die (_("close failed"), random_source); + md5_init_ctx (&random_md5_state); + md5_process_bytes (buf, sizeof buf, &random_md5_state); } /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB @@ -2078,19 +2059,19 @@ cmp_hashes (char const *texta, size_t lena, first pair of random hashes agree, check whether the keys are identical and if so report no difference. */ int diff; - size_t i; - for (i = 0; ; i++) + uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)]; + struct md5_ctx s[2]; + s[0] = s[1] = random_md5_state; + md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]); + md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]); + diff = memcmp (dig[0], dig[1], sizeof dig[0]); + if (! diff) { - uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)]; - struct md5_ctx s[2]; - s[0] = s[1] = random_state (i); - md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]); - md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]); - diff = memcmp (dig[0], dig[1], sizeof dig[0]); - if (diff != 0) - break; - if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0) - break; + /* Break ties with memcmp. This is good enough given the + astronomically low probability of an MD5 collision. */ + diff = memcmp (texta, textb, MIN (lena, lenb)); + if (! diff) + diff = lena < lenb ? -1 : lena != lenb; } return diff; @@ -4485,14 +4466,7 @@ main (int argc, char **argv) reverse = gkey.reverse; if (need_random) - { - /* Threading does not lock the randread_source structure, so - downgrade to one thread to avoid race conditions. */ - nthreads = 1; - randread_source = randread_new (random_source, MD5_DIGEST_SIZE); - if (! randread_source) - die (_("open failed"), random_source); - } + random_md5_state_init (random_source); if (temp_dir_count == 0) { |