From 73742c2566e6f8d56b3b132026ad477b9924e774 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 8 Aug 2006 22:22:47 +0000 Subject: New file, introduced for shuf, sort -R, and/or shred. --- lib/memxfrm.c | 108 +++++++++++++++ lib/memxfrm.h | 2 + lib/rand-isaac.c | 300 +++++++++++++++++++++++++++++++++++++++++ lib/rand-isaac.h | 44 ++++++ lib/randint.c | 230 +++++++++++++++++++++++++++++++ lib/randint.h | 52 ++++++++ lib/randperm.c | 105 +++++++++++++++ lib/randperm.h | 4 + lib/randread.c | 298 +++++++++++++++++++++++++++++++++++++++++ lib/randread.h | 34 +++++ lib/xmemxfrm.c | 65 +++++++++ lib/xmemxfrm.h | 2 + m4/memxfrm.m4 | 15 +++ m4/randint.m4 | 12 ++ m4/randperm.m4 | 10 ++ m4/randread.m4 | 11 ++ man/shuf.x | 4 + src/shuf.c | 401 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ tests/misc/shuf | 37 +++++ 19 files changed, 1734 insertions(+) create mode 100644 lib/memxfrm.c create mode 100644 lib/memxfrm.h create mode 100644 lib/rand-isaac.c create mode 100644 lib/rand-isaac.h create mode 100644 lib/randint.c create mode 100644 lib/randint.h create mode 100644 lib/randperm.c create mode 100644 lib/randperm.h create mode 100644 lib/randread.c create mode 100644 lib/randread.h create mode 100644 lib/xmemxfrm.c create mode 100644 lib/xmemxfrm.h create mode 100644 m4/memxfrm.m4 create mode 100644 m4/randint.m4 create mode 100644 m4/randperm.m4 create mode 100644 m4/randread.m4 create mode 100644 man/shuf.x create mode 100644 src/shuf.c create mode 100755 tests/misc/shuf diff --git a/lib/memxfrm.c b/lib/memxfrm.c new file mode 100644 index 000000000..79db94b8f --- /dev/null +++ b/lib/memxfrm.c @@ -0,0 +1,108 @@ +/* Locale-specific memory transformation + + Copyright (C) 2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Written by Paul Eggert . */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "memxfrm.h" + +#include +#include +#include + +/* Store into DEST (of size DESTSIZE) the text in SRC (of size SRCSIZE) + transformed so that the result of memcmp on two transformed texts + (with ties going to the longer text) is the same as the result of + memcoll on the two texts before their transformation. Perhaps + temporarily modify the byte after SRC, but restore its original + contents before returning. + + Return the size of the resulting text, or an indeterminate value if + there is an error. Set errno to an error number if there is an + error, and to zero otherwise. DEST contains an indeterminate value + if there is an error or if the resulting size is greater than + DESTSIZE. */ + +size_t +memxfrm (char *restrict dest, size_t destsize, + char *restrict src, size_t srcsize) +{ +#if HAVE_STRXFRM + + size_t di = 0; + size_t si = 0; + size_t result = 0; + + char ch = src[srcsize]; + src[srcsize] = '\0'; + + while (si < srcsize) + { + size_t slen = strlen (src + si); + + size_t result0 = result; + errno = 0; + result += strxfrm (dest + di, src + si, destsize - di) + 1; + if (errno != 0) + break; + if (result <= result0) + { + errno = ERANGE; + break; + } + + if (result == destsize + 1 && si + slen == srcsize) + { + /* The destination is exactly the right size, but strxfrm wants + room for a trailing null. Work around the problem with a + temporary buffer. */ + size_t bufsize = destsize - di + 1; + char stackbuf[4000]; + char *buf = stackbuf; + if (sizeof stackbuf < bufsize) + { + buf = malloc (bufsize); + if (! buf) + break; + } + strxfrm (buf, src + si, bufsize); + memcpy (dest + di, buf, destsize - di); + if (sizeof stackbuf < bufsize) + free (buf); + errno = 0; + } + + di = (result < destsize ? result : destsize); + si += slen + 1; + } + + src[srcsize] = ch; + return result - (si != srcsize); + +#else + + if (srcsize < destsize) + memcpy (dest, src, srcsize); + errno = 0; + return srcsize; + +#endif +} diff --git a/lib/memxfrm.h b/lib/memxfrm.h new file mode 100644 index 000000000..605421dc2 --- /dev/null +++ b/lib/memxfrm.h @@ -0,0 +1,2 @@ +#include +size_t memxfrm (char *restrict, size_t, char *restrict, size_t); diff --git a/lib/rand-isaac.c b/lib/rand-isaac.c new file mode 100644 index 000000000..6a0185934 --- /dev/null +++ b/lib/rand-isaac.c @@ -0,0 +1,300 @@ +/* Bob Jenkins's cryptographic random number generator, ISAAC. + + Copyright (C) 1999-2006 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999 Colin Plumb. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Written by Colin Plumb. */ + +/* + * -------------------------------------------------------------------- + * We need a source of random numbers for some data. + * Cryptographically secure is desirable, but it's not life-or-death + * so I can be a little bit experimental in the choice of RNGs here. + * + * This generator is based somewhat on RC4, but has analysis + * + * pointing to it actually being better. I like it because it's nice + * and fast, and because the author did good work analyzing it. + * -------------------------------------------------------------------- + */ + +#include "rand-isaac.h" + +#include +#include + +#include "gethrxtime.h" + + +/* This index operation is more efficient on many processors */ +#define ind(mm, x) \ + (* (uint32_t *) ((char *) (mm) \ + + ((x) & (ISAAC_WORDS - 1) * sizeof (uint32_t)))) + +/* + * The central step. This uses two temporaries, x and y. mm is the + * whole state array, while m is a pointer to the current word. off is + * the offset from m to the word ISAAC_WORDS/2 words away in the mm array, + * i.e. +/- ISAAC_WORDS/2. + */ +#define isaac_step(mix, a, b, mm, m, off, r) \ +( \ + a = ((a) ^ (mix)) + (m)[off], \ + x = *(m), \ + *(m) = y = ind (mm, x) + (a) + (b), \ + *(r) = b = ind (mm, (y) >> ISAAC_LOG) + x \ +) + +/* Use and update *S to generate random data to fill R. */ +void +isaac_refill (struct isaac_state *s, uint32_t r[ISAAC_WORDS]) +{ + uint32_t a, b; /* Caches of a and b */ + uint32_t x, y; /* Temps needed by isaac_step macro */ + uint32_t *m = s->mm; /* Pointer into state array */ + + a = s->a; + b = s->b + (++s->c); + + do + { + isaac_step (a << 13, a, b, s->mm, m, ISAAC_WORDS / 2, r); + isaac_step (a >> 6, a, b, s->mm, m + 1, ISAAC_WORDS / 2, r + 1); + isaac_step (a << 2, a, b, s->mm, m + 2, ISAAC_WORDS / 2, r + 2); + isaac_step (a >> 16, a, b, s->mm, m + 3, ISAAC_WORDS / 2, r + 3); + r += 4; + } + while ((m += 4) < s->mm + ISAAC_WORDS / 2); + do + { + isaac_step (a << 13, a, b, s->mm, m, -ISAAC_WORDS / 2, r); + isaac_step (a >> 6, a, b, s->mm, m + 1, -ISAAC_WORDS / 2, r + 1); + isaac_step (a << 2, a, b, s->mm, m + 2, -ISAAC_WORDS / 2, r + 2); + isaac_step (a >> 16, a, b, s->mm, m + 3, -ISAAC_WORDS / 2, r + 3); + r += 4; + } + while ((m += 4) < s->mm + ISAAC_WORDS); + s->a = a; + s->b = b; +} + +/* + * The basic seed-scrambling step for initialization, based on Bob + * Jenkins' 256-bit hash. + */ +#define mix(a,b,c,d,e,f,g,h) \ + ( a ^= b << 11, d += a, \ + b += c, b ^= c >> 2, e += b, \ + c += d, c ^= d << 8, f += c, \ + d += e, d ^= e >> 16, g += d, \ + e += f, e ^= f << 10, h += e, \ + f += g, f ^= g >> 4, a += f, \ + g += h, g ^= h << 8, b += g, \ + h += a, h ^= a >> 9, c += h, \ + a += b ) + +/* The basic ISAAC initialization pass. */ +static void +isaac_mix (struct isaac_state *s, uint32_t const seed[/* ISAAC_WORDS */]) +{ + int i; + uint32_t a = s->iv[0]; + uint32_t b = s->iv[1]; + uint32_t c = s->iv[2]; + uint32_t d = s->iv[3]; + uint32_t e = s->iv[4]; + uint32_t f = s->iv[5]; + uint32_t g = s->iv[6]; + uint32_t h = s->iv[7]; + + for (i = 0; i < ISAAC_WORDS; i += 8) + { + a += seed[i]; + b += seed[i + 1]; + c += seed[i + 2]; + d += seed[i + 3]; + e += seed[i + 4]; + f += seed[i + 5]; + g += seed[i + 6]; + h += seed[i + 7]; + + mix (a, b, c, d, e, f, g, h); + + s->mm[i] = a; + s->mm[i + 1] = b; + s->mm[i + 2] = c; + s->mm[i + 3] = d; + s->mm[i + 4] = e; + s->mm[i + 5] = f; + s->mm[i + 6] = g; + s->mm[i + 7] = h; + } + + s->iv[0] = a; + s->iv[1] = b; + s->iv[2] = c; + s->iv[3] = d; + s->iv[4] = e; + s->iv[5] = f; + s->iv[6] = g; + s->iv[7] = h; +} + +#if 0 /* Provided for reference only; not used in this code */ +/* + * Initialize the ISAAC RNG with the given seed material. + * Its size MUST be a multiple of ISAAC_BYTES, and may be + * stored in the s->mm array. + * + * This is a generalization of the original ISAAC initialization code + * to support larger seed sizes. For seed sizes of 0 and ISAAC_BYTES, + * it is identical. + */ +static void +isaac_init (struct isaac_state *s, uint32_t const *seed, size_t seedsize) +{ + static uint32_t const iv[8] = + { + 0x1367df5a, 0x95d90059, 0xc3163e4b, 0x0f421ad8, + 0xd92a4a78, 0xa51a3c49, 0xc4efea1b, 0x30609119}; + int i; + +# if 0 + /* The initialization of iv is a precomputed form of: */ + for (i = 0; i < 7; i++) + iv[i] = 0x9e3779b9; /* the golden ratio */ + for (i = 0; i < 4; ++i) /* scramble it */ + mix (iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]); +# endif + s->a = s->b = s->c = 0; + + for (i = 0; i < 8; i++) + s->iv[i] = iv[i]; + + if (seedsize) + { + /* First pass (as in reference ISAAC code) */ + isaac_mix (s, seed); + /* Second and subsequent passes (extension to ISAAC) */ + while (seedsize -= ISAAC_BYTES) + { + seed += ISAAC_WORDS; + for (i = 0; i < ISAAC_WORDS; i++) + s->mm[i] += seed[i]; + isaac_mix (s, s->mm); + } + } + else + { + /* The no seed case (as in reference ISAAC code) */ + for (i = 0; i < ISAAC_WORDS; i++) + s->mm[i] = 0; + } + + /* Final pass */ + isaac_mix (s, s->mm); +} +#endif + +/* Initialize *S to a somewhat-random value. */ +static void +isaac_seed_start (struct isaac_state *s) +{ + static uint32_t const iv[8] = + { + 0x1367df5a, 0x95d90059, 0xc3163e4b, 0x0f421ad8, + 0xd92a4a78, 0xa51a3c49, 0xc4efea1b, 0x30609119 + }; + +#if 0 + /* The initialization of iv is a precomputed form of: */ + int i; + for (i = 0; i < 7; i++) + iv[i] = 0x9e3779b9; /* the golden ratio */ + for (i = 0; i < 4; ++i) /* scramble it */ + mix (iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]); +#endif + + memset (s->mm, 0, sizeof s->mm); + memcpy (s->iv, iv, sizeof s->iv); + + /* s->c gets used for a data pointer during the seeding phase */ + s->a = s->b = s->c = 0; +} + +/* Add a buffer of seed material. */ +static void +isaac_seed_data (struct isaac_state *s, void const *buffer, size_t size) +{ + unsigned char const *buf = buffer; + unsigned char *p; + size_t avail; + size_t i; + + avail = sizeof s->mm - s->c; /* s->c is used as a write pointer */ + + /* Do any full buffers that are necessary */ + while (size > avail) + { + p = (unsigned char *) s->mm + s->c; + for (i = 0; i < avail; i++) + p[i] ^= buf[i]; + buf += avail; + size -= avail; + isaac_mix (s, s->mm); + s->c = 0; + avail = sizeof s->mm; + } + + /* And the final partial block */ + p = (unsigned char *) s->mm + s->c; + for (i = 0; i < size; i++) + p[i] ^= buf[i]; + s->c = size; +} + + +/* End of seeding phase; get everything ready to produce output. */ +static void +isaac_seed_finish (struct isaac_state *s) +{ + isaac_mix (s, s->mm); + isaac_mix (s, s->mm); + /* Now reinitialize c to start things off right */ + s->c = 0; +} +#define ISAAC_SEED(s,x) isaac_seed_data (s, &(x), sizeof (x)) + +/* Initialize *S to a somewhat-random value; this starts seeding, + seeds with somewhat-random data, and finishes seeding. */ +void +isaac_seed (struct isaac_state *s) +{ + isaac_seed_start (s); + + { pid_t t = getpid (); ISAAC_SEED (s, t); } + { pid_t t = getppid (); ISAAC_SEED (s, t); } + { uid_t t = getuid (); ISAAC_SEED (s, t); } + { gid_t t = getgid (); ISAAC_SEED (s, t); } + + { + xtime_t t = gethrxtime (); + ISAAC_SEED (s, t); + } + + isaac_seed_finish (s); +} diff --git a/lib/rand-isaac.h b/lib/rand-isaac.h new file mode 100644 index 000000000..f8daee03d --- /dev/null +++ b/lib/rand-isaac.h @@ -0,0 +1,44 @@ +/* Bob Jenkins's cryptographic random number generator, ISAAC. + + Copyright (C) 1999-2005 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999 Colin Plumb. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Written by Colin Plumb. */ + +#ifndef RAND_ISAAC_H +# define RAND_ISAAC_H + +# include + +/* Size of the state tables to use. ISAAC_LOG should be at least 3, + and smaller values give less security. */ +# define ISAAC_LOG 8 +# define ISAAC_WORDS (1 << ISAAC_LOG) +# define ISAAC_BYTES (ISAAC_WORDS * sizeof (uint32_t)) + +/* RNG state variables. The members of this structure are private. */ +struct isaac_state + { + uint32_t mm[ISAAC_WORDS]; /* Main state array */ + uint32_t iv[8]; /* Seeding initial vector */ + uint32_t a, b, c; /* Extra index variables */ + }; + +void isaac_seed (struct isaac_state *); +void isaac_refill (struct isaac_state *, uint32_t[ISAAC_WORDS]); + +#endif diff --git a/lib/randint.c b/lib/randint.c new file mode 100644 index 000000000..e461bf716 --- /dev/null +++ b/lib/randint.c @@ -0,0 +1,230 @@ +/* Generate random integers. + + Copyright (C) 2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* Written by Paul Eggert. */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "randint.h" + +#include +#include +#include +#include + + +#if TEST +# include +# include + +int +main (int argc, char **argv) +{ + randint i; + randint n = strtoumax (argv[1], NULL, 10); + randint choices = strtoumax (argv[2], NULL, 10); + char const *name = argv[3]; + struct randint_source *ints = randint_all_new (name, SIZE_MAX); + + for (i = 0; i < n; i++) + printf ("%"PRIuMAX"\n", randint_choose (ints, choices)); + + return (randint_all_free (ints) == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} +#endif + + +#include "xalloc.h" + +#ifndef MAX +# define MAX(a,b) ((a) < (b) ? (b) : (a)) +#endif + +/* A source of random data for generating random integers. */ +struct randint_source +{ + /* The source of random bytes. */ + struct randread_source *source; + + /* RANDNUM is a buffered random integer, whose information has not + yet been delivered to the caller. It is uniformly distributed in + the range 0 <= RANDNUM <= RANDMAX. If RANDMAX is zero, then + RANDNUM must be zero (and in some sense it is not really + "random"). */ + randint randnum; + randint randmax; +}; + +/* Create a new randint_source from SOURCE. */ + +struct randint_source * +randint_new (struct randread_source *source) +{ + struct randint_source *s = xmalloc (sizeof *s); + s->source = source; + s->randnum = s->randmax = 0; + return s; +} + +/* Create a new randint_source by creating a randread_source from + NAME and ESTIMATED_BYTES. Return NULL (setting errno) if + unsuccessful. */ + +struct randint_source * +randint_all_new (char const *name, size_t bytes_bound) +{ + struct randread_source *source = randread_new (name, bytes_bound); + return (source ? randint_new (source) : NULL); +} + +/* Return the random data source of *S. */ + +struct randread_source * +randint_get_source (struct randint_source const *s) +{ + return s->source; +} + +/* HUGE_BYTES is true on hosts hosts where randint and unsigned char + have the same width and where shifting by the word size therefore + has undefined behavior. */ +enum { HUGE_BYTES = RANDINT_MAX == UCHAR_MAX }; + +/* Return X shifted left by CHAR_BIT bits. */ +static inline randint shift_left (randint x) +{ + return HUGE_BYTES ? 0 : x << CHAR_BIT; +} + +/* Return X shifted right by CHAR_BIT bits. */ +static inline randint +shift_right (randint x) +{ + return HUGE_BYTES ? 0 : x >> CHAR_BIT; +} + + +/* Consume random data from *S to generate a random number in the range + 0 .. GENMAX. */ + +randint +randint_genmax (struct randint_source *s, randint genmax) +{ + struct randread_source *source = s->source; + randint randnum = s->randnum; + randint randmax = s->randmax; + randint choices = genmax + 1; + + for (;;) + { + if (randmax < genmax) + { + /* Calculate how many input bytes will be needed, and read + the bytes. */ + + size_t i = 0; + randint rmax = randmax; + unsigned char buf[sizeof randnum]; + + do + { + rmax = shift_left (rmax) + UCHAR_MAX; + i++; + } + while (rmax < genmax); + + randread (source, buf, i); + + /* Increase RANDMAX by appending random bytes to RANDNUM and + UCHAR_MAX to RANDMAX until RANDMAX is no less than + GENMAX. This may lose up to CHAR_BIT bits of information + if shift_right (RANDINT_MAX) < GENMAX, but it is not + worth the programming hassle of saving these bits since + GENMAX is rarely that large in practice. */ + + i = 0; + + do + { + randnum = shift_left (randnum) + buf[i]; + randmax = shift_left (randmax) + UCHAR_MAX; + i++; + } + while (randmax < genmax); + } + + if (randmax == genmax) + { + s->randnum = s->randmax = 0; + return randnum; + } + else + { + /* GENMAX < RANDMAX, so attempt to generate a random number + by taking RANDNUM modulo GENMAX+1. This will choose + fairly so long as RANDNUM falls within an integral + multiple of GENMAX+1; otherwise, LAST_USABLE_CHOICE < RANDNUM, + so discard this attempt and try again. + + Since GENMAX cannot be RANDINT_MAX, CHOICES cannot be + zero and there is no need to worry about dividing by + zero. */ + + randint excess_choices = randmax - genmax; + randint unusable_choices = excess_choices % choices; + randint last_usable_choice = randmax - unusable_choices; + randint reduced_randnum = randnum % choices; + + if (randnum <= last_usable_choice) + { + s->randnum = randnum / choices; + s->randmax = excess_choices / choices; + return reduced_randnum; + } + + /* Retry, but retain the randomness from the fact that RANDNUM fell + into the range LAST_USABLE_CHOICE+1 .. RANDMAX. */ + randnum = reduced_randnum; + randmax = unusable_choices - 1; + } + } +} + +/* Clear *S so that it no longer contains undelivered random data. */ + +void +randint_free (struct randint_source *s) +{ + memset (s, 0, sizeof *s); + free (s); +} + +/* Likewise, but also clear the underlying randread object. Return + 0 if successful, -1 (setting errno) otherwise. */ + +int +randint_all_free (struct randint_source *s) +{ + int r = randread_free (s->source); + int e = errno; + randint_free (s); + errno = e; + return r; +} diff --git a/lib/randint.h b/lib/randint.h new file mode 100644 index 000000000..4245a6d3f --- /dev/null +++ b/lib/randint.h @@ -0,0 +1,52 @@ +/* Generate random integers. + + Copyright (C) 2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* Written by Paul Eggert. */ + +#ifndef RANDINT_H + +# define RANDINT_H 1 + +# include + +# include "randread.h" + +/* An unsigned integer type, used for random integers, and its maximum + value. */ +typedef uintmax_t randint; +# define RANDINT_MAX UINTMAX_MAX + +struct randint_source; + +struct randint_source *randint_new (struct randread_source *); +struct randint_source *randint_all_new (char const *, size_t); +struct randread_source *randint_get_source (struct randint_source const *); +randint randint_genmax (struct randint_source *, randint genmax); + +/* Consume random data from *S to generate a random number in the range + 0 .. CHOICES-1. CHOICES must be nonzero. */ +static inline randint +randint_choose (struct randint_source *s, randint choices) +{ + return randint_genmax (s, choices - 1); +} + +void randint_free (struct randint_source *); +int randint_all_free (struct randint_source *); + +#endif diff --git a/lib/randperm.c b/lib/randperm.c new file mode 100644 index 000000000..96f049446 --- /dev/null +++ b/lib/randperm.c @@ -0,0 +1,105 @@ +/* Generate random permutations. + + Copyright (C) 2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* Written by Paul Eggert. */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "randperm.h" + +#include + +#include "xalloc.h" + +/* Return the ceiling of the log base 2 of N. If N is zero, return + an unspecified value. */ + +static size_t +ceil_lg (size_t n) +{ + size_t b = 0; + for (n--; n != 0; n /= 2) + b++; + return b; +} + +/* Return an upper bound on the number of random bytes needed to + generate the first H elements of a random permutation of N + elements. H must not exceed N. */ + +size_t +randperm_bound (size_t h, size_t n) +{ + /* Upper bound on number of bits needed to generate the first number + of the permutation. */ + size_t lg_n = ceil_lg (n); + + /* Upper bound on number of bits needed to generated the first H elements. */ + size_t ar = lg_n * h; + + /* Convert the bit count to a byte count. */ + size_t bound = (ar + CHAR_BIT - 1) / CHAR_BIT; + + return bound; +} + +/* From R, allocate and return the first H elements of a random + permutation of N elements. H must not exceed N. Return NULL if H + is zero. */ + +size_t * +randperm_new (struct randint_source *r, size_t h, size_t n) +{ + size_t *v; + + switch (h) + { + case 0: + v = NULL; + break; + + case 1: + v = xmalloc (sizeof *v); + v[0] = randint_choose (r, n); + break; + + default: + { + size_t i; + + v = xnmalloc (n, sizeof *v); + for (i = 0; i < n; i++) + v[i] = i; + + for (i = 0; i < h; i++) + { + size_t j = i + randint_choose (r, n - i); + size_t t = v[i]; + v[i] = v[j]; + v[j] = t; + } + + v = xnrealloc (v, h, sizeof *v); + } + break; + } + + return v; +} diff --git a/lib/randperm.h b/lib/randperm.h new file mode 100644 index 000000000..79bbf9fe6 --- /dev/null +++ b/lib/randperm.h @@ -0,0 +1,4 @@ +#include "randint.h" +#include +size_t randperm_bound (size_t, size_t); +size_t *randperm_new (struct randint_source *, size_t, size_t); diff --git a/lib/randread.c b/lib/randread.c new file mode 100644 index 000000000..ff3f86900 --- /dev/null +++ b/lib/randread.c @@ -0,0 +1,298 @@ +/* Generate buffers of random data. + + Copyright (C) 2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* Written by Paul Eggert. */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "randread.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +#include "rand-isaac.h" +#include "stdio-safer.h" +#include "unlocked-io.h" +#include "xalloc.h" + +#ifndef MIN +# define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8) || __STRICT_ANSI__ +# define __attribute__(x) +#endif + +#ifndef ATTRIBUTE_UNUSED +# define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +#endif + +#if _STRING_ARCH_unaligned +# define ALIGNED_POINTER(ptr, type) true +#else +# define alignof(type) offsetof (struct { char c; type x; }, x) +# define ALIGNED_POINTER(ptr, type) ((size_t) (ptr) % alignof (type) == 0) +#endif + +#ifndef DEFAULT_RANDOM_FILE +# define DEFAULT_RANDOM_FILE "/dev/urandom" +#endif + +/* The maximum buffer size used for reads of random data. Using the + value 2 * ISAAC_BYTES makes this the largest power of two that + would not otherwise cause struct randread_source to grow. */ +#define RANDREAD_BUFFER_SIZE (2 * ISAAC_BYTES) + +/* A source of random data for generating random buffers. */ +struct randread_source +{ + /* Stream to read random bytes from. If null, the behavior is + undefined; the current implementation uses ISAAC in this case, + but this is for old-fashioned implementations that lack + /dev/urandom and callers should not rely on this. */ + FILE *source; + + /* Function to call, and its argument, if there is an input error or + end of file when reading from the stream; errno is nonzero if + there was an error. If this function returns, it should fix the + problem before returning. The default handler assumes that + handler_arg is the file name of the source. */ + void (*handler) (void *); + void *handler_arg; + + /* The buffer for SOURCE. It's kept here to simplify storage + allocation and to make it easier to clear out buffered random + data. */ + union + { + /* The stream buffer, if SOURCE is not null. */ + char c[RANDREAD_BUFFER_SIZE]; + + /* The buffered ISAAC pseudorandom buffer, if SOURCE is null. */ + struct isaac + { + /* The number of bytes that are buffered at the end of data.b. */ + size_t buffered; + + /* State of the ISAAC generator. */ + struct isaac_state state; + + /* Up to a buffer's worth of pseudorandom data. */ + union + { + uint32_t w[ISAAC_WORDS]; + unsigned char b[ISAAC_BYTES]; + } data; + } isaac; + } buf; +}; + + +/* The default error handler. */ + +static void +randread_error (void *file_name) +{ + if (file_name) + error (exit_failure, errno, + _(errno == 0 ? "%s: end of file" : "%s: read error"), + quotearg_colon (file_name)); + abort (); +} + +/* Simply return a new randread_source object with the default error + handler. */ + +static struct randread_source * +simple_new (FILE *source, void *handler_arg) +{ + struct randread_source *s = xmalloc (sizeof *s); + s->source = source; + s->handler = randread_error; + s->handler_arg = handler_arg; + return s; +} + +/* Create and initialize a random data source from NAME, or use a + reasonable default source if NAME is null. BYTES_BOUND is an upper + bound on the number of bytes that will be needed. If zero, it is a + hard bound; otherwise it is just an estimate. + + If NAME is not null, NAME is saved for use as the argument of the + default handler. Unless a non-default handler is used, NAME's + lifetime should be at least that of the returned value. + + Return NULL (setting errno) on failure. */ + +struct randread_source * +randread_new (char const *name, size_t bytes_bound) +{ + if (bytes_bound == 0) + return simple_new (NULL, NULL); + else + { + char const *file_name = (name ? name : DEFAULT_RANDOM_FILE); + FILE *source = fopen_safer (file_name, "rb"); + struct randread_source *s; + + if (! source) + { + if (name) + return NULL; + file_name = NULL; + } + + s = simple_new (source, (void *) file_name); + + if (source) + setvbuf (source, s->buf.c, _IOFBF, MIN (sizeof s->buf.c, bytes_bound)); + else + { + s->buf.isaac.buffered = 0; + isaac_seed (&s->buf.isaac.state); + } + + return s; + } +} + + +/* Set S's handler and its argument. HANDLER (HANDLER_ARG) is called + when there is a read error or end of file from the random data + source; errno is nonzero if there was an error. If HANDLER + returns, it should fix the problem before returning. The default + handler assumes that handler_arg is the file name of the source; it + does not return. */ + +void +randread_set_handler (struct randread_source *s, void (*handler) (void *)) +{ + s->handler = handler; +} + +void +randread_set_handler_arg (struct randread_source *s, void *handler_arg) +{ + s->handler_arg = handler_arg; +} + + +/* Place SIZE random bytes into the buffer beginning at P, using + the stream in S. */ + +static void +readsource (struct randread_source *s, unsigned char *p, size_t size) +{ + for (;;) + { + size_t inbytes = fread (p, sizeof *p, size, s->source); + int fread_errno = errno; + p += inbytes; + size -= inbytes; + if (size == 0) + break; + errno = (ferror (s->source) ? fread_errno : 0); + s->handler (s->handler_arg); + } +} + + +/* Place SIZE pseudorandom bytes into the buffer beginning at P, using + the buffered ISAAC generator in ISAAC. */ + +static void +readisaac (struct isaac *isaac, unsigned char *p, size_t size) +{ + size_t inbytes = isaac->buffered; + + for (;;) + { + if (size <= inbytes) + { + memcpy (p, isaac->data.b + ISAAC_BYTES - inbytes, size); + isaac->buffered = inbytes - size; + return; + } + + memcpy (p, isaac->data.b + ISAAC_BYTES - inbytes, inbytes); + p += inbytes; + size -= inbytes; + + /* If P is aligned, write to *P directly to avoid the overhead + of copying from the buffer. */ + if (ALIGNED_POINTER (p, uint32_t)) + { + uint32_t *wp = (uint32_t *) p; + while (ISAAC_BYTES <= size) + { + isaac_refill (&isaac->state, wp); + wp += ISAAC_WORDS; + size -= ISAAC_BYTES; + if (size == 0) + { + isaac->buffered = 0; + return; + } + } + p = (unsigned char *) wp; + } + + isaac_refill (&isaac->state, isaac->data.w); + inbytes = ISAAC_BYTES; + } +} + + +/* Consume random data from *S to generate a random buffer BUF of size + SIZE. */ + +void +randread (struct randread_source *s, void *buf, size_t size) +{ + if (s->source) + readsource (s, buf, size); + else + readisaac (&s->buf.isaac, buf, size); +} + + +/* Clear *S so that it no longer contains undelivered random data, and + deallocate any system resources associated with *S. Return 0 if + successful, a negative number (setting errno) if not (this is rare, + but can occur in theory if there is an input error). */ + +int +randread_free (struct randread_source *s) +{ + FILE *source = s->source; + memset (s, 0, sizeof *s); + free (s); + return (source ? fclose (source) : 0); +} diff --git a/lib/randread.h b/lib/randread.h new file mode 100644 index 000000000..b256386d2 --- /dev/null +++ b/lib/randread.h @@ -0,0 +1,34 @@ +/* Generate buffers of random data. + + Copyright (C) 2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* Written by Paul Eggert. */ + +#ifndef RANDREAD_H +# define RANDREAD_H 1 + +# include + +struct randread_source; + +struct randread_source *randread_new (char const *, size_t); +void randread (struct randread_source *, void *, size_t); +void randread_set_handler (struct randread_source *, void (*) (void *)); +void randread_set_handler_arg (struct randread_source *, void *); +int randread_free (struct randread_source *); + +#endif diff --git a/lib/xmemxfrm.c b/lib/xmemxfrm.c new file mode 100644 index 000000000..6cc726a73 --- /dev/null +++ b/lib/xmemxfrm.c @@ -0,0 +1,65 @@ +/* Locale-specific memory transformation + + Copyright (C) 2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Written by Paul Eggert . */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include "xmemxfrm.h" + +#include +#include + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +#include "error.h" +#include "exitfail.h" +#include "memxfrm.h" +#include "quotearg.h" + +/* Store into DEST (of size DESTSIZE) the text in SRC (of size + SRCSIZE) transformed so that the result of memcmp on two + transformed texts (with ties going to the longer text) is the same + as the result of memcoll on the two texts before their + transformation. Perhaps temporarily modify the byte after SRC, but + restore its original contents before returning. + + Return the size of the resulting text. DEST contains an + indeterminate value if the resulting size is greater than DESTSIZE. + Report an error and exit if there is an error. */ + +size_t +xmemxfrm (char *restrict dest, size_t destsize, + char *restrict src, size_t srcsize) +{ + size_t translated_size = memxfrm (dest, destsize, src, srcsize); + + if (errno) + { + error (0, errno, _("string transformation failed")); + error (0, 0, _("Set LC_ALL='C' to work around the problem.")); + error (exit_failure, 0, + _("The untransformed string was %s."), + quotearg_n_style_mem (0, locale_quoting_style, src, srcsize)); + } + + return translated_size; +} diff --git a/lib/xmemxfrm.h b/lib/xmemxfrm.h new file mode 100644 index 000000000..7c4b8ad7c --- /dev/null +++ b/lib/xmemxfrm.h @@ -0,0 +1,2 @@ +#include +size_t xmemxfrm (char *restrict, size_t, char *restrict, size_t); diff --git a/m4/memxfrm.m4 b/m4/memxfrm.m4 new file mode 100644 index 000000000..ca1c6a93e --- /dev/null +++ b/m4/memxfrm.m4 @@ -0,0 +1,15 @@ +dnl Copyright (C) 2006 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_MEMXFRM], +[ + AC_LIBSOURCES([memxfrm.c, memxfrm.h]) + AC_LIBOBJ([memxfrm]) + + AC_REQUIRE([AC_C_RESTRICT]) + + dnl Prerequisites of lib/memcoll.c. + AC_CHECK_FUNCS_ONCE([strxfrm]) +]) diff --git a/m4/randint.m4 b/m4/randint.m4 new file mode 100644 index 000000000..50209ed2a --- /dev/null +++ b/m4/randint.m4 @@ -0,0 +1,12 @@ +dnl Copyright (C) 2006 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_RANDINT], +[ + AC_LIBSOURCES([randint.c, randint.h]) + AC_LIBOBJ([randint]) + + AC_REQUIRE([AC_C_INLINE]) +]) diff --git a/m4/randperm.m4 b/m4/randperm.m4 new file mode 100644 index 000000000..de2d691db --- /dev/null +++ b/m4/randperm.m4 @@ -0,0 +1,10 @@ +dnl Copyright (C) 2006 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_RANDPERM], +[ + AC_LIBSOURCES([randperm.c, randperm.h]) + AC_LIBOBJ([randperm]) +]) diff --git a/m4/randread.m4 b/m4/randread.m4 new file mode 100644 index 000000000..c30ddd3f2 --- /dev/null +++ b/m4/randread.m4 @@ -0,0 +1,11 @@ +dnl Copyright (C) 2006 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_RANDREAD], +[ + AC_LIBSOURCES([randread.c, randread.h, rand-isaac.c, rand-isaac.h]) + AC_LIBOBJ([randread]) + AC_LIBOBJ([rand-isaac]) +]) diff --git a/man/shuf.x b/man/shuf.x new file mode 100644 index 000000000..bda519fa9 --- /dev/null +++ b/man/shuf.x @@ -0,0 +1,4 @@ +[NAME] +shuf \- generate random permutations +[DESCRIPTION] +.\" Add any additional description here diff --git a/src/shuf.c b/src/shuf.c new file mode 100644 index 000000000..ac1d469c9 --- /dev/null +++ b/src/shuf.c @@ -0,0 +1,401 @@ +/* Shuffle lines of text. + + Copyright (C) 2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Written by Paul Eggert. */ + +#include + +#include +#include +#include "system.h" + +#include "error.h" +#include "getopt.h" +#include "quote.h" +#include "quotearg.h" +#include "randint.h" +#include "randperm.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no `g' prefix). */ +#define PROGRAM_NAME "shuf" + +#define AUTHORS "Paul Eggert" + +/* The name this program was run with. */ +char *program_name; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, _("Try `%s --help' for more information.\n"), + program_name); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]\n\ + or: %s -e [OPTION]... [ARG]...\n\ + or: %s -i LO-HI [OPTION]...\n\ +"), + program_name, program_name, program_name); + fputs (_("\ +Write a random permutation of the input lines to standard output.\n\ +\n\ +"), stdout); + fputs (_("\ +Mandatory arguments to long options are mandatory for short options too.\n\ +"), stdout); + fputs (_("\ + -e, --echo treat each ARG as an input line\n\ + -i, --input-range=LO-HI treat each number LO through HI as an input line\n\ + -n, --head-lines=LINES output at most LINES lines\n\ + -o, --output=FILE write result to FILE instead of standard output\n\ + --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\ + -z, --zero-terminated end lines with 0 byte, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +With no FILE, or when FILE is -, read standard input.\n\ +"), stdout); + printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); + } + + exit (status); +} + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + RANDOM_SOURCE_OPTION = CHAR_MAX + 1 +}; + +static struct option const long_opts[] = +{ + {"echo", no_argument, NULL, 'e'}, + {"input-range", required_argument, NULL, 'i'}, + {"head-count", required_argument, NULL, 'n'}, + {"output", required_argument, NULL, 'o'}, + {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION}, + {"zero-terminated", no_argument, NULL, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {0, 0, 0, 0}, +}; + +static bool +input_numbers_option_used (size_t lo_input, size_t hi_input) +{ + return ! (lo_input == SIZE_MAX && hi_input == 0); +} + +static void +input_from_argv (char **operand, int n_operands, char eolbyte) +{ + char *p; + size_t size = n_operands; + int i; + + for (i = 0; i < n_operands; i++) + size += strlen (operand[i]); + p = xmalloc (size); + + for (i = 0; i < n_operands; i++) + { + char *p1 = stpcpy (p, operand[i]); + operand[i] = p; + p = p1; + *p++ = eolbyte; + } + + operand[n_operands] = p; +} + +/* Read data from file IN. Input lines are delimited by EOLBYTE; + silently append a trailing EOLBYTE if the file ends in some other + byte. Store a pointer to the resulting array of lines into *PLINE. + Return the number of lines read. Report an error and exit on + failure. */ + +static size_t +read_input (FILE *in, char eolbyte, char ***pline) +{ + char *p; + char *buf = NULL; + char *lim; + size_t alloc = 0; + size_t used = 0; + size_t next_alloc = (1 << 13) + 1; + size_t bytes_to_read; + size_t nread; + char **line; + size_t i; + size_t n_lines; + int fread_errno; + struct stat instat; + + if (fstat (fileno (in), &instat) == 0 && S_ISREG (instat.st_mode)) + { + off_t file_size = instat.st_size; + off_t current_offset = ftello (in); + if (0 <= current_offset) + { + off_t remaining_size = + (current_offset < file_size ? file_size - current_offset : 0); + if (SIZE_MAX - 2 < remaining_size) + xalloc_die (); + next_alloc = remaining_size + 2; + } + } + + do + { + if (alloc == used) + { + if (alloc == SIZE_MAX) + xalloc_die (); + alloc = next_alloc; + next_alloc = alloc * 2; + if (next_alloc < alloc) + next_alloc = SIZE_MAX; + buf = xrealloc (buf, alloc); + } + + bytes_to_read = alloc - used - 1; + nread = fread (buf + used, sizeof (char), bytes_to_read, in); + used += nread; + } + while (nread == bytes_to_read); + + fread_errno = errno; + + if (used && buf[used - 1] != eolbyte) + buf[used++] = eolbyte; + + lim = buf + used; + + n_lines = 0; + for (p = buf; p < lim; p = memchr (p, eolbyte, lim - p) + 1) + n_lines++; + + *pline = line = xnmalloc (n_lines + 1, sizeof *line); + + line[0] = p = buf; + for (i = 1; i <= n_lines; i++) + line[i] = p = memchr (p, eolbyte, lim - p) + 1; + + errno = fread_errno; + return n_lines; +} + +static int +write_permuted_output (size_t n_lines, char * const *line, size_t lo_input, + size_t const *permutation) +{ + size_t i; + + if (line) + for (i = 0; i < n_lines; i++) + { + char * const *p = line + permutation[i]; + size_t len = p[1] - p[0]; + if (fwrite (p[0], sizeof *p[0], len, stdout) != len) + return -1; + } + else + for (i = 0; i < n_lines; i++) + { + unsigned long int n = lo_input + permutation[i]; + if (printf ("%lu\n", n) < 0) + return -1; + } + + return 0; +} + +int +main (int argc, char **argv) +{ + bool echo = false; + size_t lo_input = SIZE_MAX; + size_t hi_input = 0; + size_t head_lines = SIZE_MAX; + char const *outfile = NULL; + char *random_source = NULL; + char eolbyte = '\n'; + + int optc; + int n_operands; + char **operand; + size_t n_lines; + char **line; + struct randint_source *randint_source; + size_t const *permutation; + + initialize_main (&argc, &argv); + program_name = argv[0]; + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "ei:n:o:z", long_opts, NULL)) != -1) + switch (optc) + { + case 'e': + echo = true; + break; + + case 'i': + { + unsigned long int argval = 0; + char *p = strchr (optarg, '-'); + bool invalid = !p; + + if (input_numbers_option_used (lo_input, hi_input)) + error (EXIT_FAILURE, 0, _("multiple -i options specified")); + + if (p) + { + *p = '\0'; + invalid = ((xstrtoul (optarg, NULL, 10, &argval, NULL) + != LONGINT_OK) + || SIZE_MAX < argval); + *p = '-'; + lo_input = argval; + optarg = p + 1; + } + + invalid |= ((xstrtoul (optarg, NULL, 10, &argval, NULL) + != LONGINT_OK) + || SIZE_MAX < argval); + hi_input = argval; + n_lines = hi_input - lo_input + 1; + invalid |= ((lo_input <= hi_input) == (n_lines == 0)); + if (invalid) + error (EXIT_FAILURE, 0, _("invalid input range %s"), + quote (optarg)); + } + break; + + case 'n': + { + unsigned long int argval; + strtol_error e = xstrtoul (optarg, NULL, 10, &argval, NULL); + + if (e == LONGINT_OK) + head_lines = MIN (head_lines, argval); + else if (e != LONGINT_OVERFLOW) + error (EXIT_FAILURE, 0, _("invalid line count %s"), + quote (optarg)); + } + break; + + case 'o': + if (outfile && !STREQ (outfile, optarg)) + error (EXIT_FAILURE, 0, _("multiple output files specified")); + outfile = optarg; + break; + + case RANDOM_SOURCE_OPTION: + if (random_source && !STREQ (random_source, optarg)) + error (EXIT_FAILURE, 0, _("multiple random sources specified")); + random_source = optarg; + break; + + case 'z': + eolbyte = '\0'; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + + n_operands = argc - optind; + operand = argv + optind; + + if (echo) + { + if (input_numbers_option_used (lo_input, hi_input)) + error (EXIT_FAILURE, 0, _("cannot combine -e and -i options")); + input_from_argv (operand, n_operands, eolbyte); + n_lines = n_operands; + line = operand; + } + else if (input_numbers_option_used (lo_input, hi_input)) + { + if (n_operands) + { + error (0, 0, _("extra operand %s\n"), quote (operand[0])); + usage (EXIT_FAILURE); + } + n_lines = hi_input - lo_input + 1; + line = NULL; + } + else + { + char **input_lines; + + switch (n_operands) + { + case 0: + break; + + case 1: + if (! (STREQ (operand[0], "-") || freopen (operand[0], "r", stdin))) + error (EXIT_FAILURE, errno, "%s", operand[0]); + break; + + default: + error (0, 0, _("extra operand %s"), quote (operand[1])); + usage (EXIT_FAILURE); + } + + n_lines = read_input (stdin, eolbyte, &input_lines); + line = input_lines; + } + + head_lines = MIN (head_lines, n_lines); + + randint_source = randint_all_new (random_source, + randperm_bound (head_lines, n_lines)); + if (! randint_source) + error (EXIT_FAILURE, errno, "%s", quotearg_colon (random_source)); + + /* Close stdin now, rather than earlier, so that randint_all_new + doesn't have to worry about opening something other than + stdin. */ + if (! (echo || input_numbers_option_used (lo_input, hi_input)) + && (ferror (stdin) || fclose (stdin) != 0)) + error (EXIT_FAILURE, errno, _("read error")); + + permutation = randperm_new (randint_source, head_lines, n_lines); + + if (outfile && ! freopen (outfile, "w", stdout)) + error (EXIT_FAILURE, errno, "%s", quotearg_colon (outfile)); + if (write_permuted_output (head_lines, line, lo_input, permutation) != 0) + error (EXIT_FAILURE, errno, _("write error")); + + return EXIT_SUCCESS; +} diff --git a/tests/misc/shuf b/tests/misc/shuf new file mode 100755 index 000000000..17dea5045 --- /dev/null +++ b/tests/misc/shuf @@ -0,0 +1,37 @@ +#!/bin/sh +# Ensure that shuf randomizes its input. + +if test "$VERBOSE" = yes; then + set -x + shuf --version +fi + +pwd=`pwd` +t0=`echo "$0"|sed 's,.*/,,'`.tmp; tmp=$t0/$$ +trap 'status=$?; cd $pwd; chmod -R u+rwx $t0; rm -rf $t0 && exit $status' 0 +trap '(exit $?); exit $?' 1 2 13 15 + +framework_failure=0 +mkdir -p $tmp || framework_failure=1 +cd $tmp || framework_failure=1 +seq 100 > in || framework_failure=1 + +if test $framework_failure = 1; then + echo "$0: failure in testing framework" 1>&2 + (exit 1); exit 1 +fi + +fail=0 + +shuf in >out || fail=1 + +# Fail if the input is the same as the output. +# This is a probabilistic test :-) +# However, the odds of failure are very low: 1 in 100! (~ 1 in 10^158) +cmp in out > /dev/null && { fail=1; echo "not random?" 1>&2; } + +# Fail if the sorted output is not the same as the input. +sort -n out > out1 +cmp in out1 || { fail=1; echo "not a permutation" 1>&2; } + +(exit $fail); exit $fail -- cgit v1.2.3-54-g00ecf