summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2013-12-06 09:55:17 -0800
committer Paul Eggert <eggert@cs.ucla.edu> 2013-12-06 09:55:55 -0800
commit e1b753b3e3a3506746f5615cc1f6b5b22f564ccf (patch)
tree f4e31a0a98f8e9dcd1b1b3b2f45f5a78e5b02af0
parent 34582a1aedae0d7002b76a64a14dc3143198ced0 (diff)
download coreutils-e1b753b3e3a3506746f5615cc1f6b5b22f564ccf.tar.xz
shuf: --repeat, not --repetitions; default --head-count is infinity
Original problem reported by Philipp Thomas in
<http://bugs.gnu.org/16061>.
* NEWS: shuf --repeat, not shuf --repetitions.
* doc/coreutils.texi (shuf invocation):
* src/shuf.c (usage, long_opts, main):
* tests/misc/shuf.sh: Likewise.
Also, the default head-count is infinity.
-rw-r--r-- NEWS 4
-rw-r--r-- doc/coreutils.texi 37
-rw-r--r-- src/shuf.c 67
-rwxr-xr-x tests/misc/shuf.sh 50
4 files changed, 70 insertions, 88 deletions
diff --git a/NEWS b/NEWS
index 529c54b7d..87fd27dae 100644
--- a/NEWS
+++ b/NEWS
@@ -92,8 +92,8 @@ GNU coreutils NEWS -*- outline -*-
shred accepts new parameters to the --remove option to give greater
control over that operation, which can greatly reduce sync overhead.
- shuf accepts a new option: --repetitions (-r), to allow repetitions
- of input items in the permuted output.
+ shuf accepts a new option: --repeat (-r), which can repeat items in
+ the output.
** Changes in behavior
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 74d502552..ed311a14a 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -4948,14 +4948,16 @@ Use @var{file} as a source of random data used to determine which
permutation to generate. @xref{Random sources}.
@item -r
-@itemx --repetitions
+@itemx --repeat
@opindex -r
-@opindex --repetitions
-@cindex allowing repetitions in output values
-Changes the default behaviour of @command{shuf}, allowing repetition in
-output values (in which case, @option{--head-count} can be larger
-than the number of input values). If @option{--head-count} is not
-specified, output a single random value.
+@opindex --repeat
+@cindex repeat output values
+Repeat output values, that is, select with replacement. With this
+option the output is not a permutation of the input; instead, each
+output line is randomly chosen from all the inputs. This option is
+typically combined with @option{--head-count}; if
+@option{--head-count} is not given, @command{shuf} repeats
+indefinitely.
@zeroTerminatedOption
@@ -5010,37 +5012,24 @@ and the command @samp{shuf -i 1-4} might output:
@end example
@noindent
-These examples all have four input lines, so @command{shuf} might
+The above examples all have four input lines, so @command{shuf} might
produce any of the twenty-four possible permutations of the input. In
general, if there are @var{n} input lines, there are @var{n}! (i.e.,
@var{n} factorial, or @var{n} * (@var{n} - 1) * @dots{} * 1) possible
output permutations.
@noindent
-To output 50 random numbers between 0 and 9, use:
+To output 50 random numbers each in the range 0 through 9, use:
@example
-shuf --repetitions --input-range 0-9 --head-count 50
-@end example
-
-@noindent
-or (using short options):
-
-@example
-shuf -r -i0-9 -n50
+shuf -r -n 50 -i 0-9
@end example
@noindent
To simulate 100 coin flips, use:
@example
-shuf -r -n100 -e Head Tail
-@end example
-
-@noindent
-or
-@example
-printf '%s\n' Head Tail | shuf -r -n100
+shuf -r -n 100 -e Head Tail
@end example
@exitstatus
diff --git a/src/shuf.c b/src/shuf.c
index f7fc9369c..456140f26 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -76,8 +76,7 @@ Write a random permutation of the input lines to standard output.\n\
-n, --head-count=COUNT output at most COUNT lines\n\
-o, --output=FILE write result to FILE instead of standard output\n\
--random-source=FILE get random bytes from FILE\n\
- -r, --repetitions output COUNT items, allowing repetition.\n\
- -n 1 is implied if not specified.\n\
+ -r, --repeat output lines can be repeated\n\
-z, --zero-terminated end lines with 0 byte, not newline\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -106,7 +105,7 @@ static struct option const long_opts[] =
{"head-count", required_argument, NULL, 'n'},
{"output", required_argument, NULL, 'o'},
{"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
- {"repetitions", no_argument, NULL, 'r'},
+ {"repeat", no_argument, NULL, 'r'},
{"zero-terminated", no_argument, NULL, 'z'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
@@ -300,10 +299,10 @@ read_input (FILE *in, char eolbyte, char ***pline)
return n_lines;
}
-/* output 'n_lines' to stdout from 'line' array,
- chosen by the indices in 'permutation'.
- 'permutation' and 'line' must have at least 'n_lines' elements.
- strings in 'line' must include the line-terminator character. */
+/* Output N_LINES lines to stdout from LINE array,
+ chosen by the indices in PERMUTATION.
+ PERMUTATION and LINE must have at least N_LINES elements.
+ Strings in LINE must include the line-terminator character. */
static int
write_permuted_lines (size_t n_lines, char *const *line,
size_t const *permutation)
@@ -321,8 +320,8 @@ write_permuted_lines (size_t n_lines, char *const *line,
return 0;
}
-/* output 'n_lines' of numbers to stdout, from 'permutation' array.
- 'permutation' must have at least 'n_lines' elements. */
+/* Output N_LINES of numbers to stdout, from PERMUTATION array.
+ PERMUTATION must have at least N_LINES elements. */
static int
write_permuted_numbers (size_t n_lines, size_t lo_input,
size_t const *permutation, char eolbyte)
@@ -339,8 +338,8 @@ write_permuted_numbers (size_t n_lines, size_t lo_input,
return 0;
}
-/* output 'count' numbers to stdout, chosen randomly from range
- lo_input to hi_input. */
+/* Output COUNT numbers to stdout, chosen randomly from range
+ LO_INPUT through HI_INPUT. */
static int
write_random_numbers (struct randint_source *s, size_t count,
size_t lo_input, size_t hi_input, char eolbyte)
@@ -358,9 +357,9 @@ write_random_numbers (struct randint_source *s, size_t count,
return 0;
}
-/* output 'count' lines to stdout from 'lines' array.
- 'lines' must have at least 'n_lines' element in it.
- strings in 'line' must include the line-terminator character. */
+/* Output COUNT lines to stdout from LINES array.
+ LINES must have at least N_LINES elements in it.
+ Strings in LINES must include the line-terminator character. */
static int
write_random_lines (struct randint_source *s, size_t count,
char *const *lines, size_t n_lines)
@@ -392,7 +391,7 @@ main (int argc, char **argv)
char eolbyte = '\n';
char **input_lines = NULL;
bool use_reservoir_sampling = false;
- bool repetition = false;
+ bool repeat = false;
int optc;
int n_operands;
@@ -479,7 +478,7 @@ main (int argc, char **argv)
break;
case 'r':
- repetition = true;
+ repeat = true;
break;
case 'z':
@@ -495,20 +494,19 @@ main (int argc, char **argv)
n_operands = argc - optind;
operand = argv + optind;
- /* Check invalid usage */
+ /* Check invalid usage. */
if (echo && input_range)
{
error (0, 0, _("cannot combine -e and -i options"));
usage (EXIT_FAILURE);
}
- if ((n_operands>0 && input_range)
- || (!echo && !input_range && n_operands>=2))
+ if (input_range ? 0 < n_operands : !echo && 1 < n_operands)
{
error (0, 0, _("extra operand %s"), quote (operand[1]));
usage (EXIT_FAILURE);
}
- /* Prepare input */
+ /* Prepare input. */
if (echo)
{
input_from_argv (operand, n_operands, eolbyte);
@@ -522,15 +520,15 @@ main (int argc, char **argv)
}
else
{
- /* Input file specified, re-open it as STDIN */
- if (n_operands==1)
- if (! (STREQ (operand[0], "-") || ! head_lines
- || freopen (operand[0], "r", stdin)))
- error (EXIT_FAILURE, errno, "%s", operand[0]);
+ /* If an input file is specified, re-open it as stdin. */
+ if (n_operands == 1)
+ if (! (STREQ (operand[0], "-") || ! head_lines
+ || freopen (operand[0], "r", stdin)))
+ error (EXIT_FAILURE, errno, "%s", operand[0]);
fadvise (stdin, FADVISE_SEQUENTIAL);
- if (! repetition && head_lines != SIZE_MAX
+ if (! repeat && head_lines != SIZE_MAX
&& (! head_lines || input_size () > RESERVOIR_MIN_INPUT))
{
use_reservoir_sampling = true;
@@ -543,18 +541,13 @@ main (int argc, char **argv)
}
}
- /* When generating random numbers with repetitions,
- the default count is one, unless specified by the user. */
- if (repetition && head_lines == SIZE_MAX)
- head_lines = 1 ;
-
- if (! repetition)
+ if (! repeat)
head_lines = MIN (head_lines, n_lines);
randint_source = randint_all_new (random_source,
- (use_reservoir_sampling || repetition)?
- SIZE_MAX:
- randperm_bound (head_lines, n_lines));
+ (use_reservoir_sampling || repeat
+ ? SIZE_MAX
+ : randperm_bound (head_lines, n_lines)));
if (! randint_source)
error (EXIT_FAILURE, errno, "%s", quotearg_colon (random_source));
@@ -574,14 +567,14 @@ main (int argc, char **argv)
&& (fclose (stdin) != 0))
error (EXIT_FAILURE, errno, _("read error"));
- if (!repetition)
+ if (!repeat)
permutation = randperm_new (randint_source, head_lines, n_lines);
if (outfile && ! freopen (outfile, "w", stdout))
error (EXIT_FAILURE, errno, "%s", quotearg_colon (outfile));
/* Generate output according to requested method */
- if (repetition)
+ if (repeat)
{
if (input_range)
i = write_random_numbers (randint_source, head_lines,
diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh
index a25a6f8ad..28c6483ca 100755
--- a/tests/misc/shuf.sh
+++ b/tests/misc/shuf.sh
@@ -94,66 +94,66 @@ shuf -i0-9 -o A -o B &&
shuf -i0-9 --random-source A --random-source B &&
{ fail=1; echo "shuf did not detect multiple --random-source usage.">&2 ; }
-# Test --repetition option
+# Test --repeat option
-# --repetition without count should return one line
-shuf --rep -i0-10 > exp || framework_failure_
+# --repeat without count should return an indefinite number of lines
+shuf --rep -i 0-10 | head -n 1000 > exp || framework_failure_
c=$(wc -l < exp) || framework_failure_
-test "$c" -eq 1 || { fail=1; echo "--repetition default count is not 1">&2 ; }
+test "$c" -eq 1000 || { fail=1; echo "--repeat does not repeat indefinitely">&2 ; }
-# --repetition can output more values than the input range
+# --repeat can output more values than the input range
shuf --rep -i0-9 -n1000 > exp || framework_failure_
c=$(wc -l < exp) || framework_failure_
-test "$c" -eq 1000 || { fail=1; echo "--repetition with --count failed">&2 ; }
+test "$c" -eq 1000 || { fail=1; echo "--repeat with --count failed">&2 ; }
# Check output values (this is not bullet-proof, but drawing 1000 values
# between 0 and 9 should produce all values, unless there's a bug in shuf
# or a very poor random source, or extremely bad luck)
c=$(sort -nu exp | paste -s -d ' ') || framework_failure_
test "$c" = "0 1 2 3 4 5 6 7 8 9" ||
- { fail=1; echo "--repetition produced bad output">&2 ; }
+ { fail=1; echo "--repeat produced bad output">&2 ; }
-# check --repetition with non-zero low value
+# check --repeat with non-zero low value
shuf --rep -i222-233 -n2000 > exp || framework_failure_
c=$(cat exp | sort -nu | paste -s -d ' ') || framework_failure_
test "$c" = "222 223 224 225 226 227 228 229 230 231 232 233" ||
- { fail=1; echo "--repetition produced bad output with non-zero low">&2 ; }
+ { fail=1; echo "--repeat produced bad output with non-zero low">&2 ; }
-# --repetition,-i,count=0 should not fail and produce no output
+# --repeat,-i,count=0 should not fail and produce no output
shuf --rep -i0-9 -n0 > exp || framework_failure_
# file size should be zero (no output from shuf)
test \! -s exp ||
- { fail=1; echo "--repetition,-i0-9,-n0 produced bad output">&2 ; }
+ { fail=1; echo "--repeat,-i0-9,-n0 produced bad output">&2 ; }
-# --repetition with -e, without count, should return one line
-shuf --rep -e A B C D > exp || framework_failure_
-c=$(cat exp | wc -l) || framework_failure_
-test "$c" -eq 1 ||
- { fail=1; echo "--repetition,-e default count is not 1">&2 ; }
+# --repeat with -e, without count, should repeat indefinitely
+shuf --rep -e A B C D | head -n 1000 > exp || framework_failure_
+c=$(wc -l < exp) || framework_failure_
+test "$c" -eq 1000 ||
+ { fail=1; echo "--repeat,-e does not repeat indefinitely">&2 ; }
-# --repetition with STDIN, without count, should return one line
-printf "A\nB\nC\nD\nE\n" | shuf --rep > exp || framework_failure_
+# --repeat with STDIN, without count, should repeat indefinitely
+printf "A\nB\nC\nD\nE\n" | shuf --rep | head -n 1000 > exp || framework_failure_
c=$(wc -l < exp) || framework_failure_
-test "$c" -eq 1 ||
- { fail=1; echo "--repetition,STDIN default count is not 1">&2 ; }
+test "$c" -eq 1000 ||
+ { fail=1; echo "--repeat,STDIN does not repeat indefinitely">&2 ; }
-# --repetition with STDIN,count - can return move values than input lines
+# --repeat with STDIN,count - can return more values than input lines
printf "A\nB\nC\nD\nE\n" | shuf --rep -n2000 > exp || framework_failure_
c=$(wc -l < exp) || framework_failure_
test "$c" -eq 2000 ||
- { fail=1; echo "--repetition,STDIN,count failed">&2 ; }
+ { fail=1; echo "--repeat,STDIN,count failed">&2 ; }
# Check output values (this is not bullet-proof, but drawing 2000 values
# between A and E should produce all values, unless there's a bug in shuf
# or a very poor random source, or extremely bad luck)
c=$(sort -u exp | paste -s -d ' ') || framework_failure_
test "$c" = "A B C D E" ||
- { fail=1; echo "--repetition,STDIN,count produced bad output">&2 ; }
+ { fail=1; echo "--repeat,STDIN,count produced bad output">&2 ; }
-# --repetition,stdin,count=0 should not fail and produce no output
+# --repeat,stdin,count=0 should not fail and produce no output
printf "A\nB\nC\nD\nE\n" | shuf --rep -n0 > exp || framework_failure_
# file size should be zero (no output from shuf)
test \! -s exp ||
- { fail=1; echo "--repetition,STDIN,-n0 produced bad output">&2 ; }
+ { fail=1; echo "--repeat,STDIN,-n0 produced bad output">&2 ; }
Exit $fail