diff options
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | doc/coreutils.texi | 37 | ||||
-rw-r--r-- | src/shuf.c | 67 | ||||
-rwxr-xr-x | tests/misc/shuf.sh | 50 |
4 files changed, 70 insertions, 88 deletions
@@ -92,8 +92,8 @@ GNU coreutils NEWS -*- outline -*- shred accepts new parameters to the --remove option to give greater control over that operation, which can greatly reduce sync overhead. - shuf accepts a new option: --repetitions (-r), to allow repetitions - of input items in the permuted output. + shuf accepts a new option: --repeat (-r), which can repeat items in + the output. ** Changes in behavior diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 74d502552..ed311a14a 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -4948,14 +4948,16 @@ Use @var{file} as a source of random data used to determine which permutation to generate. @xref{Random sources}. @item -r -@itemx --repetitions +@itemx --repeat @opindex -r -@opindex --repetitions -@cindex allowing repetitions in output values -Changes the default behaviour of @command{shuf}, allowing repetition in -output values (in which case, @option{--head-count} can be larger -than the number of input values). If @option{--head-count} is not -specified, output a single random value. +@opindex --repeat +@cindex repeat output values +Repeat output values, that is, select with replacement. With this +option the output is not a permutation of the input; instead, each +output line is randomly chosen from all the inputs. This option is +typically combined with @option{--head-count}; if +@option{--head-count} is not given, @command{shuf} repeats +indefinitely. @zeroTerminatedOption @@ -5010,37 +5012,24 @@ and the command @samp{shuf -i 1-4} might output: @end example @noindent -These examples all have four input lines, so @command{shuf} might +The above examples all have four input lines, so @command{shuf} might produce any of the twenty-four possible permutations of the input. In general, if there are @var{n} input lines, there are @var{n}! (i.e., @var{n} factorial, or @var{n} * (@var{n} - 1) * @dots{} * 1) possible output permutations. @noindent -To output 50 random numbers between 0 and 9, use: +To output 50 random numbers each in the range 0 through 9, use: @example -shuf --repetitions --input-range 0-9 --head-count 50 -@end example - -@noindent -or (using short options): - -@example -shuf -r -i0-9 -n50 +shuf -r -n 50 -i 0-9 @end example @noindent To simulate 100 coin flips, use: @example -shuf -r -n100 -e Head Tail -@end example - -@noindent -or -@example -printf '%s\n' Head Tail | shuf -r -n100 +shuf -r -n 100 -e Head Tail @end example @exitstatus diff --git a/src/shuf.c b/src/shuf.c index f7fc9369c..456140f26 100644 --- a/src/shuf.c +++ b/src/shuf.c @@ -76,8 +76,7 @@ Write a random permutation of the input lines to standard output.\n\ -n, --head-count=COUNT output at most COUNT lines\n\ -o, --output=FILE write result to FILE instead of standard output\n\ --random-source=FILE get random bytes from FILE\n\ - -r, --repetitions output COUNT items, allowing repetition.\n\ - -n 1 is implied if not specified.\n\ + -r, --repeat output lines can be repeated\n\ -z, --zero-terminated end lines with 0 byte, not newline\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); @@ -106,7 +105,7 @@ static struct option const long_opts[] = {"head-count", required_argument, NULL, 'n'}, {"output", required_argument, NULL, 'o'}, {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION}, - {"repetitions", no_argument, NULL, 'r'}, + {"repeat", no_argument, NULL, 'r'}, {"zero-terminated", no_argument, NULL, 'z'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, @@ -300,10 +299,10 @@ read_input (FILE *in, char eolbyte, char ***pline) return n_lines; } -/* output 'n_lines' to stdout from 'line' array, - chosen by the indices in 'permutation'. - 'permutation' and 'line' must have at least 'n_lines' elements. - strings in 'line' must include the line-terminator character. */ +/* Output N_LINES lines to stdout from LINE array, + chosen by the indices in PERMUTATION. + PERMUTATION and LINE must have at least N_LINES elements. + Strings in LINE must include the line-terminator character. */ static int write_permuted_lines (size_t n_lines, char *const *line, size_t const *permutation) @@ -321,8 +320,8 @@ write_permuted_lines (size_t n_lines, char *const *line, return 0; } -/* output 'n_lines' of numbers to stdout, from 'permutation' array. - 'permutation' must have at least 'n_lines' elements. */ +/* Output N_LINES of numbers to stdout, from PERMUTATION array. + PERMUTATION must have at least N_LINES elements. */ static int write_permuted_numbers (size_t n_lines, size_t lo_input, size_t const *permutation, char eolbyte) @@ -339,8 +338,8 @@ write_permuted_numbers (size_t n_lines, size_t lo_input, return 0; } -/* output 'count' numbers to stdout, chosen randomly from range - lo_input to hi_input. */ +/* Output COUNT numbers to stdout, chosen randomly from range + LO_INPUT through HI_INPUT. */ static int write_random_numbers (struct randint_source *s, size_t count, size_t lo_input, size_t hi_input, char eolbyte) @@ -358,9 +357,9 @@ write_random_numbers (struct randint_source *s, size_t count, return 0; } -/* output 'count' lines to stdout from 'lines' array. - 'lines' must have at least 'n_lines' element in it. - strings in 'line' must include the line-terminator character. */ +/* Output COUNT lines to stdout from LINES array. + LINES must have at least N_LINES elements in it. + Strings in LINES_ must include the line-terminator character. */ static int write_random_lines (struct randint_source *s, size_t count, char *const *lines, size_t n_lines) @@ -392,7 +391,7 @@ main (int argc, char **argv) char eolbyte = '\n'; char **input_lines = NULL; bool use_reservoir_sampling = false; - bool repetition = false; + bool repeat = false; int optc; int n_operands; @@ -479,7 +478,7 @@ main (int argc, char **argv) break; case 'r': - repetition = true; + repeat = true; break; case 'z': @@ -495,20 +494,19 @@ main (int argc, char **argv) n_operands = argc - optind; operand = argv + optind; - /* Check invalid usage */ + /* Check invalid usage. */ if (echo && input_range) { error (0, 0, _("cannot combine -e and -i options")); usage (EXIT_FAILURE); } - if ((n_operands>0 && input_range) - || (!echo && !input_range && n_operands>=2)) + if (input_range ? 0 < n_operands : !echo && 1 < n_operands) { error (0, 0, _("extra operand %s"), quote (operand[1])); usage (EXIT_FAILURE); } - /* Prepare input */ + /* Prepare input. */ if (echo) { input_from_argv (operand, n_operands, eolbyte); @@ -522,15 +520,15 @@ main (int argc, char **argv) } else { - /* Input file specified, re-open it as STDIN */ - if (n_operands==1) - if (! (STREQ (operand[0], "-") || ! head_lines - || freopen (operand[0], "r", stdin))) - error (EXIT_FAILURE, errno, "%s", operand[0]); + /* If an input file is specified, re-open it as stdin. */ + if (n_operands == 1) + if (! (STREQ (operand[0], "-") || ! head_lines + || freopen (operand[0], "r", stdin))) + error (EXIT_FAILURE, errno, "%s", operand[0]); fadvise (stdin, FADVISE_SEQUENTIAL); - if (! repetition && head_lines != SIZE_MAX + if (! repeat && head_lines != SIZE_MAX && (! head_lines || input_size () > RESERVOIR_MIN_INPUT)) { use_reservoir_sampling = true; @@ -543,18 +541,13 @@ main (int argc, char **argv) } } - /* When generating random numbers with repetitions, - the default count is one, unless specified by the user. */ - if (repetition && head_lines == SIZE_MAX) - head_lines = 1 ; - - if (! repetition) + if (! repeat) head_lines = MIN (head_lines, n_lines); randint_source = randint_all_new (random_source, - (use_reservoir_sampling || repetition)? - SIZE_MAX: - randperm_bound (head_lines, n_lines)); + (use_reservoir_sampling || repeat + ? SIZE_MAX + : randperm_bound (head_lines, n_lines))); if (! randint_source) error (EXIT_FAILURE, errno, "%s", quotearg_colon (random_source)); @@ -574,14 +567,14 @@ main (int argc, char **argv) && (fclose (stdin) != 0)) error (EXIT_FAILURE, errno, _("read error")); - if (!repetition) + if (!repeat) permutation = randperm_new (randint_source, head_lines, n_lines); if (outfile && ! freopen (outfile, "w", stdout)) error (EXIT_FAILURE, errno, "%s", quotearg_colon (outfile)); /* Generate output according to requested method */ - if (repetition) + if (repeat) { if (input_range) i = write_random_numbers (randint_source, head_lines, diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh index a25a6f8ad..28c6483ca 100755 --- a/tests/misc/shuf.sh +++ b/tests/misc/shuf.sh @@ -94,66 +94,66 @@ shuf -i0-9 -o A -o B && shuf -i0-9 --random-source A --random-source B && { fail=1; echo "shuf did not detect multiple --random-source usage.">&2 ; } -# Test --repetition option +# Test --repeat option -# --repetition without count should return one line -shuf --rep -i0-10 > exp || framework_failure_ +# --repeat without count should return an indefinite number of lines +shuf --rep -i 0-10 | head -n 1000 > exp || framework_failure_ c=$(wc -l < exp) || framework_failure_ -test "$c" -eq 1 || { fail=1; echo "--repetition default count is not 1">&2 ; } +test "$c" -eq 1000 || { fail=1; echo "--repeat does not repeat indefinitely">&2 ; } -# --repetition can output more values than the input range +# --repeat can output more values than the input range shuf --rep -i0-9 -n1000 > exp || framework_failure_ c=$(wc -l < exp) || framework_failure_ -test "$c" -eq 1000 || { fail=1; echo "--repetition with --count failed">&2 ; } +test "$c" -eq 1000 || { fail=1; echo "--repeat with --count failed">&2 ; } # Check output values (this is not bullet-proof, but drawing 1000 values # between 0 and 9 should produce all values, unless there's a bug in shuf # or a very poor random source, or extremely bad luck) c=$(sort -nu exp | paste -s -d ' ') || framework_failure_ test "$c" = "0 1 2 3 4 5 6 7 8 9" || - { fail=1; echo "--repetition produced bad output">&2 ; } + { fail=1; echo "--repeat produced bad output">&2 ; } -# check --repetition with non-zero low value +# check --repeat with non-zero low value shuf --rep -i222-233 -n2000 > exp || framework_failure_ c=$(cat exp | sort -nu | paste -s -d ' ') || framework_failure_ test "$c" = "222 223 224 225 226 227 228 229 230 231 232 233" || - { fail=1; echo "--repetition produced bad output with non-zero low">&2 ; } + { fail=1; echo "--repeat produced bad output with non-zero low">&2 ; } -# --repetition,-i,count=0 should not fail and produce no output +# --repeat,-i,count=0 should not fail and produce no output shuf --rep -i0-9 -n0 > exp || framework_failure_ # file size should be zero (no output from shuf) test \! -s exp || - { fail=1; echo "--repetition,-i0-9,-n0 produced bad output">&2 ; } + { fail=1; echo "--repeat,-i0-9,-n0 produced bad output">&2 ; } -# --repetition with -e, without count, should return one line -shuf --rep -e A B C D > exp || framework_failure_ -c=$(cat exp | wc -l) || framework_failure_ -test "$c" -eq 1 || - { fail=1; echo "--repetition,-e default count is not 1">&2 ; } +# --repeat with -e, without count, should repeat indefinitely +shuf --rep -e A B C D | head -n 1000 > exp || framework_failure_ +c=$(wc -l < exp) || framework_failure_ +test "$c" -eq 1000 || + { fail=1; echo "--repeat,-e does not repeat indefinitely">&2 ; } -# --repetition with STDIN, without count, should return one line -printf "A\nB\nC\nD\nE\n" | shuf --rep > exp || framework_failure_ +# --repeat with STDIN, without count, should repeat indefinitely +printf "A\nB\nC\nD\nE\n" | shuf --rep | head -n 1000 > exp || framework_failure_ c=$(wc -l < exp) || framework_failure_ -test "$c" -eq 1 || - { fail=1; echo "--repetition,STDIN default count is not 1">&2 ; } +test "$c" -eq 1000 || + { fail=1; echo "--repeat,STDIN does not repeat indefinitely">&2 ; } -# --repetition with STDIN,count - can return move values than input lines +# --repeat with STDIN,count - can return move values than input lines printf "A\nB\nC\nD\nE\n" | shuf --rep -n2000 > exp || framework_failure_ c=$(wc -l < exp) || framework_failure_ test "$c" -eq 2000 || - { fail=1; echo "--repetition,STDIN,count failed">&2 ; } + { fail=1; echo "--repeat,STDIN,count failed">&2 ; } # Check output values (this is not bullet-proof, but drawing 2000 values # between A and E should produce all values, unless there's a bug in shuf # or a very poor random source, or extremely bad luck) c=$(sort -u exp | paste -s -d ' ') || framework_failure_ test "$c" = "A B C D E" || - { fail=1; echo "--repetition,STDIN,count produced bad output">&2 ; } + { fail=1; echo "--repeat,STDIN,count produced bad output">&2 ; } -# --repetition,stdin,count=0 should not fail and produce no output +# --repeat,stdin,count=0 should not fail and produce no output printf "A\nB\nC\nD\nE\n" | shuf --rep -n0 > exp || framework_failure_ # file size should be zero (no output from shuf) test \! -s exp || - { fail=1; echo "--repetition,STDIN,-n0 produced bad output">&2 ; } + { fail=1; echo "--repeat,STDIN,-n0 produced bad output">&2 ; } Exit $fail |