diff options
-rw-r--r-- | ChangeLog | 10 |
-rw-r--r-- | NEWS | 7 |
-rw-r--r-- | doc/coreutils.texi | 31 |
-rw-r--r-- | src/sort.c | 33 |
-rwxr-xr-x | tests/misc/sort-compress | 6 |
5 files changed, 50 insertions, 37 deletions
@@ -1,3 +1,13 @@ +2007-02-19 Paul Eggert <eggert@cs.ucla.edu> + + * NEWS: sort now uses a --compress-program option rather than + an environment variable. + * doc/coreutils.texi (sort invocation): Document this. + * src/sort.c (usage): Likewise. + (COMPRESS_PROGRAM_OPTION): New const. + (long_options, create_temp, main): Support new option. + * tests/misc/sort-compress: Test it. + 2007-02-19 Jim Meyering <jim@meyering.net> * bootstrap: Fix typo s/dowloading/downloading/ in --help output. @@ -1,6 +1,6 @@ GNU coreutils NEWS -*- outline -*- -* Noteworthy changes in release 6.7-dirty (????-??-??) [stable] +* Noteworthy changes in release 6.7-dirty (????-??-??) [not-unstable] ** Bug fixes @@ -45,8 +45,9 @@ GNU coreutils NEWS -*- outline -*- ** New features By default, sort usually compresses each temporary file it writes. - When sorting very large inputs, this can result in sort using far - less temporary disk space and in improved performance. + This can help save both time and disk space when sorting large inputs. + The default compression program is gzip, but this can be overridden + with sort's new --compress-program=PROG option. ** New features diff --git a/doc/coreutils.texi b/doc/coreutils.texi index f738d8399..1a2dba43e 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -3467,20 +3467,6 @@ value as the directory for temporary files instead of @file{/tmp}. The @option{--temporary-directory} (@option{-T}) option in turn overrides the environment variable. -@vindex GNUSORT_COMPRESSOR -To improve performance when sorting very large files, GNU sort will, -by default, try to compress temporary files with the program -@file{gzip}. The environment variable @env{GNUSORT_COMPRESSOR} can be -set to the name of another program to be used. The program specified -must compress standard input to standard output when no arguments are -given to it, and it must decompress standard input to standard output -when the @option{-d} argument is given to it. 
If the program exits -with nonzero status, sort will terminate with an error. To disable -compression of temporary files, set the variable to the empty string. -Whitespace and the backslash character should not appear in the -program name. They are reserved for future use. - - The following options affect the ordering of output lines. They may be specified globally or as part of a specific key field. If no key fields are specified, global options apply to comparison of entire @@ -3647,6 +3633,23 @@ Other options are: @table @samp +@item --compress-program=@var{prog} +If @var{prog} is not the empty string, compress any temporary files +with the program @var{prog} rather than with the default compression +method. The default is currently @command{gzip} but this may change. + +With no arguments, @var{prog} must compress standard input to standard +output, and when given the @option{-d} option it must decompress +standard input to standard output. + +Terminate with an error if @var{prog} exits with nonzero status. + +Whitespace and the backslash character should not appear in +@var{prog}; they are reserved for future use. + +If @var{prog} is the empty string, do not compress temporary +files. + @item -k @var{pos1}[,@var{pos2}] @itemx --key=@var{pos1}[,@var{pos2}] @opindex -k diff --git a/src/sort.c b/src/sort.c index c7ae0c8c0..6a7de9c52 100644 --- a/src/sort.c +++ b/src/sort.c @@ -281,7 +281,7 @@ static bool have_read_stdin; static struct keyfield *keylist; /* Program used to (de)compress temp files. Must accept -d. 
*/ -static const char *compress_program; +static char const *compress_program; static void sortlines_temp (struct line *, size_t, struct line *); @@ -339,6 +339,8 @@ Other options:\n\ \n\ -c, --check, --check=diagnose-first check for sorted input; do not sort\n\ -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\ + --compress-program=PROG compress temporaries with PROG;\n\ + decompress them with PROG -d\n\ -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\ -m, --merge merge already sorted files; do not sort\n\ "), stdout); @@ -390,6 +392,7 @@ native byte values.\n\ enum { CHECK_OPTION = CHAR_MAX + 1, + COMPRESS_PROGRAM_OPTION, RANDOM_SOURCE_OPTION }; @@ -399,6 +402,7 @@ static struct option const long_options[] = { {"ignore-leading-blanks", no_argument, NULL, 'b'}, {"check", optional_argument, NULL, CHECK_OPTION}, + {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION}, {"dictionary-order", no_argument, NULL, 'd'}, {"ignore-case", no_argument, NULL, 'f'}, {"general-numeric-sort", no_argument, NULL, 'g'}, @@ -839,29 +843,18 @@ pipe_fork (int pipefds[2], size_t tries) static char * create_temp (FILE **pfp, pid_t *ppid) { - static bool compress_program_known; int tempfd; struct tempnode *node = create_temp_file (&tempfd); char *name = node->name; - if (! compress_program_known) + if (! compress_program) { - compress_program = getenv ("GNUSORT_COMPRESSOR"); - if (compress_program == NULL) - { - static const char *default_program = "gzip"; - const char *path_program = find_in_path (default_program); - - if (path_program != default_program) - compress_program = path_program; - } - else if (*compress_program == '\0') - compress_program = NULL; - - compress_program_known = true; + static char const default_compress_program[] = "gzip"; + char const *prog = find_in_path (default_compress_program); + compress_program = (prog == default_compress_program ? 
"" : prog); } - if (compress_program) + if (*compress_program) { int pipefds[2]; @@ -2946,6 +2939,12 @@ main (int argc, char **argv) checkonly = c; break; + case COMPRESS_PROGRAM_OPTION: + if (compress_program && strcmp (compress_program, optarg) != 0) + error (SORT_FAILURE, 0, _("multiple compress programs specified")); + compress_program = optarg; + break; + case 'k': key = key_init (&key_buf); diff --git a/tests/misc/sort-compress b/tests/misc/sort-compress index af961d202..b0f4dd703 100755 --- a/tests/misc/sort-compress +++ b/tests/misc/sort-compress @@ -64,21 +64,21 @@ test -f ok || fail=1 rm -f ok # This is to make sure we can disable compression -PATH=.:$PATH GNUSORT_COMPRESSOR= sort -S 1k in > out || fail=1 +PATH=.:$PATH sort --compress-program= -S 1k in > out || fail=1 cmp exp out || fail=1 test $fail = 1 && diff out exp 2> /dev/null test -f ok && fail=1 # This is to make sure we can use something other than gzip mv gzip dzip || fail=1 -GNUSORT_COMPRESSOR=./dzip sort -S 1k in > out || fail=1 +sort --compress-program=./dzip -S 1k in > out || fail=1 cmp exp out || fail=1 test $fail = 1 && diff out exp 2> /dev/null test -f ok || fail=1 rm -f ok # Make sure it can find other programs in PATH correctly -PATH=.:$PATH GNUSORT_COMPRESSOR=dzip sort -S 1k in > out || fail=1 +PATH=.:$PATH sort --compress-program=dzip -S 1k in > out || fail=1 cmp exp out || fail=1 test $fail = 1 && diff out exp 2> /dev/null test -f ok || fail=1 |