summary refs log tree commit diff
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2007-02-19 22:11:18 +0100
committer Jim Meyering <jim@meyering.net> 2007-02-19 22:11:18 +0100
commit c363fcb9f06cec38040cf4294521096aa70179d9 (patch)
tree bce2a2960c9016ec2979efad9a1572c1f9471d83
parent e189deb855253b184b64fc1e532c2f00f438ae64 (diff)
download coreutils-c363fcb9f06cec38040cf4294521096aa70179d9.tar.xz
* NEWS: sort now uses a --compress-program option rather than
an environment variable.
* doc/coreutils.texi (sort invocation): Document this.
* src/sort.c (usage): Likewise.
(COMPRESS_PROGRAM_OPTION): New const.
(long_options, create_temp, main): Support new option.
* tests/misc/sort-compress: Test it.
-rw-r--r-- ChangeLog 10
-rw-r--r-- NEWS 7
-rw-r--r-- doc/coreutils.texi 31
-rw-r--r-- src/sort.c 33
-rwxr-xr-x tests/misc/sort-compress 6
5 files changed, 50 insertions, 37 deletions
diff --git a/ChangeLog b/ChangeLog
index 8a162c60d..c0112b6c1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2007-02-19 Paul Eggert <eggert@cs.ucla.edu>
+
+ * NEWS: sort now uses a --compress-program option rather than
+ an environment variable.
+ * doc/coreutils.texi (sort invocation): Document this.
+ * src/sort.c (usage): Likewise.
+ (COMPRESS_PROGRAM_OPTION): New const.
+ (long_options, create_temp, main): Support new option.
+ * tests/misc/sort-compress: Test it.
+
2007-02-19 Jim Meyering <jim@meyering.net>
* bootstrap: Fix typo s/dowloading/downloading/ in --help output.
diff --git a/NEWS b/NEWS
index e0b829870..a09987701 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,6 @@
GNU coreutils NEWS -*- outline -*-
-* Noteworthy changes in release 6.7-dirty (????-??-??) [stable]
+* Noteworthy changes in release 6.7-dirty (????-??-??) [not-unstable]
** Bug fixes
@@ -45,8 +45,9 @@ GNU coreutils NEWS -*- outline -*-
** New features
By default, sort usually compresses each temporary file it writes.
- When sorting very large inputs, this can result in sort using far
- less temporary disk space and in improved performance.
+ This can help save both time and disk space when sorting large inputs.
+ The default compression program is gzip, but this can be overridden
+ with sort's new --compress-program=PROG option.
** New features
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index f738d8399..1a2dba43e 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3467,20 +3467,6 @@ value as the directory for temporary files instead of @file{/tmp}. The
@option{--temporary-directory} (@option{-T}) option in turn overrides
the environment variable.
-@vindex GNUSORT_COMPRESSOR
-To improve performance when sorting very large files, GNU sort will,
-by default, try to compress temporary files with the program
-@file{gzip}. The environment variable @env{GNUSORT_COMPRESSOR} can be
-set to the name of another program to be used. The program specified
-must compress standard input to standard output when no arguments are
-given to it, and it must decompress standard input to standard output
-when the @option{-d} argument is given to it. If the program exits
-with nonzero status, sort will terminate with an error. To disable
-compression of temporary files, set the variable to the empty string.
-Whitespace and the backslash character should not appear in the
-program name. They are reserved for future use.
-
-
The following options affect the ordering of output lines. They may be
specified globally or as part of a specific key field. If no key
fields are specified, global options apply to comparison of entire
@@ -3647,6 +3633,23 @@ Other options are:
@table @samp
+@item --compress-program=@var{prog}
+If @var{prog} is not the empty string, compress any temporary files
+with the program @var{prog} rather than with the default compression
+method. The default is currently @command{gzip} but this may change.
+
+With no arguments, @var{prog} must compress standard input to standard
+output, and when given the @option{-d} option it must decompress
+standard input to standard output.
+
+Terminate with an error if @var{prog} exits with nonzero status.
+
+Whitespace and the backslash character should not appear in
+@var{prog}; they are reserved for future use.
+
+If @var{prog} is the empty string, do not compress temporary
+files.
+
@item -k @var{pos1}[,@var{pos2}]
@itemx --key=@var{pos1}[,@var{pos2}]
@opindex -k
diff --git a/src/sort.c b/src/sort.c
index c7ae0c8c0..6a7de9c52 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -281,7 +281,7 @@ static bool have_read_stdin;
static struct keyfield *keylist;
/* Program used to (de)compress temp files. Must accept -d. */
-static const char *compress_program;
+static char const *compress_program;
static void sortlines_temp (struct line *, size_t, struct line *);
@@ -339,6 +339,8 @@ Other options:\n\
\n\
-c, --check, --check=diagnose-first check for sorted input; do not sort\n\
-C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
+ --compress-program=PROG compress temporaries with PROG;\n\
+ decompress them with PROG -d\n\
-k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
-m, --merge merge already sorted files; do not sort\n\
"), stdout);
@@ -390,6 +392,7 @@ native byte values.\n\
enum
{
CHECK_OPTION = CHAR_MAX + 1,
+ COMPRESS_PROGRAM_OPTION,
RANDOM_SOURCE_OPTION
};
@@ -399,6 +402,7 @@ static struct option const long_options[] =
{
{"ignore-leading-blanks", no_argument, NULL, 'b'},
{"check", optional_argument, NULL, CHECK_OPTION},
+ {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
{"dictionary-order", no_argument, NULL, 'd'},
{"ignore-case", no_argument, NULL, 'f'},
{"general-numeric-sort", no_argument, NULL, 'g'},
@@ -839,29 +843,18 @@ pipe_fork (int pipefds[2], size_t tries)
static char *
create_temp (FILE **pfp, pid_t *ppid)
{
- static bool compress_program_known;
int tempfd;
struct tempnode *node = create_temp_file (&tempfd);
char *name = node->name;
- if (! compress_program_known)
+ if (! compress_program)
{
- compress_program = getenv ("GNUSORT_COMPRESSOR");
- if (compress_program == NULL)
- {
- static const char *default_program = "gzip";
- const char *path_program = find_in_path (default_program);
-
- if (path_program != default_program)
- compress_program = path_program;
- }
- else if (*compress_program == '\0')
- compress_program = NULL;
-
- compress_program_known = true;
+ static char const default_compress_program[] = "gzip";
+ char const *prog = find_in_path (default_compress_program);
+ compress_program = (prog == default_compress_program ? "" : prog);
}
- if (compress_program)
+ if (*compress_program)
{
int pipefds[2];
@@ -2946,6 +2939,12 @@ main (int argc, char **argv)
checkonly = c;
break;
+ case COMPRESS_PROGRAM_OPTION:
+ if (compress_program && strcmp (compress_program, optarg) != 0)
+ error (SORT_FAILURE, 0, _("multiple compress programs specified"));
+ compress_program = optarg;
+ break;
+
case 'k':
key = key_init (&key_buf);
diff --git a/tests/misc/sort-compress b/tests/misc/sort-compress
index af961d202..b0f4dd703 100755
--- a/tests/misc/sort-compress
+++ b/tests/misc/sort-compress
@@ -64,21 +64,21 @@ test -f ok || fail=1
rm -f ok
# This is to make sure we can disable compression
-PATH=.:$PATH GNUSORT_COMPRESSOR= sort -S 1k in > out || fail=1
+PATH=.:$PATH sort --compress-program= -S 1k in > out || fail=1
cmp exp out || fail=1
test $fail = 1 && diff out exp 2> /dev/null
test -f ok && fail=1
# This is to make sure we can use something other than gzip
mv gzip dzip || fail=1
-GNUSORT_COMPRESSOR=./dzip sort -S 1k in > out || fail=1
+sort --compress-program=./dzip -S 1k in > out || fail=1
cmp exp out || fail=1
test $fail = 1 && diff out exp 2> /dev/null
test -f ok || fail=1
rm -f ok
# Make sure it can find other programs in PATH correctly
-PATH=.:$PATH GNUSORT_COMPRESSOR=dzip sort -S 1k in > out || fail=1
+PATH=.:$PATH sort --compress-program=dzip -S 1k in > out || fail=1
cmp exp out || fail=1
test $fail = 1 && diff out exp 2> /dev/null
test -f ok || fail=1