summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bo Borgerson <gigabo@gmail.com> 2008-04-03 18:42:57 -0400
committer Jim Meyering <meyering@redhat.com> 2008-06-17 08:50:41 +0200
commit 322c6f2e5cd3d09ed31d9d8dea2310d70f47842a (patch)
tree d3c17172c9c56923607451c1e37d7a331b8bfbea
parent 3435bb7f4021e7f05fbe317ebec34851effce27c (diff)
download coreutils-322c6f2e5cd3d09ed31d9d8dea2310d70f47842a.tar.xz
sort: add new option --files0-from=F
* src/sort.c: Support new option. * tests/misc/sort-files0-from: Test new option. * tests/Makefile.am: Indicate new test. * doc/coreutils.texi: Explain new option. * NEWS: Advertise new option. Signed-off-by: Bo Borgerson <gigabo@gmail.com>
-rw-r--r-- NEWS 5
-rw-r--r-- doc/coreutils.texi 10
-rw-r--r-- src/sort.c 76
-rw-r--r-- tests/Makefile.am 1
-rwxr-xr-x tests/misc/sort-files0-from 96
5 files changed, 183 insertions, 5 deletions
diff --git a/NEWS b/NEWS
index 30d16b80b..745a2c71b 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,11 @@ GNU coreutils NEWS -*- outline -*-
md5sum now accepts the new option, --quiet, to suppress the printing of
'OK' messages. sha1sum, sha224sum, sha384sum, and sha512sum accept it, too.
+ sort accepts a new option, --files0-from=F, that specifies a file
+ containing a NUL-separated list of file names to sort. This list is used
+ instead of file names passed on the command line, to avoid problems with
+ the maximum command-line (argv) length.
+
** Bug fixes
chcon --verbose now prints a newline after each message
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 6be37d805..e4a979e61 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3172,7 +3172,7 @@ Print only the newline counts.
@opindex --max-line-length
Print only the maximum line lengths.
-@macro filesZeroFromOption{cmd,withTotalOption}
+@macro filesZeroFromOption{cmd,withTotalOption,subListOutput}
@itemx --files0-from=@var{FILE}
@opindex --files0-from=@var{FILE}
@c This is commented out to avoid a texi2dvi failure.
@@ -3184,13 +3184,13 @@ This is useful \withTotalOption\
when the list of file names is so long that it may exceed a command line
length limitation.
In such cases, running @command{\cmd\} via @command{xargs} is undesirable
-because it splits the list into pieces and makes @command{\cmd\} print a
-total for each sublist rather than for the entire list.
+because it splits the list into pieces and makes @command{\cmd\} print
+\subListOutput\ for each sublist rather than for the entire list.
One way to produce a list of null-byte-terminated file names is with @sc{gnu}
@command{find}, using its @option{-print0} predicate.
Do not specify any @var{FILE} on the command line when using this option.
@end macro
-@filesZeroFromOption{wc,}
+@filesZeroFromOption{wc,,a total}
For example, to find the length of the longest line in any @file{.c} or
@file{.h} file in the current hierarchy, do this:
@@ -3779,6 +3779,8 @@ Terminate with an error if @var{prog} exits with nonzero status.
White space and the backslash character should not appear in
@var{prog}; they are reserved for future use.
+@filesZeroFromOption{sort,,sorted output}
+
@item -k @var{pos1}[,@var{pos2}]
@itemx --key=@var{pos1}[,@var{pos2}]
@opindex -k
diff --git a/src/sort.c b/src/sort.c
index 632c5c836..8fb943af9 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -36,7 +36,9 @@
#include "physmem.h"
#include "posixver.h"
#include "quote.h"
+#include "quotearg.h"
#include "randread.h"
+#include "readtokens0.h"
#include "stdio--.h"
#include "stdlib--.h"
#include "strnumcmp.h"
@@ -303,8 +305,9 @@ usage (int status)
{
printf (_("\
Usage: %s [OPTION]... [FILE]...\n\
+ or: %s [OPTION]... --files0-from=F\n\
"),
- program_name);
+ program_name, program_name);
fputs (_("\
Write sorted concatenation of all FILE(s) to standard output.\n\
\n\
@@ -341,6 +344,10 @@ Other options:\n\
-C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
--compress-program=PROG compress temporaries with PROG;\n\
decompress them with PROG -d\n\
+ --files0-from=F read input from the files specified by\n\
+ NUL-terminated names in file F\n\
+"), stdout);
+ fputs (_("\
-k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
-m, --merge merge already sorted files; do not sort\n\
"), stdout);
@@ -393,6 +400,7 @@ enum
{
CHECK_OPTION = CHAR_MAX + 1,
COMPRESS_PROGRAM_OPTION,
+ FILES0_FROM_OPTION,
RANDOM_SOURCE_OPTION,
SORT_OPTION
};
@@ -406,6 +414,7 @@ static struct option const long_options[] =
{"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
{"dictionary-order", no_argument, NULL, 'd'},
{"ignore-case", no_argument, NULL, 'f'},
+ {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
{"general-numeric-sort", no_argument, NULL, 'g'},
{"ignore-nonprinting", no_argument, NULL, 'i'},
{"key", required_argument, NULL, 'k'},
@@ -2751,6 +2760,8 @@ main (int argc, char **argv)
bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
bool obsolete_usage = (posix2_version () < 200112);
char **files;
+ char *files_from = NULL;
+ struct Tokens tok;
char const *outfile = NULL;
initialize_main (&argc, &argv);
@@ -2954,6 +2965,10 @@ main (int argc, char **argv)
compress_program = optarg;
break;
+ case FILES0_FROM_OPTION:
+ files_from = optarg;
+ break;
+
case 'k':
key = key_init (&key_buf);
@@ -3098,6 +3113,65 @@ main (int argc, char **argv)
}
}
+ if (files_from)
+ {
+ FILE *stream;
+
+ /* When using --files0-from=F, you may not specify any files
+ on the command-line. */
+ if (nfiles)
+ {
+ error (0, 0, _("extra operand %s"), quote (files[0]));
+ fprintf (stderr, "%s\n",
+ _("file operands cannot be combined with --files0-from"));
+ usage (SORT_FAILURE);
+ }
+
+ if (STREQ (files_from, "-"))
+ stream = stdin;
+ else
+ {
+ stream = fopen (files_from, "r");
+ if (stream == NULL)
+ error (SORT_FAILURE, errno, _("cannot open %s for reading"),
+ quote (files_from));
+ }
+
+ readtokens0_init (&tok);
+
+ if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
+ error (SORT_FAILURE, 0, _("cannot read file names from %s"),
+ quote (files_from));
+
+ if (tok.n_tok)
+ {
+ size_t i;
+ free (files);
+ files = tok.tok;
+ nfiles = tok.n_tok;
+ for (i = 0; i < nfiles; i++)
+ {
+ if (STREQ (files[i], "-"))
+ error (SORT_FAILURE, 0, _("when reading file names from stdin, "
+ "no file name of %s allowed"),
+ quote (files[i]));
+ else if (files[i][0] == '\0')
+ {
+ /* Using the standard `filename:line-number:' prefix here is
+ not totally appropriate, since NUL is the separator, not NL,
+ but it might be better than nothing. */
+ unsigned long int file_number = i + 1;
+ error (SORT_FAILURE, 0,
+ _("%s:%lu: invalid zero-length file name"),
+ quotearg_colon (files_from), file_number);
+ }
+ }
+ }
+ else
+ error (SORT_FAILURE, 0, _("no input from %s"),
+ quote (files_from));
+ }
+
/* Inheritance of global options to individual keys. */
for (key = keylist; key; key = key->next)
{
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9b6a37819..f7275f88c 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -193,6 +193,7 @@ TESTS = \
misc/shuf \
misc/sort \
misc/sort-compress \
+ misc/sort-files0-from \
misc/sort-merge \
misc/sort-rand \
misc/split-a \
diff --git a/tests/misc/sort-files0-from b/tests/misc/sort-files0-from
new file mode 100755
index 000000000..0907320ea
--- /dev/null
+++ b/tests/misc/sort-files0-from
@@ -0,0 +1,96 @@
+#!/usr/bin/perl
+# Exercise sort's --files0-from option.
+# FIXME: keep this file in sync with tests/du/files0-from.
+
+# Copyright (C) 2006-2008 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use strict;
+
+(my $program_name = $0) =~ s|.*/||;
+
+my $prog = 'sort';
+
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+my @Tests =
+ (
+ # invalid extra command line argument
+ ['f-extra-arg', '--files0-from=- no-such', {IN=>"a"}, {EXIT=>2},
+ {ERR => "$prog: extra operand `no-such'\n"
+ . "file operands cannot be combined with --files0-from\n"
+ . "Try `$prog --help' for more information.\n"}
+ ],
+
+ # missing input file
+ ['missing', '--files0-from=missing', {EXIT=>2},
+ {ERR => "$prog: cannot open `missing' for reading: "
+ . "No such file or directory\n"}],
+
+ # input file name of '-'
+ ['minus-in-stdin', '--files0-from=-', '<', {IN=>{f=>'-'}}, {EXIT=>2},
+ {ERR => "$prog: when reading file names from stdin, no file name of"
+ . " `-' allowed\n"}],
+
+ # empty input, regular file
+ ['empty', '--files0-from=@AUX@', {AUX=>''}, {EXIT=>2},
+ {ERR_SUBST => 's/no input from.+$//'}, {ERR => "$prog: \n"}],
+
+ # empty input, from non-regular file
+ ['empty-nonreg', '--files0-from=/dev/null', {EXIT=>2},
+ {ERR => "$prog: no input from `/dev/null'\n"}],
+
+ # one NUL
+ ['nul-1', '--files0-from=-', '<', {IN=>"\0"}, {EXIT=>2},
+ {ERR => "$prog: -:1: invalid zero-length file name\n"}],
+
+ # two NULs
+ # Note that the behavior here differs from `wc' in that the
+ # first zero-length file name is treated as fatal, so there
+ # is only one line of diagnostic output.
+ ['nul-2', '--files0-from=-', '<', {IN=>"\0\0"}, {EXIT=>2},
+ {ERR => "$prog: -:1: invalid zero-length file name\n"}],
+
+ # one file name, no NUL
+ ['1', '--files0-from=-', '<',
+ {IN=>{f=>"g"}}, {AUX=>{g=>'a'}}, {OUT=>"a\n"} ],
+
+ # one file name, with NUL
+ ['1a', '--files0-from=-', '<',
+ {IN=>{f=>"g\0"}}, {AUX=>{g=>'a'}}, {OUT=>"a\n"} ],
+
+ # two file names, no final NUL
+ ['2', '--files0-from=-', '<',
+ {IN=>{f=>"g\0g"}}, {AUX=>{g=>'a'}}, {OUT=>"a\na\n"} ],
+
+ # two file names, with final NUL
+ ['2a', '--files0-from=-', '<',
+ {IN=>{f=>"g\0g\0"}}, {AUX=>{g=>'a'}}, {OUT=>"a\na\n"} ],
+
+ # Ensure that sort does no processing when given a zero-length file name.
+ # Note that the behavior here differs from `wc' in that the
+ # first zero-length file name is treated as fatal, so there
+ # should be no output on STDOUT.
+ ['zero-len', '--files0-from=-', '<',
+ {IN=>{f=>"\0g\0"}}, {AUX=>{g=>''}},
+ {ERR => "$prog: -:1: invalid zero-length file name\n"}, {EXIT=>2} ],
+ );
+
+my $save_temps = $ENV{DEBUG};
+my $verbose = $ENV{VERBOSE};
+
+my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
+exit $fail;