diff options
author | Jim Meyering <meyering@redhat.com> | 2008-11-25 18:38:26 +0100 |
---|---|---|
committer | Jim Meyering <meyering@redhat.com> | 2008-12-02 13:12:22 +0100 |
commit | c2e56e0de7d86bdc0f824d758a7efde4d5d7b235 (patch) | |
tree | acd3e00bd616d75dccf110ef810a9e74a1c787d3 /src/wc.c | |
parent | 031e2fb5e9501fb9cda4d739a92abb02e2b05a52 (diff) | |
download | coreutils-c2e56e0de7d86bdc0f824d758a7efde4d5d7b235.tar.xz |
wc: read and process --files0-from= input a name at a time,
when the file name list is not known to be reasonably small. Before, wc would always read
the entire file name list into memory and *then* process each file name.
wc still reads the whole list into memory when the list is known not to be
too large; this is done in order to be able to align the output numbers,
as it does with arguments specified on the command line.
* src/wc.c: Include "argv-iter.h".
(main): Rewrite to iterate over file names via argv-iter, reading them
from the stream one at a time unless the list is known to be small enough.
* NEWS (Bug fixes): Mention it.
Diffstat (limited to 'src/wc.c')
-rw-r--r-- | src/wc.c | 139 |
1 files changed, 95 insertions, 44 deletions
@@ -20,14 +20,17 @@ #include <config.h> #include <stdio.h> +#include <assert.h> #include <getopt.h> #include <sys/types.h> #include <wchar.h> #include <wctype.h> #include "system.h" +#include "argv-iter.h" #include "error.h" #include "mbchar.h" +#include "physmem.h" #include "quote.h" #include "quotearg.h" #include "readtokens0.h" @@ -515,17 +518,19 @@ wc_file (char const *file, struct fstatus *fstatus) /* Return the file status for the NFILES files addressed by FILE. Optimize the case where only one number is printed, for just one file; in that case we can use a print width of 1, so we don't need - to stat the file. */ + to stat the file. Handle the case of (nfiles == 0) in the same way; + that happens when we don't know how long the list of file names will be. */ static struct fstatus * -get_input_fstatus (int nfiles, char * const *file) +get_input_fstatus (int nfiles, char *const *file) { - struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus); + struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus); - if (nfiles == 1 - && ((print_lines + print_words + print_chars - + print_bytes + print_linelength) - == 1)) + if (nfiles == 0 + || (nfiles == 1 + && ((print_lines + print_words + print_chars + + print_bytes + print_linelength) + == 1))) fstatus[0].failed = 1; else { @@ -577,7 +582,6 @@ compute_number_width (int nfiles, struct fstatus const *fstatus) int main (int argc, char **argv) { - int i; bool ok; int optc; int nfiles; @@ -637,6 +641,8 @@ main (int argc, char **argv) | print_linelength)) print_lines = print_words = print_bytes = true; + bool read_tokens = false; + struct argv_iterator *ai; if (files_from) { FILE *stream; @@ -661,69 +667,114 @@ main (int argc, char **argv) quote (files_from)); } - readtokens0_init (&tok); - - if (! 
readtokens0 (stream, &tok) || fclose (stream) != 0) - error (EXIT_FAILURE, 0, _("cannot read file names from %s"), - quote (files_from)); - - files = tok.tok; - nfiles = tok.n_tok; + /* Read the file list into RAM if we can detect its size and that + size is reasonable. Otherwise, we'll read a name at a time. */ + struct stat st; + if (fstat (fileno (stream), &st) == 0 + && S_ISREG (st.st_mode) + && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2)) + { + read_tokens = true; + readtokens0_init (&tok); + if (! readtokens0 (stream, &tok) || fclose (stream) != 0) + error (EXIT_FAILURE, 0, _("cannot read file names from %s"), + quote (files_from)); + files = tok.tok; + nfiles = tok.n_tok; + ai = argv_iter_init_argv (files); + } + else + { + files = NULL; + nfiles = 0; + ai = argv_iter_init_stream (stream); + } } else { - static char *stdin_only[2]; + static char *stdin_only[] = { NULL }; files = (optind < argc ? argv + optind : stdin_only); nfiles = (optind < argc ? argc - optind : 1); - stdin_only[0] = NULL; + ai = argv_iter_init_argv (files); } fstatus = get_input_fstatus (nfiles, files); number_width = compute_number_width (nfiles, fstatus); + int i; ok = true; - for (i = 0; i < nfiles; i++) + for (i = 0; /* */; i++) { - if (files[i]) + bool skip_file = false; + enum argv_iter_err ai_err; + char *file_name = argv_iter (ai, &ai_err); + if (ai_err == AI_ERR_EOF) + break; + if (!file_name) { - if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-")) + switch (ai_err) { - ok = false; - /* Give a better diagnostic in an unusual case: - printf - | wc --files0-from=- */ - error (0, 0, _("when reading file names from stdin, " - "no file name of %s allowed"), - quote ("-")); + case AI_ERR_READ: + error (0, errno, _("%s: read error"), quote (files_from)); + skip_file = true; continue; + case AI_ERR_MEM: + xalloc_die (); + default: + assert (!"unexpected error code from argv_iter"); } + } + if (files_from && STREQ (files_from, "-") && STREQ (file_name, 
"-")) + { + /* Give a better diagnostic in an unusual case: + printf - | wc --files0-from=- */ + error (0, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quote (file_name)); + skip_file = true; + } + if (!file_name[0]) + { /* Diagnose a zero-length file name. When it's one - among many, knowing the record number may help. */ - if (files[i][0] == '\0') + among many, knowing the record number may help. + FIXME: currently print the record number only with + --files0-from=FILE. Maybe do it for argv, too? */ + if (files_from == NULL) + error (0, 0, "%s", _("invalid zero-length file name")); + else { - ok = false; - if (files_from) - { - /* Using the standard `filename:line-number:' prefix here is - not totally appropriate, since NUL is the separator, not NL, - but it might be better than nothing. */ - unsigned long int file_number = i + 1; - error (0, 0, "%s:%lu: %s", quotearg_colon (files_from), - file_number, _("invalid zero-length file name")); - } - else - error (0, 0, "%s", _("invalid zero-length file name")); - continue; + /* Using the standard `filename:line-number:' prefix here is + not totally appropriate, since NUL is the separator, not NL, + but it might be better than nothing. */ + unsigned long int file_number = argv_iter_n_args (ai); + error (0, 0, "%s:%lu: %s", quotearg_colon (files_from), + file_number, _("invalid zero-length file name")); } + skip_file = true; } - ok &= wc_file (files[i], &fstatus[i]); + if (skip_file) + ok = false; + else + ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]); } - if (1 < nfiles) + /* No arguments on the command line is fine. That means read from stdin. + However, no arguments on the --files0-from input stream is an error + means don't read anything. 
*/ + if (ok && !files_from && argv_iter_n_args (ai) == 0) + ok &= wc_file (NULL, &fstatus[0]); + + if (read_tokens) + readtokens0_free (&tok); + + if (1 < argv_iter_n_args (ai)) write_counts (total_lines, total_words, total_chars, total_bytes, max_line_length, _("total")); + argv_iter_free (ai); + free (fstatus); if (have_read_stdin && close (STDIN_FILENO) != 0) |