diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-10-07 16:46:08 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-10-07 16:47:37 -0700 |
commit | 2662702b9e8643f62c670bbf2fa94b1be1ccf9af (patch) | |
tree | c82775c20abc304fa4f187218830dc3aa2f7e481 /src/split.c | |
parent | b020002b4bfae55d5bbcf66bd7ce787a4e6da689 (diff) | |
download | coreutils-2662702b9e8643f62c670bbf2fa94b1be1ccf9af.tar.xz |
wc: don't miscount /sys and similar file systems
Fix similar problems in head, od, split, tac, and tail.
Reported by George Shuklin in: http://bugs.gnu.org/18621
* NEWS: Document this.
* src/head.c (elseek): Move up.
(elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg
CURRENT_POS. All uses changed.
(elide_tail_bytes_file, elide_tail_lines_file):
New arg ST and remove arg SIZE. All uses changed.
* src/head.c (elide_tail_bytes_file):
* src/od.c (skip): Avoid optimization for /sys files, where
st_size is bogus and st_size == st_blksize.
Don't report error at EOF when not optimizing.
* src/head.c, src/od.c, src/tail.c: Include "stat-size.h".
* src/split.c (input_file_size): New function.
(bytes_split, lines_chunk_split, bytes_chunk_extract): New arg
INITIAL_READ. All uses changed. Use it to double-check st_size.
* src/tac.c (tac_seekable): New arg FILE_POS. All uses changed.
(copy_to_temp): Return size of temp file. All uses changed.
* src/tac.c (tac_seekable):
* src/tail.c (tail_bytes):
* src/wc.c (wc):
Don't trust st_size; double-check by reading.
* src/wc.c (wc): New arg CURRENT_POS. All uses changed.
* tests/local.mk (all_tests): Add tests/misc/wc-proc.sh,
tests/misc/od-j.sh, tests/tail-2/tail-c.sh.
* tests/misc/head-c.sh:
* tests/misc/tac-2-nonseekable.sh:
* tests/split/b-chunk.sh:
Add tests for problems with /proc and /sys files.
* tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh:
New files.
Diffstat (limited to 'src/split.c')
-rw-r--r-- | src/split.c | 146 |
1 files changed, 114 insertions, 32 deletions
diff --git a/src/split.c b/src/split.c index 9b238e450..ec0da7deb 100644 --- a/src/split.c +++ b/src/split.c @@ -246,6 +246,37 @@ r/K/N likewise but only output Kth of N to stdout\n\ exit (status); } +/* Return the number of bytes that can be read from FD, a file with + apparent size SIZE. Actually read the data into BUF (of size + BUFSIZE) if the file appears to be smaller than BUFSIZE, as this + works better on proc-like file systems. If the returned value is + less than BUFSIZE, store all the file's data into BUF; otherwise, + restore the input file's position so that the file can be reread if + needed. */ + +static off_t +input_file_size (int fd, off_t size, char *buf, size_t bufsize) +{ + if (size < bufsize) + { + size = 0; + while (true) + { + size_t save = size < bufsize ? size : 0; + size_t n_read = safe_read (fd, buf + save, bufsize - save); + if (n_read == 0) + break; + if (n_read == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, "%s", infile); + size += n_read; + } + if (bufsize <= size && lseek (fd, - size, SEEK_CUR) < 0) + error (EXIT_FAILURE, errno, "%s", infile); + } + + return size; +} + /* Compute the next sequential output file name and store it into the string 'outfile'. */ @@ -511,10 +542,13 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes) } /* Split into pieces of exactly N_BYTES bytes. - Use buffer BUF, whose size is BUFSIZE. */ + Use buffer BUF, whose size is BUFSIZE. + If INITIAL_READ != SIZE_MAX, the entire input file has already been + partly read into BUF and BUF contains INITIAL_READ input bytes. */ static void -bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files) +bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read, + uintmax_t max_files) { size_t n_read; bool new_file_flag = true; @@ -525,9 +559,17 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files) do { - n_read = safe_read (STDIN_FILENO, buf, bufsize); - if (n_read == SAFE_READ_ERROR) - error (EXIT_FAILURE, errno, "%s", infile); + if (initial_read != SIZE_MAX) + { + n_read = initial_read; + initial_read = SIZE_MAX; + } + else + { + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, "%s", infile); + } bp_out = buf; to_read = n_read; while (true) @@ -736,7 +778,7 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize) static void lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, - off_t file_size) + size_t initial_read, off_t file_size) { assert (n && k <= n && n <= file_size); @@ -751,7 +793,12 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, { /* Start reading 1 byte before kth chunk of file. */ off_t start = (k - 1) * chunk_size - 1; - if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) + if (initial_read != SIZE_MAX) + { + memmove (buf, buf + start, initial_read - start); + initial_read -= start; + } + else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) error (EXIT_FAILURE, errno, "%s", infile); n_written = start; chunk_no = k - 1; @@ -761,10 +808,19 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, while (n_written < file_size) { char *bp = buf, *eob; - size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); - if (n_read == SAFE_READ_ERROR) - error (EXIT_FAILURE, errno, "%s", infile); - else if (n_read == 0) + size_t n_read; + if (initial_read != SIZE_MAX) + { + n_read = initial_read; + initial_read = SIZE_MAX; + } + else + { + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, "%s", infile); + } + if (n_read == 0) break; /* eof. */ n_read = MIN (n_read, file_size - n_written); chunk_truncated = false; @@ -841,7 +897,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, static void bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, - off_t file_size) + size_t initial_read, off_t file_size) { off_t start; off_t end; @@ -851,15 +907,29 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, start = (k - 1) * (file_size / n); end = (k == n) ? file_size : k * (file_size / n); - if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) + if (initial_read != SIZE_MAX) + { + memmove (buf, buf + start, initial_read - start); + initial_read -= start; + } + else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) error (EXIT_FAILURE, errno, "%s", infile); while (start < end) { - size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); - if (n_read == SAFE_READ_ERROR) - error (EXIT_FAILURE, errno, "%s", infile); - else if (n_read == 0) + size_t n_read; + if (initial_read != SIZE_MAX) + { + n_read = initial_read; + initial_read = SIZE_MAX; + } + else + { + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, "%s", infile); + } + if (n_read == 0) break; /* eof. */ n_read = MIN (n_read, end - start); if (full_write (STDOUT_FILENO, buf, n_read) != n_read @@ -1403,22 +1473,34 @@ main (int argc, char **argv) if (in_blk_size == 0) in_blk_size = io_blksize (in_stat_buf); + void *b = xmalloc (in_blk_size + 1 + page_size - 1); + char *buf = ptr_align (b, page_size); + size_t initial_read = SIZE_MAX; + if (split_type == type_chunk_bytes || split_type == type_chunk_lines) { off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR); - if (usable_st_size (&in_stat_buf)) - file_size = in_stat_buf.st_size; - else if (0 <= input_offset) + if (0 <= input_offset) { - file_size = lseek (STDIN_FILENO, 0, SEEK_END); - input_offset = (file_size < 0 - ? file_size - : lseek (STDIN_FILENO, input_offset, SEEK_SET)); + if (usable_st_size (&in_stat_buf)) + { + file_size = input_file_size (STDIN_FILENO, in_stat_buf.st_size, + buf, in_blk_size); + if (file_size < in_blk_size) + initial_read = file_size; + } + else + { + file_size = lseek (STDIN_FILENO, 0, SEEK_END); + input_offset = (file_size < 0 + ? file_size + : lseek (STDIN_FILENO, input_offset, SEEK_SET)); + file_size -= input_offset; + } } if (input_offset < 0) error (EXIT_FAILURE, 0, _("%s: cannot determine file size"), quote (infile)); - file_size -= input_offset; /* Overflow, and sanity checking. */ if (OFF_T_MAX < n_units) { @@ -1431,9 +1513,6 @@ main (int argc, char **argv) file_size = MAX (file_size, n_units); } - void *b = xmalloc (in_blk_size + 1 + page_size - 1); - char *buf = ptr_align (b, page_size); - /* When filtering, closure of one pipe must not terminate the process, as there may still be other streams expecting input from us. */ if (filter_command) @@ -1454,7 +1533,7 @@ main (int argc, char **argv) break; case type_bytes: - bytes_split (n_units, buf, in_blk_size, 0); + bytes_split (n_units, buf, in_blk_size, SIZE_MAX, 0); break; case type_byteslines: @@ -1463,13 +1542,16 @@ main (int argc, char **argv) case type_chunk_bytes: if (k_units == 0) - bytes_split (file_size / n_units, buf, in_blk_size, n_units); + bytes_split (file_size / n_units, buf, in_blk_size, initial_read, + n_units); else - bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size); + bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read, + file_size); break; case type_chunk_lines: - lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size); + lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read, + file_size); break; case type_rr: |