diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2016-02-12 10:16:11 -0800 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2016-02-12 10:16:47 -0800 |
commit | 62e7af0326786a7dec91d982238948eddab9d6af (patch) | |
tree | 62f87db28e13c04dfa127ae7beccf108a5fefeb1 | |
parent | 380ab8453dbcfb4e17710a44148be4aa74f1b7dc (diff) | |
download | coreutils-62e7af0326786a7dec91d982238948eddab9d6af.tar.xz |
split: fix problems with /dev/zero
Problem reported by Nelson H.F. Beebe in: http://bugs.gnu.org/22624
Other problems also fixed: basically, the code got confused because
GNU/Linux reports that /dev/zero has size zero.
* src/split.c (input_file_size): Now takes struct stat *, not just
size. Always store the first buffer. All callers changed. Treat
/dev/zero as an infinitely-large file, both on GNU/Linux where
fstat and lseek say its size is zero, and on GNU/Hurd where they
say the size is OFF_T_MAX.
(cwrite): Return true on success.
(bytes_split): Don't try to read past EOF, and stop if a write fails.
(lines_rr): Omit stray check for ignorable errno.
(main): Get file size only when n_units > 1, since that's the only
time it is needed. Defer most of the work to input_file_size.
* tests/split/l-chunk.sh: Adjust tests to match new behavior
on oddball inputs.
-rw-r--r-- | src/split.c | 205 | ||||
-rwxr-xr-x | tests/split/l-chunk.sh | 18 |
2 files changed, 126 insertions, 97 deletions
diff --git a/src/split.c b/src/split.c index 510af13c7..0b10600cd 100644 --- a/src/split.c +++ b/src/split.c @@ -270,32 +270,61 @@ CHUNKS may be:\n\ exit (status); } -/* Return the number of bytes that can be read from FD, a file with - apparent size SIZE. Actually read the data into BUF (of size - BUFSIZE) if the file appears to be smaller than BUFSIZE, as this - works better on proc-like file systems. If the returned value is - less than BUFSIZE, store all the file's data into BUF; otherwise, - restore the input file's position so that the file can be reread if - needed. */ +/* Return the number of bytes that can be read from FD with status ST. + Store up to the first BUFSIZE bytes of the file's data into BUF, + and advance the file position by the number of bytes read. On + input error, set errno and return -1. */ static off_t -input_file_size (int fd, off_t size, char *buf, size_t bufsize) +input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize) { - if (size < bufsize) + off_t size = 0; + do { - size = 0; - while (true) + size_t n_read = safe_read (fd, buf + size, bufsize - size); + if (n_read == 0) + return size; + if (n_read == SAFE_READ_ERROR) + return -1; + size += n_read; + } + while (size < bufsize); + + /* The file contains at least BUFSIZE bytes. Infer its size via + st->st_size if this seems reliable, or via lseek if not. */ + off_t cur = lseek (fd, 0, SEEK_CUR); + off_t end; + if (cur < 0) + return -1; + if (cur < size) + { + /* E.g., /dev/zero on GNU/Linux, where CUR is zero and SIZE == BUFSIZE. + Assume there is no limit to the file size. */ + errno = EOVERFLOW; + return -1; + } + if (usable_st_size (st) && cur <= st->st_size) + end = st->st_size; + else + { + end = lseek (fd, 0, SEEK_END); + if (end < 0) + return -1; + if (end != cur) { - size_t save = size < bufsize ? size : 0; - size_t n_read = safe_read (fd, buf + save, bufsize - save); - if (n_read == 0) - break; - if (n_read == SAFE_READ_ERROR) - error (EXIT_FAILURE, errno, "%s", quotef (infile)); - size += n_read; + if (lseek (fd, cur, SEEK_SET) < 0) + return -1; + if (end < cur) + end = cur; } - if (bufsize <= size && lseek (fd, - size, SEEK_CUR) < 0) - error (EXIT_FAILURE, errno, "%s", quotef (infile)); + } + + size += end - cur; + if (size == OFF_T_MAX) + { + /* E.g., /dev/zero on GNU/Hurd. */ + errno = EOVERFLOW; + return -1; } return size; @@ -547,28 +576,36 @@ closeout (FILE *fp, int fd, pid_t pid, char const *name) /* Write BYTES bytes at BP to an output file. If NEW_FILE_FLAG is true, open the next output file. - Otherwise add to the same output file already in use. */ + Otherwise add to the same output file already in use. + Return true if successful. */ -static void +static bool cwrite (bool new_file_flag, const char *bp, size_t bytes) { if (new_file_flag) { if (!bp && bytes == 0 && elide_empty_files) - return; + return true; closeout (NULL, output_desc, filter_pid, outfile); next_file_name (); - if ((output_desc = create (outfile)) < 0) + output_desc = create (outfile); + if (output_desc < 0) + error (EXIT_FAILURE, errno, "%s", quotef (outfile)); + } + + if (full_write (output_desc, bp, bytes) == bytes) + return true; + else + { + if (! ignorable (errno)) error (EXIT_FAILURE, errno, "%s", quotef (outfile)); + return false; } - if (full_write (output_desc, bp, bytes) != bytes && ! ignorable (errno)) - error (EXIT_FAILURE, errno, "%s", quotef (outfile)); } /* Split into pieces of exactly N_BYTES bytes. Use buffer BUF, whose size is BUFSIZE. - If INITIAL_READ != SIZE_MAX, the entire input file has already been - partly read into BUF and BUF contains INITIAL_READ input bytes. */ + BUF contains the first INITIAL_READ input bytes. */ static void bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read, @@ -576,10 +613,9 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read, { size_t n_read; bool new_file_flag = true; - size_t to_read; uintmax_t to_write = n_bytes; - char *bp_out; uintmax_t opened = 0; + bool eof; do { @@ -587,47 +623,48 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read, { n_read = initial_read; initial_read = SIZE_MAX; + eof = n_read < bufsize; } else { n_read = safe_read (STDIN_FILENO, buf, bufsize); if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", quotef (infile)); + eof = n_read == 0; } - bp_out = buf; - to_read = n_read; - while (true) + char *bp_out = buf; + size_t to_read = n_read; + while (to_write <= to_read) { - if (to_read < to_write) + size_t w = to_write; + bool cwrite_ok = cwrite (new_file_flag, bp_out, w); + opened += new_file_flag; + new_file_flag = !max_files || (opened < max_files); + if (!new_file_flag && !cwrite_ok) { - if (to_read) /* do not write 0 bytes! */ - { - cwrite (new_file_flag, bp_out, to_read); - opened += new_file_flag; - to_write -= to_read; - new_file_flag = false; - } + /* If filter no longer accepting input, stop reading. */ + n_read = to_read = 0; break; } - else + bp_out += w; + to_read -= w; + to_write = n_bytes; + } + if (to_read != 0) + { + bool cwrite_ok = cwrite (new_file_flag, bp_out, to_read); + opened += new_file_flag; + to_write -= to_read; + new_file_flag = false; + if (!cwrite_ok) { - size_t w = to_write; - cwrite (new_file_flag, bp_out, w); - opened += new_file_flag; - new_file_flag = !max_files || (opened < max_files); - if (!new_file_flag && ignorable (errno)) - { - /* If filter no longer accepting input, stop reading. */ - n_read = 0; - break; - } - bp_out += w; - to_read -= w; - to_write = n_bytes; + /* If filter no longer accepting input, stop reading. */ + n_read = 0; + break; } } } - while (n_read); + while (! eof); /* Ensure NUMBER files are created, which truncates any existing files or notifies any consumers on fifos. @@ -817,13 +854,17 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, { /* Start reading 1 byte before kth chunk of file. */ off_t start = (k - 1) * chunk_size - 1; - if (initial_read != SIZE_MAX) + if (start < initial_read) { memmove (buf, buf + start, initial_read - start); initial_read -= start; } - else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) - error (EXIT_FAILURE, errno, "%s", quotef (infile)); + else + { + if (lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + initial_read = SIZE_MAX; + } n_written = start; chunk_no = k - 1; chunk_end = chunk_no * chunk_size - 1; @@ -931,13 +972,17 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, start = (k - 1) * (file_size / n); end = (k == n) ? file_size : k * (file_size / n); - if (initial_read != SIZE_MAX) + if (initial_read != SIZE_MAX || start < initial_read) { memmove (buf, buf + start, initial_read - start); initial_read -= start; } - else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) - error (EXIT_FAILURE, errno, "%s", quotef (infile)); + else + { + if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + initial_read = SIZE_MAX; + } while (start < end) { @@ -1151,8 +1196,8 @@ lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize) error (EXIT_FAILURE, errno, "%s", quotef (files[i_file].of_name)); } - if (! ignorable (errno)) - wrote = true; + + wrote = true; if (file_limit) { @@ -1237,7 +1282,7 @@ main (int argc, char **argv) static char const multipliers[] = "bEGKkMmPTYZ0"; int c; int digits_optind = 0; - off_t file_size IF_LINT (= 0); + off_t file_size = OFF_T_MAX; initialize_main (&argc, &argv); set_program_name (argv[0]); @@ -1513,31 +1558,15 @@ main (int argc, char **argv) char *buf = ptr_align (b, page_size); size_t initial_read = SIZE_MAX; - if (split_type == type_chunk_bytes || split_type == type_chunk_lines) + if ((split_type == type_chunk_bytes || split_type == type_chunk_lines) + && n_units != 1) { - off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR); - if (0 <= input_offset) - { - if (usable_st_size (&in_stat_buf) && ! specified_buf_size) - { - assert (ST_BLKSIZE (in_stat_buf) <= in_blk_size); - file_size = input_file_size (STDIN_FILENO, in_stat_buf.st_size, - buf, in_blk_size); - if (file_size < in_blk_size) - initial_read = file_size; - } - else - { - file_size = lseek (STDIN_FILENO, 0, SEEK_END); - input_offset = (file_size < 0 - ? file_size - : lseek (STDIN_FILENO, input_offset, SEEK_SET)); - file_size -= input_offset; - } - } - if (input_offset < 0) - error (EXIT_FAILURE, 0, _("%s: cannot determine file size"), + file_size = input_file_size (STDIN_FILENO, &in_stat_buf, + buf, in_blk_size); + if (file_size < 0) + error (EXIT_FAILURE, errno, _("%s: cannot determine file size"), quotef (infile)); + initial_read = MIN (file_size, in_blk_size); /* Overflow, and sanity checking. */ if (OFF_T_MAX < n_units) { diff --git a/tests/split/l-chunk.sh b/tests/split/l-chunk.sh index dac98d05e..38297dea1 100755 --- a/tests/split/l-chunk.sh +++ b/tests/split/l-chunk.sh @@ -24,9 +24,9 @@ echo "split: invalid number of chunks: '1o'" > exp split -n l/1o 2>err && fail=1 compare exp err || fail=1 -echo "split: -: cannot determine file size" > exp -echo | split -n l/1 2>err && fail=1 -compare exp err || fail=1 +echo > exp +echo | split -n l/1 || fail=1 +compare exp xaa || fail=1 # N can be greater than the file size # in which case no data is extracted, or empty files are written @@ -34,14 +34,14 @@ split -n l/10 /dev/null || fail=1 test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "10x0" || fail=1 rm x?? -# Ensure the correct number of files written -# even if there is more data than the reported file size -split -n l/2 /dev/zero -test "$(stat -c %s x* | wc -l)" = '2' || fail=1 +# 'split' should reject any attempt to create an infinitely +# long output file. +returns_ 1 split -n l/2 /dev/zero || fail=1 rm x?? # Repeat the above, but with 1/2, not l/2: -split -n 1/2 /dev/zero || fail=1 +returns_ 1 split -n 1/2 /dev/zero || fail=1 +rm x?? # Ensure --elide-empty-files is honored split -e -n l/10 /dev/null || fail=1 @@ -54,7 +54,7 @@ lines=\ printf "%s" "$lines" | tr '~' '\n' > in || framework_failure_ echo "split: invalid chunk number: '16'" > exp -split -n l/16/15 in 2>err.t && fail=1 +returns_ 1 split -n l/16/15 in 2>err.t || fail=1 sed "s/': .*/'/" < err.t > err || framework_failure_ compare exp err || fail=1 |