author     Paul Eggert <eggert@cs.ucla.edu>   2016-02-12 10:16:11 -0800
committer  Paul Eggert <eggert@cs.ucla.edu>   2016-02-12 10:16:47 -0800
commit     62e7af0326786a7dec91d982238948eddab9d6af
tree       62f87db28e13c04dfa127ae7beccf108a5fefeb1
parent     380ab8453dbcfb4e17710a44148be4aa74f1b7dc
split: fix problems with /dev/zero
Problem reported by Nelson H.F. Beebe in: http://bugs.gnu.org/22624
Other problems also fixed: basically, the code got confused because
GNU/Linux reports that /dev/zero has size zero.
* src/split.c (input_file_size): Now takes struct stat *, not just
size.  Always store the first buffer.  All callers changed.
Treat /dev/zero as an infinitely-large file, both on GNU/Linux where
fstat and lseek say its size is zero, and on GNU/Hurd where they say
the size is OFF_T_MAX.
(cwrite): Return true on success.
(bytes_split): Don't try to read past EOF, and stop if a write fails.
(lines_rr): Omit stray check for ignorable errno.
(main): Get file size only when n_units > 1, since that's the only
time it is needed.  Defer most of the work to input_file_size.
* tests/split/l-chunk.sh: Adjust tests to match new behavior on
oddball inputs.
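
The heart of the fix is the size probe.  After filling one buffer, an input
whose file offset has not advanced as far as the bytes already read (as
/dev/zero behaves on GNU/Linux) has no meaningful size, so the probe reports
EOVERFLOW rather than trusting the zero that fstat and lseek return.  What
follows is a minimal standalone sketch of that heuristic, not the patched
split.c itself; it uses plain read() and lseek() where the real code uses
gnulib's safe_read and its own error reporting.

  /* Standalone sketch of the size heuristic described above: fill one
     buffer, then decide whether the input has a knowable size.
     Illustration only, not code from split.c.  */
  #include <errno.h>
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  static off_t
  apparent_size (int fd, char *buf, size_t bufsize)
  {
    size_t filled = 0;
    while (filled < bufsize)
      {
        ssize_t n = read (fd, buf + filled, bufsize - filled);
        if (n == 0)
          return filled;        /* EOF before the buffer filled: exact size.  */
        if (n < 0)
          return -1;            /* Read error; errno is already set.  */
        filled += n;
      }

    off_t cur = lseek (fd, 0, SEEK_CUR);
    if (cur < 0)
      return -1;
    if (cur < (off_t) filled)
      {
        /* Data keeps coming but the offset does not advance
           (/dev/zero on GNU/Linux): treat the input as unbounded.  */
        errno = EOVERFLOW;
        return -1;
      }

    off_t end = lseek (fd, 0, SEEK_END);
    if (end < 0 || lseek (fd, cur, SEEK_SET) < 0)
      return -1;
    return filled + (end - cur);
  }

  int
  main (void)
  {
    char buf[4096];
    int fd = open ("/dev/zero", O_RDONLY);
    if (fd < 0)
      return 1;
    if (apparent_size (fd, buf, sizeof buf) < 0)
      perror ("/dev/zero");     /* EOVERFLOW expected on GNU/Linux */
    return 0;
  }

This is the condition the new input_file_size turns into split's
"cannot determine file size" diagnostic; the GNU/Hurd case, where the
reported size is OFF_T_MAX, is handled the same way in the real code.
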
-rw-r--r--   src/split.c             | 205
-rwxr-xr-x   tests/split/l-chunk.sh  |  18
2 files changed, 126 insertions(+), 97 deletions(-)
diff --git a/src/split.c b/src/split.c
index 510af13c7..0b10600cd 100644
--- a/src/split.c
+++ b/src/split.c
@@ -270,32 +270,61 @@ CHUNKS may be:\n\
exit (status);
}
-/* Return the number of bytes that can be read from FD, a file with
- apparent size SIZE. Actually read the data into BUF (of size
- BUFSIZE) if the file appears to be smaller than BUFSIZE, as this
- works better on proc-like file systems. If the returned value is
- less than BUFSIZE, store all the file's data into BUF; otherwise,
- restore the input file's position so that the file can be reread if
- needed. */
+/* Return the number of bytes that can be read from FD with status ST.
+ Store up to the first BUFSIZE bytes of the file's data into BUF,
+ and advance the file position by the number of bytes read. On
+ input error, set errno and return -1. */
static off_t
-input_file_size (int fd, off_t size, char *buf, size_t bufsize)
+input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize)
{
- if (size < bufsize)
+ off_t size = 0;
+ do
{
- size = 0;
- while (true)
+ size_t n_read = safe_read (fd, buf + size, bufsize - size);
+ if (n_read == 0)
+ return size;
+ if (n_read == SAFE_READ_ERROR)
+ return -1;
+ size += n_read;
+ }
+ while (size < bufsize);
+
+ /* The file contains at least BUFSIZE bytes. Infer its size via
+ st->st_size if this seems reliable, or via lseek if not. */
+ off_t cur = lseek (fd, 0, SEEK_CUR);
+ off_t end;
+ if (cur < 0)
+ return -1;
+ if (cur < size)
+ {
+ /* E.g., /dev/zero on GNU/Linux, where CUR is zero and SIZE == BUFSIZE.
+ Assume there is no limit to the file size. */
+ errno = EOVERFLOW;
+ return -1;
+ }
+ if (usable_st_size (st) && cur <= st->st_size)
+ end = st->st_size;
+ else
+ {
+ end = lseek (fd, 0, SEEK_END);
+ if (end < 0)
+ return -1;
+ if (end != cur)
{
- size_t save = size < bufsize ? size : 0;
- size_t n_read = safe_read (fd, buf + save, bufsize - save);
- if (n_read == 0)
- break;
- if (n_read == SAFE_READ_ERROR)
- error (EXIT_FAILURE, errno, "%s", quotef (infile));
- size += n_read;
+ if (lseek (fd, cur, SEEK_SET) < 0)
+ return -1;
+ if (end < cur)
+ end = cur;
}
- if (bufsize <= size && lseek (fd, - size, SEEK_CUR) < 0)
- error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ }
+
+ size += end - cur;
+ if (size == OFF_T_MAX)
+ {
+ /* E.g., /dev/zero on GNU/Hurd. */
+ errno = EOVERFLOW;
+ return -1;
}
return size;
@@ -547,28 +576,36 @@ closeout (FILE *fp, int fd, pid_t pid, char const *name)
/* Write BYTES bytes at BP to an output file.
If NEW_FILE_FLAG is true, open the next output file.
- Otherwise add to the same output file already in use. */
+ Otherwise add to the same output file already in use.
+ Return true if successful. */
-static void
+static bool
cwrite (bool new_file_flag, const char *bp, size_t bytes)
{
if (new_file_flag)
{
if (!bp && bytes == 0 && elide_empty_files)
- return;
+ return true;
closeout (NULL, output_desc, filter_pid, outfile);
next_file_name ();
- if ((output_desc = create (outfile)) < 0)
+ output_desc = create (outfile);
+ if (output_desc < 0)
+ error (EXIT_FAILURE, errno, "%s", quotef (outfile));
+ }
+
+ if (full_write (output_desc, bp, bytes) == bytes)
+ return true;
+ else
+ {
+ if (! ignorable (errno))
error (EXIT_FAILURE, errno, "%s", quotef (outfile));
+ return false;
}
- if (full_write (output_desc, bp, bytes) != bytes && ! ignorable (errno))
- error (EXIT_FAILURE, errno, "%s", quotef (outfile));
}
/* Split into pieces of exactly N_BYTES bytes.
Use buffer BUF, whose size is BUFSIZE.
- If INITIAL_READ != SIZE_MAX, the entire input file has already been
- partly read into BUF and BUF contains INITIAL_READ input bytes. */
+ BUF contains the first INITIAL_READ input bytes. */
static void
bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
@@ -576,10 +613,9 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
{
size_t n_read;
bool new_file_flag = true;
- size_t to_read;
uintmax_t to_write = n_bytes;
- char *bp_out;
uintmax_t opened = 0;
+ bool eof;
do
{
@@ -587,47 +623,48 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
{
n_read = initial_read;
initial_read = SIZE_MAX;
+ eof = n_read < bufsize;
}
else
{
n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ eof = n_read == 0;
}
- bp_out = buf;
- to_read = n_read;
- while (true)
+ char *bp_out = buf;
+ size_t to_read = n_read;
+ while (to_write <= to_read)
{
- if (to_read < to_write)
+ size_t w = to_write;
+ bool cwrite_ok = cwrite (new_file_flag, bp_out, w);
+ opened += new_file_flag;
+ new_file_flag = !max_files || (opened < max_files);
+ if (!new_file_flag && !cwrite_ok)
{
- if (to_read) /* do not write 0 bytes! */
- {
- cwrite (new_file_flag, bp_out, to_read);
- opened += new_file_flag;
- to_write -= to_read;
- new_file_flag = false;
- }
+ /* If filter no longer accepting input, stop reading. */
+ n_read = to_read = 0;
break;
}
- else
+ bp_out += w;
+ to_read -= w;
+ to_write = n_bytes;
+ }
+ if (to_read != 0)
+ {
+ bool cwrite_ok = cwrite (new_file_flag, bp_out, to_read);
+ opened += new_file_flag;
+ to_write -= to_read;
+ new_file_flag = false;
+ if (!cwrite_ok)
{
- size_t w = to_write;
- cwrite (new_file_flag, bp_out, w);
- opened += new_file_flag;
- new_file_flag = !max_files || (opened < max_files);
- if (!new_file_flag && ignorable (errno))
- {
- /* If filter no longer accepting input, stop reading. */
- n_read = 0;
- break;
- }
- bp_out += w;
- to_read -= w;
- to_write = n_bytes;
+ /* If filter no longer accepting input, stop reading. */
+ n_read = 0;
+ break;
}
}
}
- while (n_read);
+ while (! eof);
/* Ensure NUMBER files are created, which truncates
any existing files or notifies any consumers on fifos.
@@ -817,13 +854,17 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
{
/* Start reading 1 byte before kth chunk of file. */
off_t start = (k - 1) * chunk_size - 1;
- if (initial_read != SIZE_MAX)
+ if (start < initial_read)
{
memmove (buf, buf + start, initial_read - start);
initial_read -= start;
}
- else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
- error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ else
+ {
+ if (lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ initial_read = SIZE_MAX;
+ }
n_written = start;
chunk_no = k - 1;
chunk_end = chunk_no * chunk_size - 1;
@@ -931,13 +972,17 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
start = (k - 1) * (file_size / n);
end = (k == n) ? file_size : k * (file_size / n);
- if (initial_read != SIZE_MAX)
+ if (start < initial_read)
{
memmove (buf, buf + start, initial_read - start);
initial_read -= start;
}
- else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
- error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ else
+ {
+ if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ initial_read = SIZE_MAX;
+ }
while (start < end)
{
@@ -1151,8 +1196,8 @@ lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
error (EXIT_FAILURE, errno, "%s",
quotef (files[i_file].of_name));
}
- if (! ignorable (errno))
- wrote = true;
+
+ wrote = true;
if (file_limit)
{
@@ -1237,7 +1282,7 @@ main (int argc, char **argv)
static char const multipliers[] = "bEGKkMmPTYZ0";
int c;
int digits_optind = 0;
- off_t file_size IF_LINT (= 0);
+ off_t file_size = OFF_T_MAX;
initialize_main (&argc, &argv);
set_program_name (argv[0]);
@@ -1513,31 +1558,15 @@ main (int argc, char **argv)
char *buf = ptr_align (b, page_size);
size_t initial_read = SIZE_MAX;
- if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
+ if ((split_type == type_chunk_bytes || split_type == type_chunk_lines)
+ && n_units != 1)
{
- off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
- if (0 <= input_offset)
- {
- if (usable_st_size (&in_stat_buf) && ! specified_buf_size)
- {
- assert (ST_BLKSIZE (in_stat_buf) <= in_blk_size);
- file_size = input_file_size (STDIN_FILENO, in_stat_buf.st_size,
- buf, in_blk_size);
- if (file_size < in_blk_size)
- initial_read = file_size;
- }
- else
- {
- file_size = lseek (STDIN_FILENO, 0, SEEK_END);
- input_offset = (file_size < 0
- ? file_size
- : lseek (STDIN_FILENO, input_offset, SEEK_SET));
- file_size -= input_offset;
- }
- }
- if (input_offset < 0)
- error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
+ file_size = input_file_size (STDIN_FILENO, &in_stat_buf,
+ buf, in_blk_size);
+ if (file_size < 0)
+ error (EXIT_FAILURE, errno, _("%s: cannot determine file size"),
quotef (infile));
+ initial_read = MIN (file_size, in_blk_size);
/* Overflow, and sanity checking. */
if (OFF_T_MAX < n_units)
{
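
A second piece of the change, visible in the bytes_split hunks above, is that
the write loop now learns about EOF from a short read and checks cwrite's new
boolean result instead of inspecting errno afterwards, so a --filter that
stops accepting input ends the split cleanly.  Below is a simplified,
self-contained sketch of that loop shape; write_piece is a hypothetical
stand-in for split's cwrite/filter machinery, and the max_files and
initial_read bookkeeping is omitted.

  #include <stdbool.h>
  #include <stdio.h>
  #include <unistd.h>

  /* Stand-in for split's cwrite: accept N bytes, starting a new piece
     when NEW_PIECE is set; return false if the sink stops accepting
     input (the condition the patched bytes_split now checks).  */
  static bool
  write_piece (bool new_piece, char const *p, size_t n)
  {
    if (new_piece)
      fputs ("-- new piece --\n", stdout);
    return fwrite (p, 1, n, stdout) == n;
  }

  /* Split the bytes read from FD_IN into pieces of exactly N_BYTES,
     stopping at EOF or when write_piece reports failure.  */
  static void
  split_bytes (int fd_in, size_t n_bytes, char *buf, size_t bufsize)
  {
    size_t to_write = n_bytes;  /* room left in the current piece */
    bool new_piece = true;
    bool eof;

    do
      {
        ssize_t n_read = read (fd_in, buf, bufsize);
        if (n_read < 0)
          return;               /* a real tool would diagnose errno here */
        eof = n_read == 0;

        char *bp_out = buf;
        size_t to_read = n_read;

        /* Emit as many complete pieces as this buffer allows.  */
        while (to_write <= to_read)
          {
            if (!write_piece (new_piece, bp_out, to_write))
              return;           /* sink no longer accepting input */
            bp_out += to_write;
            to_read -= to_write;
            to_write = n_bytes;
            new_piece = true;
          }

        /* A partial tail continues into the next read's data.  */
        if (to_read != 0)
          {
            if (!write_piece (new_piece, bp_out, to_read))
              return;
            to_write -= to_read;
            new_piece = false;
          }
      }
    while (!eof);
  }

  int
  main (void)
  {
    char buf[8192];
    split_bytes (STDIN_FILENO, 10, buf, sizeof buf);
    return 0;
  }

The carried to_write counter is what lets a piece straddle two reads: a
partial tail is written without starting a new piece, and the next buffer
first tops up that piece before full-size pieces resume.
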
diff --git a/tests/split/l-chunk.sh b/tests/split/l-chunk.sh
index dac98d05e..38297dea1 100755
--- a/tests/split/l-chunk.sh
+++ b/tests/split/l-chunk.sh
@@ -24,9 +24,9 @@ echo "split: invalid number of chunks: '1o'" > exp
split -n l/1o 2>err && fail=1
compare exp err || fail=1
-echo "split: -: cannot determine file size" > exp
-echo | split -n l/1 2>err && fail=1
-compare exp err || fail=1
+echo > exp
+echo | split -n l/1 || fail=1
+compare exp xaa || fail=1
# N can be greater than the file size
# in which case no data is extracted, or empty files are written
@@ -34,14 +34,14 @@ split -n l/10 /dev/null || fail=1
test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "10x0" || fail=1
rm x??
-# Ensure the correct number of files written
-# even if there is more data than the reported file size
-split -n l/2 /dev/zero
-test "$(stat -c %s x* | wc -l)" = '2' || fail=1
+# 'split' should reject any attempt to create an infinitely
+# long output file.
+returns_ 1 split -n l/2 /dev/zero || fail=1
rm x??
# Repeat the above, but with 1/2, not l/2:
-split -n 1/2 /dev/zero || fail=1
+returns_ 1 split -n 1/2 /dev/zero || fail=1
+rm x??
# Ensure --elide-empty-files is honored
split -e -n l/10 /dev/null || fail=1
@@ -54,7 +54,7 @@ lines=\
printf "%s" "$lines" | tr '~' '\n' > in || framework_failure_
echo "split: invalid chunk number: '16'" > exp
-split -n l/16/15 in 2>err.t && fail=1
+returns_ 1 split -n l/16/15 in 2>err.t || fail=1
sed "s/': .*/'/" < err.t > err || framework_failure_
compare exp err || fail=1