summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2014-10-07 16:46:08 -0700
committer Paul Eggert <eggert@cs.ucla.edu> 2014-10-07 16:47:37 -0700
commit 2662702b9e8643f62c670bbf2fa94b1be1ccf9af (patch)
tree c82775c20abc304fa4f187218830dc3aa2f7e481 /src
parent b020002b4bfae55d5bbcf66bd7ce787a4e6da689 (diff)
download coreutils-2662702b9e8643f62c670bbf2fa94b1be1ccf9af.tar.xz
wc: don't miscount /sys and similar file systems
Fix similar problems in head, od, split, tac, and tail.
Reported by George Shuklin in: http://bugs.gnu.org/18621
* NEWS: Document this.
* src/head.c (elseek): Move up.
(elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg CURRENT_POS.
All uses changed.
(elide_tail_bytes_file, elide_tail_lines_file): New arg ST and
remove arg SIZE. All uses changed.
* src/head.c (elide_tail_bytes_file):
* src/od.c (skip): Avoid optimization for /sys files, where
st_size is bogus and st_size == st_blksize.
Don't report error at EOF when not optimizing.
* src/head.c, src/od.c, src/tail.c: Include "stat-size.h".
* src/split.c (input_file_size): New function.
(bytes_split, lines_chunk_split, bytes_chunk_extract): New arg
INITIAL_READ. All uses changed. Use it to double-check st_size.
* src/tac.c (tac_seekable): New arg FILE_POS. All uses changed.
(copy_to_temp): Return size of temp file. All uses changed.
* src/tac.c (tac_seekable):
* src/tail.c (tail_bytes):
* src/wc.c (wc): Don't trust st_size; double-check by reading.
* src/wc.c (wc): New arg CURRENT_POS. All uses changed.
* tests/local.mk (all_tests): Add tests/misc/wc-proc.sh,
tests/misc/od-j.sh, tests/tail-2/tail-c.sh.
* tests/misc/head-c.sh:
* tests/misc/tac-2-nonseekable.sh:
* tests/split/b-chunk.sh: Add tests for problems with /proc
and /sys files.
* tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh:
New files.
Diffstat (limited to 'src')
-rw-r--r--src/head.c147
-rw-r--r--src/od.c23
-rw-r--r--src/split.c146
-rw-r--r--src/tac.c70
-rw-r--r--src/tail.c43
-rw-r--r--src/wc.c45
6 files changed, 302 insertions, 172 deletions
diff --git a/src/head.c b/src/head.c
index d2f1fce60..2782f8e8c 100644
--- a/src/head.c
+++ b/src/head.c
@@ -36,6 +36,7 @@
#include "quote.h"
#include "quotearg.h"
#include "safe-read.h"
+#include "stat-size.h"
#include "xfreopen.h"
#include "xstrtol.h"
@@ -206,13 +207,42 @@ copy_fd (int src_fd, uintmax_t n_bytes)
return COPY_FD_OK;
}
-/* Print all but the last N_ELIDE bytes from the input available via
- the non-seekable file descriptor FD. Return true upon success.
+/* Call lseek (FD, OFFSET, WHENCE), where file descriptor FD
+ corresponds to the file FILENAME. WHENCE must be SEEK_SET or
+ SEEK_CUR. Return the resulting offset. Give a diagnostic and
+ return -1 if lseek fails. */
+
+static off_t
+elseek (int fd, off_t offset, int whence, char const *filename)
+{
+ off_t new_offset = lseek (fd, offset, whence);
+ char buf[INT_BUFSIZE_BOUND (offset)];
+
+ if (new_offset < 0)
+ error (0, errno,
+ _(whence == SEEK_SET
+ ? N_("%s: cannot seek to offset %s")
+ : N_("%s: cannot seek to relative offset %s")),
+ quotearg_colon (filename),
+ offtostr (offset, buf));
+
+ return new_offset;
+}
+
+/* For an input file with name FILENAME and descriptor FD,
+ output all but the last N_ELIDE_0 bytes.
+ If CURRENT_POS is nonnegative, assume that the input file is
+ positioned at CURRENT_POS and that it should be repositioned to
+ just before the elided bytes before returning.
+ Return true upon success.
Give a diagnostic and return false upon error. */
static bool
-elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
+elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0,
+ off_t current_pos)
{
size_t n_elide = n_elide_0;
+ uintmax_t desired_pos = current_pos;
+ bool ok = true;
#ifndef HEAD_TAIL_PIPE_READ_BUFSIZE
# define HEAD_TAIL_PIPE_READ_BUFSIZE BUFSIZ
@@ -251,7 +281,6 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
if (n_elide <= HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD)
{
- bool ok = true;
bool first = true;
bool eof = false;
size_t n_to_read = READ_BUFSIZE + n_elide;
@@ -293,22 +322,26 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
/* Output any (but maybe just part of the) elided data from
the previous round. */
if (! first)
- xwrite_stdout (b[!i] + READ_BUFSIZE, n_elide - delta);
+ {
+ desired_pos += n_elide - delta;
+ xwrite_stdout (b[!i] + READ_BUFSIZE, n_elide - delta);
+ }
first = false;
if (n_elide < n_read)
- xwrite_stdout (b[i], n_read - n_elide);
+ {
+ desired_pos += n_read - n_elide;
+ xwrite_stdout (b[i], n_read - n_elide);
+ }
}
free (b[0]);
- return ok;
}
else
{
/* Read blocks of size READ_BUFSIZE, until we've read at least n_elide
bytes. Then, for each new buffer we read, also write an old one. */
- bool ok = true;
bool eof = false;
size_t n_read;
bool buffered_enough;
@@ -357,7 +390,10 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
buffered_enough = true;
if (buffered_enough)
- xwrite_stdout (b[i_next], n_read);
+ {
+ desired_pos += n_read;
+ xwrite_stdout (b[i_next], n_read);
+ }
}
/* Output any remainder: rem bytes from b[i] + n_read. */
@@ -366,6 +402,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
if (buffered_enough)
{
size_t n_bytes_left_in_b_i = READ_BUFSIZE - n_read;
+ desired_pos += rem;
if (rem < n_bytes_left_in_b_i)
{
xwrite_stdout (b[i] + n_read, rem);
@@ -392,6 +429,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
*/
size_t y = READ_BUFSIZE - rem;
size_t x = n_read - y;
+ desired_pos += x;
xwrite_stdout (b[i_next], x);
}
}
@@ -400,36 +438,16 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
for (i = 0; i < n_alloc; i++)
free (b[i]);
free (b);
-
- return ok;
}
-}
-
-/* Call lseek (FD, OFFSET, WHENCE), where file descriptor FD
- corresponds to the file FILENAME. WHENCE must be SEEK_SET or
- SEEK_CUR. Return the resulting offset. Give a diagnostic and
- return -1 if lseek fails. */
-
-static off_t
-elseek (int fd, off_t offset, int whence, char const *filename)
-{
- off_t new_offset = lseek (fd, offset, whence);
- char buf[INT_BUFSIZE_BOUND (offset)];
- if (new_offset < 0)
- error (0, errno,
- _(whence == SEEK_SET
- ? N_("%s: cannot seek to offset %s")
- : N_("%s: cannot seek to relative offset %s")),
- quotearg_colon (filename),
- offtostr (offset, buf));
-
- return new_offset;
+ if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0)
+ ok = false;
+ return ok;
}
/* For the file FILENAME with descriptor FD, output all but the last N_ELIDE
bytes. If SIZE is nonnegative, this is a regular file positioned
- at START_POS with SIZE bytes. Return true on success.
+ at CURRENT_POS with SIZE bytes. Return true on success.
Give a diagnostic and return false upon error. */
/* NOTE: if the input file shrinks by more than N_ELIDE bytes between
@@ -437,10 +455,11 @@ elseek (int fd, off_t offset, int whence, char const *filename)
static bool
elide_tail_bytes_file (const char *filename, int fd, uintmax_t n_elide,
- off_t current_pos, off_t size)
+ struct stat const *st, off_t current_pos)
{
- if (size < 0)
- return elide_tail_bytes_pipe (filename, fd, n_elide);
+ off_t size = st->st_size;
+ if (size <= ST_BLKSIZE (*st))
+ return elide_tail_bytes_pipe (filename, fd, n_elide, current_pos);
else
{
/* Be careful here. The current position may actually be
@@ -460,13 +479,16 @@ elide_tail_bytes_file (const char *filename, int fd, uintmax_t n_elide,
}
}
-/* Print all but the last N_ELIDE lines from the input stream
- open for reading via file descriptor FD.
+/* For an input file with name FILENAME and descriptor FD,
+ output all but the last N_ELIDE lines.
+ If CURRENT_POS is nonnegative, the input file is positioned there
+ and should be repositioned to just before the elided lines.
Buffer the specified number of lines as a linked list of LBUFFERs,
adding them as needed. Return true if successful. */
static bool
-elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
+elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
+ off_t current_pos)
{
struct linebuffer
{
@@ -475,6 +497,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
size_t nlines;
struct linebuffer *next;
};
+ uintmax_t desired_pos = current_pos;
typedef struct linebuffer LBUFFER;
LBUFFER *first, *last, *tmp;
size_t total_lines = 0; /* Total number of newlines in all buffers. */
@@ -497,6 +520,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
if (! n_elide)
{
+ desired_pos += n_read;
xwrite_stdout (tmp->buffer, n_read);
continue;
}
@@ -536,6 +560,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
last = last->next = tmp;
if (n_elide < total_lines - first->nlines)
{
+ desired_pos += first->nbytes;
xwrite_stdout (first->buffer, first->nbytes);
tmp = first;
total_lines -= first->nlines;
@@ -565,6 +590,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
for (tmp = first; n_elide < total_lines - tmp->nlines; tmp = tmp->next)
{
+ desired_pos += tmp->nbytes;
xwrite_stdout (tmp->buffer, tmp->nbytes);
total_lines -= tmp->nlines;
}
@@ -581,6 +607,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
++tmp->nlines;
--n;
}
+ desired_pos += p - tmp->buffer;
xwrite_stdout (tmp->buffer, p - tmp->buffer);
}
@@ -591,6 +618,9 @@ free_lbuffers:
free (first);
first = tmp;
}
+
+ if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0)
+ ok = false;
return ok;
}
@@ -714,10 +744,11 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd,
static bool
elide_tail_lines_file (const char *filename, int fd, uintmax_t n_elide,
- off_t current_pos, off_t size)
+ struct stat const *st, off_t current_pos)
{
- if (size < 0)
- return elide_tail_lines_pipe (filename, fd, n_elide);
+ off_t size = st->st_size;
+ if (size <= ST_BLKSIZE (*st))
+ return elide_tail_lines_pipe (filename, fd, n_elide, current_pos);
else
{
/* Find the offset, OFF, of the Nth newline from the end,
@@ -802,28 +833,24 @@ head (const char *filename, int fd, uintmax_t n_units, bool count_lines,
if (elide_from_end)
{
- off_t current_pos = -1, size = -1;
- if (! presume_input_pipe)
+ off_t current_pos = -1;
+ struct stat st;
+ if (fstat (fd, &st) != 0)
{
- struct stat st;
- if (fstat (fd, &st) != 0)
- {
- error (0, errno, _("cannot fstat %s"),
- quotearg_colon (filename));
- return false;
- }
- if (S_ISREG (st.st_mode))
- {
- size = st.st_size;
- current_pos = elseek (fd, 0, SEEK_CUR, filename);
- if (current_pos < 0)
- return false;
- }
+ error (0, errno, _("cannot fstat %s"),
+ quotearg_colon (filename));
+ return false;
+ }
+ if (! presume_input_pipe && usable_st_size (&st))
+ {
+ current_pos = elseek (fd, 0, SEEK_CUR, filename);
+ if (current_pos < 0)
+ return false;
}
if (count_lines)
- return elide_tail_lines_file (filename, fd, n_units, current_pos, size);
+ return elide_tail_lines_file (filename, fd, n_units, &st, current_pos);
else
- return elide_tail_bytes_file (filename, fd, n_units, current_pos, size);
+ return elide_tail_bytes_file (filename, fd, n_units, &st, current_pos);
}
if (count_lines)
return head_lines (filename, fd, n_units);
diff --git a/src/od.c b/src/od.c
index 18b16836d..7ac663ad4 100644
--- a/src/od.c
+++ b/src/od.c
@@ -27,6 +27,7 @@
#include "error.h"
#include "ftoastr.h"
#include "quote.h"
+#include "stat-size.h"
#include "xfreopen.h"
#include "xprintf.h"
#include "xstrtol.h"
@@ -1034,9 +1035,11 @@ skip (uintmax_t n_skip)
If the number of bytes left to skip is larger than
the size of the current file, we can decrement n_skip
and go on to the next file. Skip this optimization also
- when st_size is 0, because some kernels report that
- nonempty files in /proc have st_size == 0. */
- if (S_ISREG (file_stats.st_mode) && 0 < file_stats.st_size)
+ when st_size is no greater than the block size, because
+ some kernels report nonsense small file sizes for
+ proc-like file systems. */
+ if (usable_st_size (&file_stats)
+ && ST_BLKSIZE (file_stats) < file_stats.st_size)
{
if ((uintmax_t) file_stats.st_size < n_skip)
n_skip -= file_stats.st_size;
@@ -1052,6 +1055,7 @@ skip (uintmax_t n_skip)
}
/* If it's not a regular file with nonnegative size,
+ or if it's so small that it might be in a proc-like file system,
position the file pointer by reading. */
else
@@ -1067,10 +1071,15 @@ skip (uintmax_t n_skip)
n_skip -= n_bytes_read;
if (n_bytes_read != n_bytes_to_read)
{
- in_errno = errno;
- ok = false;
- n_skip = 0;
- break;
+ if (ferror (in_stream))
+ {
+ in_errno = errno;
+ ok = false;
+ n_skip = 0;
+ break;
+ }
+ if (feof (in_stream))
+ break;
}
}
}
diff --git a/src/split.c b/src/split.c
index 9b238e450..ec0da7deb 100644
--- a/src/split.c
+++ b/src/split.c
@@ -246,6 +246,37 @@ r/K/N likewise but only output Kth of N to stdout\n\
exit (status);
}
+/* Return the number of bytes that can be read from FD, a file with
+ apparent size SIZE. Actually read the data into BUF (of size
+ BUFSIZE) if the file appears to be smaller than BUFSIZE, as this
+ works better on proc-like file systems. If the returned value is
+ less than BUFSIZE, store all the file's data into BUF; otherwise,
+ restore the input file's position so that the file can be reread if
+ needed. */
+
+static off_t
+input_file_size (int fd, off_t size, char *buf, size_t bufsize)
+{
+ if (size < bufsize)
+ {
+ size = 0;
+ while (true)
+ {
+ size_t save = size < bufsize ? size : 0;
+ size_t n_read = safe_read (fd, buf + save, bufsize - save);
+ if (n_read == 0)
+ break;
+ if (n_read == SAFE_READ_ERROR)
+ error (EXIT_FAILURE, errno, "%s", infile);
+ size += n_read;
+ }
+ if (bufsize <= size && lseek (fd, - size, SEEK_CUR) < 0)
+ error (EXIT_FAILURE, errno, "%s", infile);
+ }
+
+ return size;
+}
+
/* Compute the next sequential output file name and store it into the
string 'outfile'. */
@@ -511,10 +542,13 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes)
}
/* Split into pieces of exactly N_BYTES bytes.
- Use buffer BUF, whose size is BUFSIZE. */
+ Use buffer BUF, whose size is BUFSIZE.
+ If INITIAL_READ != SIZE_MAX, the entire input file has already been
+ read into BUF, which contains INITIAL_READ input bytes. */
static void
-bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
+bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
+ uintmax_t max_files)
{
size_t n_read;
bool new_file_flag = true;
@@ -525,9 +559,17 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
do
{
- n_read = safe_read (STDIN_FILENO, buf, bufsize);
- if (n_read == SAFE_READ_ERROR)
- error (EXIT_FAILURE, errno, "%s", infile);
+ if (initial_read != SIZE_MAX)
+ {
+ n_read = initial_read;
+ initial_read = SIZE_MAX;
+ }
+ else
+ {
+ n_read = safe_read (STDIN_FILENO, buf, bufsize);
+ if (n_read == SAFE_READ_ERROR)
+ error (EXIT_FAILURE, errno, "%s", infile);
+ }
bp_out = buf;
to_read = n_read;
while (true)
@@ -736,7 +778,7 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
static void
lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
- off_t file_size)
+ size_t initial_read, off_t file_size)
{
assert (n && k <= n && n <= file_size);
@@ -751,7 +793,12 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
{
/* Start reading 1 byte before kth chunk of file. */
off_t start = (k - 1) * chunk_size - 1;
- if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
+ if (initial_read != SIZE_MAX)
+ {
+ memmove (buf, buf + start, initial_read - start);
+ initial_read -= start;
+ }
+ else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
error (EXIT_FAILURE, errno, "%s", infile);
n_written = start;
chunk_no = k - 1;
@@ -761,10 +808,19 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
while (n_written < file_size)
{
char *bp = buf, *eob;
- size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
- if (n_read == SAFE_READ_ERROR)
- error (EXIT_FAILURE, errno, "%s", infile);
- else if (n_read == 0)
+ size_t n_read;
+ if (initial_read != SIZE_MAX)
+ {
+ n_read = initial_read;
+ initial_read = SIZE_MAX;
+ }
+ else
+ {
+ n_read = safe_read (STDIN_FILENO, buf, bufsize);
+ if (n_read == SAFE_READ_ERROR)
+ error (EXIT_FAILURE, errno, "%s", infile);
+ }
+ if (n_read == 0)
break; /* eof. */
n_read = MIN (n_read, file_size - n_written);
chunk_truncated = false;
@@ -841,7 +897,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
static void
bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
- off_t file_size)
+ size_t initial_read, off_t file_size)
{
off_t start;
off_t end;
@@ -851,15 +907,29 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
start = (k - 1) * (file_size / n);
end = (k == n) ? file_size : k * (file_size / n);
- if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
+ if (initial_read != SIZE_MAX)
+ {
+ memmove (buf, buf + start, initial_read - start);
+ initial_read -= start;
+ }
+ else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
error (EXIT_FAILURE, errno, "%s", infile);
while (start < end)
{
- size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
- if (n_read == SAFE_READ_ERROR)
- error (EXIT_FAILURE, errno, "%s", infile);
- else if (n_read == 0)
+ size_t n_read;
+ if (initial_read != SIZE_MAX)
+ {
+ n_read = initial_read;
+ initial_read = SIZE_MAX;
+ }
+ else
+ {
+ n_read = safe_read (STDIN_FILENO, buf, bufsize);
+ if (n_read == SAFE_READ_ERROR)
+ error (EXIT_FAILURE, errno, "%s", infile);
+ }
+ if (n_read == 0)
break; /* eof. */
n_read = MIN (n_read, end - start);
if (full_write (STDOUT_FILENO, buf, n_read) != n_read
@@ -1403,22 +1473,34 @@ main (int argc, char **argv)
if (in_blk_size == 0)
in_blk_size = io_blksize (in_stat_buf);
+ void *b = xmalloc (in_blk_size + 1 + page_size - 1);
+ char *buf = ptr_align (b, page_size);
+ size_t initial_read = SIZE_MAX;
+
if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
{
off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
- if (usable_st_size (&in_stat_buf))
- file_size = in_stat_buf.st_size;
- else if (0 <= input_offset)
+ if (0 <= input_offset)
{
- file_size = lseek (STDIN_FILENO, 0, SEEK_END);
- input_offset = (file_size < 0
- ? file_size
- : lseek (STDIN_FILENO, input_offset, SEEK_SET));
+ if (usable_st_size (&in_stat_buf))
+ {
+ file_size = input_file_size (STDIN_FILENO, in_stat_buf.st_size,
+ buf, in_blk_size);
+ if (file_size < in_blk_size)
+ initial_read = file_size;
+ }
+ else
+ {
+ file_size = lseek (STDIN_FILENO, 0, SEEK_END);
+ input_offset = (file_size < 0
+ ? file_size
+ : lseek (STDIN_FILENO, input_offset, SEEK_SET));
+ file_size -= input_offset;
+ }
}
if (input_offset < 0)
error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
quote (infile));
- file_size -= input_offset;
/* Overflow, and sanity checking. */
if (OFF_T_MAX < n_units)
{
@@ -1431,9 +1513,6 @@ main (int argc, char **argv)
file_size = MAX (file_size, n_units);
}
- void *b = xmalloc (in_blk_size + 1 + page_size - 1);
- char *buf = ptr_align (b, page_size);
-
/* When filtering, closure of one pipe must not terminate the process,
as there may still be other streams expecting input from us. */
if (filter_command)
@@ -1454,7 +1533,7 @@ main (int argc, char **argv)
break;
case type_bytes:
- bytes_split (n_units, buf, in_blk_size, 0);
+ bytes_split (n_units, buf, in_blk_size, SIZE_MAX, 0);
break;
case type_byteslines:
@@ -1463,13 +1542,16 @@ main (int argc, char **argv)
case type_chunk_bytes:
if (k_units == 0)
- bytes_split (file_size / n_units, buf, in_blk_size, n_units);
+ bytes_split (file_size / n_units, buf, in_blk_size, initial_read,
+ n_units);
else
- bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
+ bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
+ file_size);
break;
case type_chunk_lines:
- lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
+ lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
+ file_size);
break;
case type_rr:
diff --git a/src/tac.c b/src/tac.c
index 192dbd3be..248afa9d7 100644
--- a/src/tac.c
+++ b/src/tac.c
@@ -187,10 +187,11 @@ output (const char *start, const char *past_end)
}
/* Print in reverse the file open on descriptor FD for reading FILE.
+ The file is already positioned at FILE_POS, which should be near its end.
Return true if successful. */
static bool
-tac_seekable (int input_fd, const char *file)
+tac_seekable (int input_fd, const char *file, off_t file_pos)
{
/* Pointer to the location in 'G_buffer' where the search for
the next separator will begin. */
@@ -203,9 +204,6 @@ tac_seekable (int input_fd, const char *file)
/* Length of the record growing in 'G_buffer'. */
size_t saved_record_size;
- /* Offset in the file of the next read. */
- off_t file_pos;
-
/* True if 'output' has not been called yet for any file.
Only used when the separator is attached to the preceding record. */
bool first_time = true;
@@ -213,27 +211,43 @@ tac_seekable (int input_fd, const char *file)
char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
- /* Find the size of the input file. */
- file_pos = lseek (input_fd, 0, SEEK_END);
- if (file_pos < 1)
- return true; /* It's an empty file. */
-
/* Arrange for the first read to lop off enough to leave the rest of the
file a multiple of 'read_size'. Since 'read_size' can change, this may
not always hold during the program run, but since it usually will, leave
it here for i/o efficiency (page/sector boundaries and all that).
Note: the efficiency gain has not been verified. */
- saved_record_size = file_pos % read_size;
- if (saved_record_size == 0)
- saved_record_size = read_size;
- file_pos -= saved_record_size;
- /* 'file_pos' now points to the start of the last (probably partial) block
- in the input file. */
+ size_t remainder = file_pos % read_size;
+ if (remainder != 0)
+ {
+ file_pos -= remainder;
+ if (lseek (input_fd, file_pos, SEEK_SET) < 0)
+ error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+ }
- if (lseek (input_fd, file_pos, SEEK_SET) < 0)
- error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+ /* Scan backward, looking for end of file. This caters to proc-like
+ file systems where the file size is just an estimate. */
+ while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
+ && file_pos != 0)
+ {
+ off_t rsize = read_size;
+ if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
+ error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+ file_pos -= read_size;
+ }
- if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
+ /* Now scan forward, looking for end of file. */
+ while (saved_record_size == read_size)
+ {
+ size_t nread = safe_read (input_fd, G_buffer, read_size);
+ if (nread == 0)
+ break;
+ saved_record_size = nread;
+ if (saved_record_size == SAFE_READ_ERROR)
+ break;
+ file_pos += nread;
+ }
+
+ if (saved_record_size == SAFE_READ_ERROR)
{
error (0, errno, _("%s: read error"), quotearg_colon (file));
return false;
@@ -485,15 +499,16 @@ temp_stream (FILE **fp, char **file_name)
/* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
- and file name. Return true if successful. */
+ and file name. Return the number of bytes copied, or -1 on error. */
-static bool
+static off_t
copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
{
FILE *fp;
char *file_name;
+ off_t bytes_copied = 0;
if (!temp_stream (&fp, &file_name))
- return false;
+ return -1;
while (1)
{
@@ -511,6 +526,8 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
error (0, errno, _("%s: write error"), quotearg_colon (file_name));
goto Fail;
}
+
+ bytes_copied += bytes_read;
}
if (fflush (fp) != 0)
@@ -521,11 +538,11 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
*g_tmp = fp;
*g_tempfile = file_name;
- return true;
+ return bytes_copied;
Fail:
fclose (fp);
- return false;
+ return -1;
}
/* Copy INPUT_FD to a temporary, then tac that file.
@@ -536,10 +553,11 @@ tac_nonseekable (int input_fd, const char *file)
{
FILE *tmp_stream;
char *tmp_file;
- if (!copy_to_temp (&tmp_stream, &tmp_file, input_fd, file))
+ off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file);
+ if (bytes_copied < 0)
return false;
- bool ok = tac_seekable (fileno (tmp_stream), tmp_file);
+ bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied);
return ok;
}
@@ -578,7 +596,7 @@ tac_file (const char *filename)
ok = (file_size < 0 || isatty (fd)
? tac_nonseekable (fd, filename)
- : tac_seekable (fd, filename));
+ : tac_seekable (fd, filename, file_size));
if (!is_stdin && close (fd) != 0)
{
diff --git a/src/tail.c b/src/tail.c
index f5d258517..4c5f943c7 100644
--- a/src/tail.c
+++ b/src/tail.c
@@ -40,6 +40,7 @@
#include "posixver.h"
#include "quote.h"
#include "safe-read.h"
+#include "stat-size.h"
#include "stat-time.h"
#include "xfreopen.h"
#include "xnanosleep.h"
@@ -1665,40 +1666,30 @@ tail_bytes (const char *pretty_filename, int fd, uintmax_t n_bytes,
if (t)
return t < 0;
}
- *read_pos += dump_remainder (pretty_filename, fd, COPY_TO_EOF);
+ n_bytes = COPY_TO_EOF;
}
else
{
- if ( ! presume_input_pipe
- && S_ISREG (stats.st_mode) && n_bytes <= OFF_T_MAX)
+ off_t end_pos = ((! presume_input_pipe && usable_st_size (&stats)
+ && n_bytes <= OFF_T_MAX)
+ ? stats.st_size : -1);
+ if (end_pos <= ST_BLKSIZE (stats))
+ return pipe_bytes (pretty_filename, fd, n_bytes, read_pos);
+ off_t current_pos = xlseek (fd, 0, SEEK_CUR, pretty_filename);
+ if (current_pos < end_pos)
{
- off_t current_pos = xlseek (fd, 0, SEEK_CUR, pretty_filename);
- off_t end_pos = xlseek (fd, 0, SEEK_END, pretty_filename);
- off_t diff = end_pos - current_pos;
- /* Be careful here. The current position may actually be
- beyond the end of the file. */
- off_t bytes_remaining = diff < 0 ? 0 : diff;
- off_t nb = n_bytes;
-
- if (bytes_remaining <= nb)
- {
- /* From the current position to end of file, there are no
- more bytes than have been requested. So reposition the
- file pointer to the incoming current position and print
- everything after that. */
- *read_pos = xlseek (fd, current_pos, SEEK_SET, pretty_filename);
- }
- else
+ off_t bytes_remaining = end_pos - current_pos;
+
+ if (n_bytes < bytes_remaining)
{
- /* There are more bytes remaining than were requested.
- Back up. */
- *read_pos = xlseek (fd, -nb, SEEK_END, pretty_filename);
+ current_pos = end_pos - n_bytes;
+ xlseek (fd, current_pos, SEEK_SET, pretty_filename);
}
- *read_pos += dump_remainder (pretty_filename, fd, n_bytes);
}
- else
- return pipe_bytes (pretty_filename, fd, n_bytes, read_pos);
+ *read_pos = current_pos;
}
+
+ *read_pos += dump_remainder (pretty_filename, fd, n_bytes);
return true;
}
diff --git a/src/wc.c b/src/wc.c
index 1ff007dcf..24069f7e9 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -184,9 +184,10 @@ write_counts (uintmax_t lines,
/* Count words. FILE_X is the name of the file (or NULL for standard
input) that is open on descriptor FD. *FSTATUS is its status.
+ CURRENT_POS is the current file offset if known, negative if unknown.
Return true if successful. */
static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
{
bool ok = true;
char buf[BUFFER_SIZE + 1];
@@ -229,32 +230,34 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
if (count_bytes && !count_chars && !print_lines && !count_complicated)
{
- off_t current_pos, end_pos;
-
if (0 < fstatus->failed)
fstatus->failed = fstat (fd, &fstatus->st);
- if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
- && (current_pos = lseek (fd, 0, SEEK_CUR)) != -1
- && (end_pos = lseek (fd, 0, SEEK_END)) != -1)
+ /* For sized files, seek to one buffer before EOF rather than to EOF.
+ This works better for files in proc-like file systems where
+ the size is only approximate. */
+ if (! fstatus->failed && usable_st_size (&fstatus->st)
+ && 0 <= fstatus->st.st_size)
{
- /* Be careful here. The current position may actually be
- beyond the end of the file. As in the example above. */
- bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+ size_t end_pos = fstatus->st.st_size;
+ off_t hi_pos = end_pos - end_pos % BUFFER_SIZE;
+ if (current_pos < 0)
+ current_pos = lseek (fd, 0, SEEK_CUR);
+ if (0 <= current_pos && current_pos < hi_pos
+ && 0 <= lseek (fd, hi_pos, SEEK_CUR))
+ bytes = hi_pos - current_pos;
}
- else
+
+ fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+ while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
- fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
- while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+ if (bytes_read == SAFE_READ_ERROR)
{
- if (bytes_read == SAFE_READ_ERROR)
- {
- error (0, errno, "%s", file);
- ok = false;
- break;
- }
- bytes += bytes_read;
+ error (0, errno, "%s", file);
+ ok = false;
+ break;
}
+ bytes += bytes_read;
}
}
else if (!count_chars && !count_complicated)
@@ -500,7 +503,7 @@ wc_file (char const *file, struct fstatus *fstatus)
have_read_stdin = true;
if (O_BINARY && ! isatty (STDIN_FILENO))
xfreopen (NULL, "rb", stdin);
- return wc (STDIN_FILENO, file, fstatus);
+ return wc (STDIN_FILENO, file, fstatus, -1);
}
else
{
@@ -512,7 +515,7 @@ wc_file (char const *file, struct fstatus *fstatus)
}
else
{
- bool ok = wc (fd, file, fstatus);
+ bool ok = wc (fd, file, fstatus, 0);
if (close (fd) != 0)
{
error (0, errno, "%s", file);