summary | refs | log | tree | commit | diff
path: root/src/tac.c
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2014-10-07 16:46:08 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2014-10-07 16:47:37 -0700
commit: 2662702b9e8643f62c670bbf2fa94b1be1ccf9af (patch)
tree: c82775c20abc304fa4f187218830dc3aa2f7e481 /src/tac.c
parent: b020002b4bfae55d5bbcf66bd7ce787a4e6da689 (diff)
download: coreutils-2662702b9e8643f62c670bbf2fa94b1be1ccf9af.tar.xz
wc: don't miscount /sys and similar file systems
Fix similar problems in head, od, split, tac, and tail.
Reported by George Shuklin in: http://bugs.gnu.org/18621
* NEWS: Document this.
* src/head.c (elseek): Move up.
(elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg CURRENT_POS. All uses changed.
(elide_tail_bytes_file, elide_tail_lines_file): New arg ST and remove arg SIZE. All uses changed.
* src/head.c (elide_tail_bytes_file):
* src/od.c (skip): Avoid optimization for /sys files, where st_size is bogus and st_size == st_blksize. Don't report error at EOF when not optimizing.
* src/head.c, src/od.c, src/tail.c: Include "stat-size.h".
* src/split.c (input_file_size): New function.
(bytes_split, lines_chunk_split, bytes_chunk_extract): New arg INITIAL_READ. All uses changed. Use it to double-check st_size.
* src/tac.c (tac_seekable): New arg FILE_POS. All uses changed.
(copy_to_temp): Return size of temp file. All uses changed.
* src/tac.c (tac_seekable):
* src/tail.c (tail_bytes):
* src/wc.c (wc): Don't trust st_size; double-check by reading.
* src/wc.c (wc): New arg CURRENT_POS. All uses changed.
* tests/local.mk (all_tests): Add tests/misc/wc-proc.sh, tests/misc/od-j.sh, tests/tail-2/tail-c.sh.
* tests/misc/head-c.sh:
* tests/misc/tac-2-nonseekable.sh:
* tests/split/b-chunk.sh: Add tests for problems with /proc and /sys files.
* tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh: New files.
Diffstat (limited to 'src/tac.c')
-rw-r--r-- src/tac.c 70
1 files changed, 44 insertions, 26 deletions
diff --git a/src/tac.c b/src/tac.c
index 192dbd3be..248afa9d7 100644
--- a/src/tac.c
+++ b/src/tac.c
@@ -187,10 +187,11 @@ output (const char *start, const char *past_end)
}
/* Print in reverse the file open on descriptor FD for reading FILE.
+ The file is already positioned at FILE_POS, which should be near its end.
Return true if successful. */
static bool
-tac_seekable (int input_fd, const char *file)
+tac_seekable (int input_fd, const char *file, off_t file_pos)
{
/* Pointer to the location in 'G_buffer' where the search for
the next separator will begin. */
@@ -203,9 +204,6 @@ tac_seekable (int input_fd, const char *file)
/* Length of the record growing in 'G_buffer'. */
size_t saved_record_size;
- /* Offset in the file of the next read. */
- off_t file_pos;
-
/* True if 'output' has not been called yet for any file.
Only used when the separator is attached to the preceding record. */
bool first_time = true;
@@ -213,27 +211,43 @@ tac_seekable (int input_fd, const char *file)
char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
- /* Find the size of the input file. */
- file_pos = lseek (input_fd, 0, SEEK_END);
- if (file_pos < 1)
- return true; /* It's an empty file. */
-
/* Arrange for the first read to lop off enough to leave the rest of the
file a multiple of 'read_size'. Since 'read_size' can change, this may
not always hold during the program run, but since it usually will, leave
it here for i/o efficiency (page/sector boundaries and all that).
Note: the efficiency gain has not been verified. */
- saved_record_size = file_pos % read_size;
- if (saved_record_size == 0)
- saved_record_size = read_size;
- file_pos -= saved_record_size;
- /* 'file_pos' now points to the start of the last (probably partial) block
- in the input file. */
+ size_t remainder = file_pos % read_size;
+ if (remainder != 0)
+ {
+ file_pos -= remainder;
+ if (lseek (input_fd, file_pos, SEEK_SET) < 0)
+ error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+ }
- if (lseek (input_fd, file_pos, SEEK_SET) < 0)
- error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+ /* Scan backward, looking for end of file. This caters to proc-like
+ file systems where the file size is just an estimate. */
+ while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
+ && file_pos != 0)
+ {
+ off_t rsize = read_size;
+ if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
+ error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+ file_pos -= read_size;
+ }
- if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
+ /* Now scan forward, looking for end of file. */
+ while (saved_record_size == read_size)
+ {
+ size_t nread = safe_read (input_fd, G_buffer, read_size);
+ if (nread == 0)
+ break;
+ saved_record_size = nread;
+ if (saved_record_size == SAFE_READ_ERROR)
+ break;
+ file_pos += nread;
+ }
+
+ if (saved_record_size == SAFE_READ_ERROR)
{
error (0, errno, _("%s: read error"), quotearg_colon (file));
return false;
@@ -485,15 +499,16 @@ temp_stream (FILE **fp, char **file_name)
/* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
- and file name. Return true if successful. */
+ and file name. Return the number of bytes copied, or -1 on error. */
-static bool
+static off_t
copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
{
FILE *fp;
char *file_name;
+ off_t bytes_copied = 0;
if (!temp_stream (&fp, &file_name))
- return false;
+ return -1;
while (1)
{
@@ -511,6 +526,8 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
error (0, errno, _("%s: write error"), quotearg_colon (file_name));
goto Fail;
}
+
+ bytes_copied += bytes_read;
}
if (fflush (fp) != 0)
@@ -521,11 +538,11 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
*g_tmp = fp;
*g_tempfile = file_name;
- return true;
+ return bytes_copied;
Fail:
fclose (fp);
- return false;
+ return -1;
}
/* Copy INPUT_FD to a temporary, then tac that file.
@@ -536,10 +553,11 @@ tac_nonseekable (int input_fd, const char *file)
{
FILE *tmp_stream;
char *tmp_file;
- if (!copy_to_temp (&tmp_stream, &tmp_file, input_fd, file))
+ off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file);
+ if (bytes_copied < 0)
return false;
- bool ok = tac_seekable (fileno (tmp_stream), tmp_file);
+ bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied);
return ok;
}
@@ -578,7 +596,7 @@ tac_file (const char *filename)
ok = (file_size < 0 || isatty (fd)
? tac_nonseekable (fd, filename)
- : tac_seekable (fd, filename));
+ : tac_seekable (fd, filename, file_size));
if (!is_stdin && close (fd) != 0)
{