diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-10-07 16:46:08 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-10-07 16:47:37 -0700 |
commit | 2662702b9e8643f62c670bbf2fa94b1be1ccf9af (patch) | |
tree | c82775c20abc304fa4f187218830dc3aa2f7e481 /src/tac.c | |
parent | b020002b4bfae55d5bbcf66bd7ce787a4e6da689 (diff) | |
download | coreutils-2662702b9e8643f62c670bbf2fa94b1be1ccf9af.tar.xz |
wc: don't miscount /sys and similar file systems
Fix similar problems in head, od, split, tac, and tail.
Reported by George Shuklin in: http://bugs.gnu.org/18621
* NEWS: Document this.
* src/head.c (elseek): Move up.
(elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg
CURRENT_POS. All uses changed.
(elide_tail_bytes_file, elide_tail_lines_file):
New arg ST and remove arg SIZE. All uses changed.
* src/head.c (elide_tail_bytes_file):
* src/od.c (skip): Avoid optimization for /sys files, where
st_size is bogus and st_size == st_blksize.
Don't report error at EOF when not optimizing.
* src/head.c, src/od.c, src/tail.c: Include "stat-size.h".
* src/split.c (input_file_size): New function.
(bytes_split, lines_chunk_split, bytes_chunk_extract): New arg
INITIAL_READ. All uses changed. Use it to double-check st_size.
* src/tac.c (tac_seekable): New arg FILE_POS. All uses changed.
(copy_to_temp): Return size of temp file. All uses changed.
* src/tac.c (tac_seekable):
* src/tail.c (tail_bytes):
* src/wc.c (wc):
Don't trust st_size; double-check by reading.
* src/wc.c (wc): New arg CURRENT_POS. All uses changed.
* tests/local.mk (all_tests): Add tests/misc/wc-proc.sh,
tests/misc/od-j.sh, tests/tail-2/tail-c.sh.
* tests/misc/head-c.sh:
* tests/misc/tac-2-nonseekable.sh:
* tests/split/b-chunk.sh:
Add tests for problems with /proc and /sys files.
* tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh:
New files.
Diffstat (limited to 'src/tac.c')
-rw-r--r-- | src/tac.c | 70 |
1 file changed, 44 insertions, 26 deletions
@@ -187,10 +187,11 @@ output (const char *start, const char *past_end) } /* Print in reverse the file open on descriptor FD for reading FILE. + The file is already positioned at FILE_POS, which should be near its end. Return true if successful. */ static bool -tac_seekable (int input_fd, const char *file) +tac_seekable (int input_fd, const char *file, off_t file_pos) { /* Pointer to the location in 'G_buffer' where the search for the next separator will begin. */ @@ -203,9 +204,6 @@ tac_seekable (int input_fd, const char *file) /* Length of the record growing in 'G_buffer'. */ size_t saved_record_size; - /* Offset in the file of the next read. */ - off_t file_pos; - /* True if 'output' has not been called yet for any file. Only used when the separator is attached to the preceding record. */ bool first_time = true; @@ -213,27 +211,43 @@ tac_seekable (int input_fd, const char *file) char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */ size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */ - /* Find the size of the input file. */ - file_pos = lseek (input_fd, 0, SEEK_END); - if (file_pos < 1) - return true; /* It's an empty file. */ - /* Arrange for the first read to lop off enough to leave the rest of the file a multiple of 'read_size'. Since 'read_size' can change, this may not always hold during the program run, but since it usually will, leave it here for i/o efficiency (page/sector boundaries and all that). Note: the efficiency gain has not been verified. */ - saved_record_size = file_pos % read_size; - if (saved_record_size == 0) - saved_record_size = read_size; - file_pos -= saved_record_size; - /* 'file_pos' now points to the start of the last (probably partial) block - in the input file. 
*/ + size_t remainder = file_pos % read_size; + if (remainder != 0) + { + file_pos -= remainder; + if (lseek (input_fd, file_pos, SEEK_SET) < 0) + error (0, errno, _("%s: seek failed"), quotearg_colon (file)); + } - if (lseek (input_fd, file_pos, SEEK_SET) < 0) - error (0, errno, _("%s: seek failed"), quotearg_colon (file)); + /* Scan backward, looking for end of file. This caters to proc-like + file systems where the file size is just an estimate. */ + while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0 + && file_pos != 0) + { + off_t rsize = read_size; + if (lseek (input_fd, -rsize, SEEK_CUR) < 0) + error (0, errno, _("%s: seek failed"), quotearg_colon (file)); + file_pos -= read_size; + } - if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size) + /* Now scan forward, looking for end of file. */ + while (saved_record_size == read_size) + { + size_t nread = safe_read (input_fd, G_buffer, read_size); + if (nread == 0) + break; + saved_record_size = nread; + if (saved_record_size == SAFE_READ_ERROR) + break; + file_pos += nread; + } + + if (saved_record_size == SAFE_READ_ERROR) { error (0, errno, _("%s: read error"), quotearg_colon (file)); return false; @@ -485,15 +499,16 @@ temp_stream (FILE **fp, char **file_name) /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream - and file name. Return true if successful. */ + and file name. Return the number of bytes copied, or -1 on error. 
*/ -static bool +static off_t copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file) { FILE *fp; char *file_name; + off_t bytes_copied = 0; if (!temp_stream (&fp, &file_name)) - return false; + return -1; while (1) { @@ -511,6 +526,8 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file) error (0, errno, _("%s: write error"), quotearg_colon (file_name)); goto Fail; } + + bytes_copied += bytes_read; } if (fflush (fp) != 0) @@ -521,11 +538,11 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file) *g_tmp = fp; *g_tempfile = file_name; - return true; + return bytes_copied; Fail: fclose (fp); - return false; + return -1; } /* Copy INPUT_FD to a temporary, then tac that file. @@ -536,10 +553,11 @@ tac_nonseekable (int input_fd, const char *file) { FILE *tmp_stream; char *tmp_file; - if (!copy_to_temp (&tmp_stream, &tmp_file, input_fd, file)) + off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file); + if (bytes_copied < 0) return false; - bool ok = tac_seekable (fileno (tmp_stream), tmp_file); + bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied); return ok; } @@ -578,7 +596,7 @@ tac_file (const char *filename) ok = (file_size < 0 || isatty (fd) ? tac_nonseekable (fd, filename) - : tac_seekable (fd, filename)); + : tac_seekable (fd, filename, file_size)); if (!is_stdin && close (fd) != 0) { |