From 2662702b9e8643f62c670bbf2fa94b1be1ccf9af Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Tue, 7 Oct 2014 16:46:08 -0700
Subject: wc: don't miscount /sys and similar file systems

Fix similar problems in head, od, split, tac, and tail.
Reported by George Shuklin in: http://bugs.gnu.org/18621
* NEWS: Document this.
* src/head.c (elseek): Move up.
(elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg CURRENT_POS.
All uses changed.
(elide_tail_bytes_file, elide_tail_lines_file): New arg ST and
remove arg SIZE. All uses changed.
* src/head.c (elide_tail_bytes_file):
* src/od.c (skip): Avoid optimization for /sys files, where
st_size is bogus and st_size == st_blksize.
Don't report error at EOF when not optimizing.
* src/head.c, src/od.c, src/tail.c: Include "stat-size.h".
* src/split.c (input_file_size): New function.
(bytes_split, lines_chunk_split, bytes_chunk_extract):
New arg INITIAL_READ. All uses changed. Use it to double-check st_size.
* src/tac.c (tac_seekable): New arg FILE_POS. All uses changed.
(copy_to_temp): Return size of temp file. All uses changed.
* src/tac.c (tac_seekable):
* src/tail.c (tail_bytes):
* src/wc.c (wc):
Don't trust st_size; double-check by reading.
* src/wc.c (wc): New arg CURRENT_POS. All uses changed.
* tests/local.mk (all_tests): Add tests/misc/wc-proc.sh,
tests/misc/od-j.sh, tests/tail-2/tail-c.sh.
* tests/misc/head-c.sh:
* tests/misc/tac-2-nonseekable.sh:
* tests/split/b-chunk.sh:
Add tests for problems with /proc and /sys files.
* tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh:
New files.
---
 src/wc.c | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

(limited to 'src/wc.c')

diff --git a/src/wc.c b/src/wc.c
index 1ff007dcf..24069f7e9 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -184,9 +184,10 @@ write_counts (uintmax_t lines,
 
 /* Count words.  FILE_X is the name of the file (or NULL for standard
    input) that is open on descriptor FD.  *FSTATUS is its status.
+   CURRENT_POS is the current file offset if known, negative if unknown.
    Return true if successful.  */
 static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
 {
   bool ok = true;
   char buf[BUFFER_SIZE + 1];
@@ -229,32 +230,34 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
 
   if (count_bytes && !count_chars && !print_lines && !count_complicated)
     {
-      off_t current_pos, end_pos;
-
       if (0 < fstatus->failed)
         fstatus->failed = fstat (fd, &fstatus->st);
 
-      if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
-          && (current_pos = lseek (fd, 0, SEEK_CUR)) != -1
-          && (end_pos = lseek (fd, 0, SEEK_END)) != -1)
+      /* For sized files, seek to one buffer before EOF rather than to EOF.
+         This works better for files in proc-like file systems where
+         the size is only approximate.  */
+      if (! fstatus->failed && usable_st_size (&fstatus->st)
+          && 0 <= fstatus->st.st_size)
         {
-          /* Be careful here.  The current position may actually be
-             beyond the end of the file.  As in the example above.  */
-          bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+          size_t end_pos = fstatus->st.st_size;
+          off_t hi_pos = end_pos - end_pos % BUFFER_SIZE;
+          if (current_pos < 0)
+            current_pos = lseek (fd, 0, SEEK_CUR);
+          if (0 <= current_pos && current_pos < hi_pos
+              && 0 <= lseek (fd, hi_pos, SEEK_CUR))
+            bytes = hi_pos - current_pos;
         }
-      else
+
+      fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+      while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
         {
-          fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
-          while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+          if (bytes_read == SAFE_READ_ERROR)
             {
-              if (bytes_read == SAFE_READ_ERROR)
-                {
-                  error (0, errno, "%s", file);
-                  ok = false;
-                  break;
-                }
-              bytes += bytes_read;
+              error (0, errno, "%s", file);
+              ok = false;
+              break;
             }
+          bytes += bytes_read;
         }
     }
   else if (!count_chars && !count_complicated)
@@ -500,7 +503,7 @@ wc_file (char const *file, struct fstatus *fstatus)
       have_read_stdin = true;
       if (O_BINARY && ! isatty (STDIN_FILENO))
         xfreopen (NULL, "rb", stdin);
-      return wc (STDIN_FILENO, file, fstatus);
+      return wc (STDIN_FILENO, file, fstatus, -1);
     }
   else
     {
@@ -512,7 +515,7 @@ wc_file (char const *file, struct fstatus *fstatus)
         }
       else
         {
-          bool ok = wc (fd, file, fstatus);
+          bool ok = wc (fd, file, fstatus, 0);
           if (close (fd) != 0)
             {
               error (0, errno, "%s", file);
-- 
cgit v1.2.3-54-g00ecf