diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-10-07 16:46:08 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-10-07 16:47:37 -0700 |
commit | 2662702b9e8643f62c670bbf2fa94b1be1ccf9af (patch) | |
tree | c82775c20abc304fa4f187218830dc3aa2f7e481 /src/wc.c | |
parent | b020002b4bfae55d5bbcf66bd7ce787a4e6da689 (diff) | |
download | coreutils-2662702b9e8643f62c670bbf2fa94b1be1ccf9af.tar.xz |
wc: don't miscount /sys and similar file systems
Fix similar problems in head, od, split, tac, and tail.
Reported by George Shuklin in: http://bugs.gnu.org/18621
* NEWS: Document this.
* src/head.c (elseek): Move up.
(elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg
CURRENT_POS. All uses changed.
(elide_tail_bytes_file, elide_tail_lines_file):
New arg ST and remove arg SIZE. All uses changed.
* src/head.c (elide_tail_bytes_file):
* src/od.c (skip): Avoid optimization for /sys files, where
st_size is bogus and st_size == st_blksize.
Don't report error at EOF when not optimizing.
* src/head.c, src/od.c, src/tail.c: Include "stat-size.h".
* src/split.c (input_file_size): New function.
(bytes_split, lines_chunk_split, bytes_chunk_extract): New arg
INITIAL_READ. All uses changed. Use it to double-check st_size.
* src/tac.c (tac_seekable): New arg FILE_POS. All uses changed.
(copy_to_temp): Return size of temp file. All uses changed.
* src/tac.c (tac_seekable):
* src/tail.c (tail_bytes):
* src/wc.c (wc):
Don't trust st_size; double-check by reading.
* src/wc.c (wc): New arg CURRENT_POS. All uses changed.
* tests/local.mk (all_tests): Add tests/misc/wc-proc.sh,
tests/misc/od-j.sh, tests/tail-2/tail-c.sh.
* tests/misc/head-c.sh:
* tests/misc/tac-2-nonseekable.sh:
* tests/split/b-chunk.sh:
Add tests for problems with /proc and /sys files.
* tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh:
New files.
Diffstat (limited to 'src/wc.c')
-rw-r--r-- | src/wc.c | 45 |
1 files changed, 24 insertions, 21 deletions
@@ -184,9 +184,10 @@ write_counts (uintmax_t lines,
 
 /* Count words. FILE_X is the name of the file (or NULL for standard
    input) that is open on descriptor FD. *FSTATUS is its status.
+   CURRENT_POS is the current file offset if known, negative if unknown.
    Return true if successful. */
 static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
 {
   bool ok = true;
   char buf[BUFFER_SIZE + 1];
@@ -229,32 +230,34 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
 
   if (count_bytes && !count_chars && !print_lines && !count_complicated)
     {
-      off_t current_pos, end_pos;
-
       if (0 < fstatus->failed)
         fstatus->failed = fstat (fd, &fstatus->st);
 
-      if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
-          && (current_pos = lseek (fd, 0, SEEK_CUR)) != -1
-          && (end_pos = lseek (fd, 0, SEEK_END)) != -1)
+      /* For sized files, seek to one buffer before EOF rather than to EOF.
+         This works better for files in proc-like file systems where
+         the size is only approximate. */
+      if (! fstatus->failed && usable_st_size (&fstatus->st)
+          && 0 <= fstatus->st.st_size)
         {
-          /* Be careful here. The current position may actually be
-             beyond the end of the file. As in the example above. */
-          bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+          size_t end_pos = fstatus->st.st_size;
+          off_t hi_pos = end_pos - end_pos % BUFFER_SIZE;
+          if (current_pos < 0)
+            current_pos = lseek (fd, 0, SEEK_CUR);
+          if (0 <= current_pos && current_pos < hi_pos
+              && 0 <= lseek (fd, hi_pos, SEEK_CUR))
+            bytes = hi_pos - current_pos;
         }
-      else
+
+      fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+      while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
         {
-          fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
-          while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+          if (bytes_read == SAFE_READ_ERROR)
             {
-              if (bytes_read == SAFE_READ_ERROR)
-                {
-                  error (0, errno, "%s", file);
-                  ok = false;
-                  break;
-                }
-              bytes += bytes_read;
+              error (0, errno, "%s", file);
+              ok = false;
+              break;
             }
+          bytes += bytes_read;
         }
     }
   else if (!count_chars && !count_complicated)
@@ -500,7 +503,7 @@ wc_file (char const *file, struct fstatus *fstatus)
       have_read_stdin = true;
       if (O_BINARY && ! isatty (STDIN_FILENO))
         xfreopen (NULL, "rb", stdin);
-      return wc (STDIN_FILENO, file, fstatus);
+      return wc (STDIN_FILENO, file, fstatus, -1);
     }
   else
     {
@@ -512,7 +515,7 @@ wc_file (char const *file, struct fstatus *fstatus)
         }
       else
         {
-          bool ok = wc (fd, file, fstatus);
+          bool ok = wc (fd, file, fstatus, 0);
           if (close (fd) != 0)
             {
               error (0, errno, "%s", file);