summaryrefslogtreecommitdiff
path: root/src/wc.c
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2014-10-07 16:46:08 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2014-10-07 16:47:37 -0700
commit: 2662702b9e8643f62c670bbf2fa94b1be1ccf9af (patch)
tree: c82775c20abc304fa4f187218830dc3aa2f7e481 /src/wc.c
parent: b020002b4bfae55d5bbcf66bd7ce787a4e6da689 (diff)
download: coreutils-2662702b9e8643f62c670bbf2fa94b1be1ccf9af.tar.xz
wc: don't miscount /sys and similar file systems
Fix similar problems in head, od, split, tac, and tail.
Reported by George Shuklin in: http://bugs.gnu.org/18621
* NEWS: Document this.
* src/head.c (elseek): Move up.
(elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg CURRENT_POS. All uses changed.
(elide_tail_bytes_file, elide_tail_lines_file): New arg ST and remove arg SIZE. All uses changed.
* src/head.c (elide_tail_bytes_file):
* src/od.c (skip): Avoid optimization for /sys files, where st_size is bogus and st_size == st_blksize. Don't report error at EOF when not optimizing.
* src/head.c, src/od.c, src/tail.c: Include "stat-size.h".
* src/split.c (input_file_size): New function.
(bytes_split, lines_chunk_split, bytes_chunk_extract): New arg INITIAL_READ. All uses changed. Use it to double-check st_size.
* src/tac.c (tac_seekable): New arg FILE_POS. All uses changed.
(copy_to_temp): Return size of temp file. All uses changed.
* src/tac.c (tac_seekable):
* src/tail.c (tail_bytes):
* src/wc.c (wc): Don't trust st_size; double-check by reading.
* src/wc.c (wc): New arg CURRENT_POS. All uses changed.
* tests/local.mk (all_tests): Add tests/misc/wc-proc.sh, tests/misc/od-j.sh, tests/tail-2/tail-c.sh.
* tests/misc/head-c.sh:
* tests/misc/tac-2-nonseekable.sh:
* tests/split/b-chunk.sh: Add tests for problems with /proc and /sys files.
* tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh: New files.
Diffstat (limited to 'src/wc.c')
-rw-r--r-- src/wc.c 45
1 files changed, 24 insertions, 21 deletions
diff --git a/src/wc.c b/src/wc.c
index 1ff007dcf..24069f7e9 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -184,9 +184,10 @@ write_counts (uintmax_t lines,
/* Count words. FILE_X is the name of the file (or NULL for standard
input) that is open on descriptor FD. *FSTATUS is its status.
+ CURRENT_POS is the current file offset if known, negative if unknown.
Return true if successful. */
static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
{
bool ok = true;
char buf[BUFFER_SIZE + 1];
@@ -229,32 +230,34 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
if (count_bytes && !count_chars && !print_lines && !count_complicated)
{
- off_t current_pos, end_pos;
-
if (0 < fstatus->failed)
fstatus->failed = fstat (fd, &fstatus->st);
- if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
- && (current_pos = lseek (fd, 0, SEEK_CUR)) != -1
- && (end_pos = lseek (fd, 0, SEEK_END)) != -1)
+ /* For sized files, seek to one buffer before EOF rather than to EOF.
+ This works better for files in proc-like file systems where
+ the size is only approximate. */
+ if (! fstatus->failed && usable_st_size (&fstatus->st)
+ && 0 <= fstatus->st.st_size)
{
- /* Be careful here. The current position may actually be
- beyond the end of the file. As in the example above. */
- bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+ size_t end_pos = fstatus->st.st_size;
+ off_t hi_pos = end_pos - end_pos % BUFFER_SIZE;
+ if (current_pos < 0)
+ current_pos = lseek (fd, 0, SEEK_CUR);
+ if (0 <= current_pos && current_pos < hi_pos
+ && 0 <= lseek (fd, hi_pos, SEEK_CUR))
+ bytes = hi_pos - current_pos;
}
- else
+
+ fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+ while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
- fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
- while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+ if (bytes_read == SAFE_READ_ERROR)
{
- if (bytes_read == SAFE_READ_ERROR)
- {
- error (0, errno, "%s", file);
- ok = false;
- break;
- }
- bytes += bytes_read;
+ error (0, errno, "%s", file);
+ ok = false;
+ break;
}
+ bytes += bytes_read;
}
}
else if (!count_chars && !count_complicated)
@@ -500,7 +503,7 @@ wc_file (char const *file, struct fstatus *fstatus)
have_read_stdin = true;
if (O_BINARY && ! isatty (STDIN_FILENO))
xfreopen (NULL, "rb", stdin);
- return wc (STDIN_FILENO, file, fstatus);
+ return wc (STDIN_FILENO, file, fstatus, -1);
}
else
{
@@ -512,7 +515,7 @@ wc_file (char const *file, struct fstatus *fstatus)
}
else
{
- bool ok = wc (fd, file, fstatus);
+ bool ok = wc (fd, file, fstatus, 0);
if (close (fd) != 0)
{
error (0, errno, "%s", file);