summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Pádraig Brady <P@draigBrady.com> 2016-12-22 14:31:44 +0000
committer Pádraig Brady <P@draigBrady.com> 2016-12-26 12:57:16 +0000
commit e17e5f40b81447a2af65b0e64a3295d5e2e86753 (patch)
tree 762ad410249552a67b467941acbcde0f8d3019a2
parent 94d2c6848b17b4e14235e80e6fa6af37aa76217b (diff)
download coreutils-e17e5f40b81447a2af65b0e64a3295d5e2e86753.tar.xz
wc: with only --bytes, determine size more efficiently
* src/wc.c (wc): Avoid reading the end of the file when the size is not a multiple of PAGE_SIZE, as the special case handling for files in /proc and /sys is only required when st_size is 0 or a multiple of PAGE_SIZE.
* tests/misc/wc-proc.sh: Add a test case.
-rw-r--r-- src/wc.c 48
-rwxr-xr-x tests/misc/wc-proc.sh 13
2 files changed, 50 insertions, 11 deletions
diff --git a/src/wc.c b/src/wc.c
index 64df50cd9..a02379bf8 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -71,6 +71,9 @@ static int number_width;
/* True if we have ever read the standard input. */
static bool have_read_stdin;
+/* Used to determine if file size can be determined without reading. */
+static size_t page_size;
+
/* The result of calling fstat or stat on a file descriptor or file. */
struct fstatus
{
@@ -235,6 +238,8 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
if (count_bytes && !count_chars && !print_lines && !count_complicated)
{
+ bool skip_read = false;
+
if (0 < fstatus->failed)
fstatus->failed = fstat (fd, &fstatus->st);
@@ -245,24 +250,44 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
&& 0 <= fstatus->st.st_size)
{
size_t end_pos = fstatus->st.st_size;
- off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
if (current_pos < 0)
current_pos = lseek (fd, 0, SEEK_CUR);
- if (0 <= current_pos && current_pos < hi_pos
- && 0 <= lseek (fd, hi_pos, SEEK_CUR))
- bytes = hi_pos - current_pos;
+
+ if (end_pos % page_size)
+ {
+ /* We only need special handling of /proc and /sys files etc.
+ when they're a multiple of PAGE_SIZE. In the common case
+ for files with st_size not a multiple of PAGE_SIZE,
+ it's more efficient and accurate to use st_size.
+
+ Be careful here. The current position may actually be
+ beyond the end of the file. As in the example above. */
+
+ bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+ skip_read = true;
+ }
+ else
+ {
+ off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
+ if (0 <= current_pos && current_pos < hi_pos
+ && 0 <= lseek (fd, hi_pos, SEEK_CUR))
+ bytes = hi_pos - current_pos;
+ }
}
- fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
- while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+ if (! skip_read)
{
- if (bytes_read == SAFE_READ_ERROR)
+ fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+ while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
- error (0, errno, "%s", quotef (file));
- ok = false;
- break;
+ if (bytes_read == SAFE_READ_ERROR)
+ {
+ error (0, errno, "%s", quotef (file));
+ ok = false;
+ break;
+ }
+ bytes += bytes_read;
}
- bytes += bytes_read;
}
}
else if (!count_chars && !count_complicated)
@@ -639,6 +664,7 @@ main (int argc, char **argv)
atexit (close_stdout);
+ page_size = getpagesize ();
/* Line buffer stdout to ensure lines are written atomically and immediately
so that processes running in parallel do not intersperse their output. */
setvbuf (stdout, NULL, _IOLBF, 0);
diff --git a/tests/misc/wc-proc.sh b/tests/misc/wc-proc.sh
index d6a36ba18..c50d7832b 100755
--- a/tests/misc/wc-proc.sh
+++ b/tests/misc/wc-proc.sh
@@ -19,6 +19,7 @@
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
print_ver_ wc
+# Ensure we read() /proc files to determine content length
for file in /proc/version /sys/kernel/profiling; do
if test -r $file; then
cp -f $file copy &&
@@ -29,4 +30,16 @@ for file in /proc/version /sys/kernel/profiling; do
fi
done
+# Ensure we handle cases where we don't read()
+truncate -s 2 no_read || framework_failure_
+# read() used when multiple of page size
+truncate -s 1048576 do_read || framework_failure_
+wc -c no_read do_read > out || fail=1
+cat <<\EOF > exp
+ 2 no_read
+1048576 do_read
+1048578 total
+EOF
+compare exp out || fail=1
+
Exit $fail