summary | refs | log | tree | commit | diff
path: root/src/wc.c
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com>, 2015-03-23 11:54:19 +0000
committer: Pádraig Brady <P@draigBrady.com>, 2015-03-24 10:23:29 +0000
commit: 56ff7a67601e79d9f7bf2fb946204a5482aa9302 (patch)
tree: 1a3e003b85b50592d134de49b7cbcf88391e1926 /src/wc.c
parent: 01fb984887ce6fdd71d563e57c6b59155a1f4cb7 (diff)
download: coreutils-56ff7a67601e79d9f7bf2fb946204a5482aa9302.tar.xz
wc: use a more adaptive wc -l implementation
* src/wc.c (wc): Allow any block to select the count implementation, rather than just using the first 10 lines. This also simplifies the code from 3 loops to 2.
Diffstat (limited to 'src/wc.c')
-rw-r--r-- src/wc.c 58
1 file changed, 26 insertions, 32 deletions
diff --git a/src/wc.c b/src/wc.c
index ceb48ed89..91f4a3145 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -265,11 +265,8 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
/* Use a separate loop when counting only lines or lines and bytes --
but not chars or words. */
bool long_lines = false;
- bool check_len = true;
while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
- char *p = buf;
-
if (bytes_read == SAFE_READ_ERROR)
{
error (0, errno, "%s", file);
@@ -277,41 +274,38 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
break;
}
+ bytes += bytes_read;
+
+ char *p = buf;
char *end = p + bytes_read;
+ uintmax_t plines = lines;
- /* Avoid function call overhead for shorter lines. */
- if (check_len)
- while (p != end)
- {
+ if (! long_lines)
+ {
+ /* Avoid function call overhead for shorter lines. */
+ while (p != end)
lines += *p++ == '\n';
- /* If there are more than 150 chars in the first 10 lines,
- then use memchr, where system specific optimizations
- may outweigh function call overhead.
- FIXME: This line length was determined in 2015, on both
- x86_64 and ppc64, but it's worth re-evaluating in future with
- newer compilers, CPUs, or memchr() implementations etc. */
- if (lines <= 10)
- {
- if (p - buf > 150)
- {
- long_lines = true;
- break;
- }
- }
- }
- else if (! long_lines)
- while (p != end)
- lines += *p++ == '\n';
-
- /* memchr is more efficient with longer lines. */
- while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
+ }
+ else
{
- ++p;
- ++lines;
+ /* memchr is more efficient with longer lines. */
+ while ((p = memchr (p, '\n', end - p)))
+ {
+ ++p;
+ ++lines;
+ }
}
- bytes += bytes_read;
- check_len = false;
+ /* If the average line length in the block is >= 15, then use
+ memchr for the next block, where system specific optimizations
+ may outweigh function call overhead.
+ FIXME: This line length was determined in 2015, on both
+ x86_64 and ppc64, but it's worth re-evaluating in future with
+ newer compilers, CPUs, or memchr() implementations etc. */
+ if (lines - plines <= bytes_read / 15)
+ long_lines = true;
+ else
+ long_lines = false;
}
}
#if MB_LEN_MAX > 1