From 56ff7a67601e79d9f7bf2fb946204a5482aa9302 Mon Sep 17 00:00:00 2001
From: Pádraig Brady
Date: Mon, 23 Mar 2015 11:54:19 +0000
Subject: wc: use a more adaptive wc -l implementation

* src/wc.c (wc): Allow any block to select the count implementation,
rather than just using the first 10 lines. This also simplifies
the code from 3 loops to 2.
---
 src/wc.c | 58 ++++++++++++++++++++++++++--------------------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/src/wc.c b/src/wc.c
index ceb48ed89..91f4a3145 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -265,11 +265,8 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
       /* Use a separate loop when counting only lines or lines and bytes --
          but not chars or words.  */
       bool long_lines = false;
-      bool check_len = true;
       while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
         {
-          char *p = buf;
-
           if (bytes_read == SAFE_READ_ERROR)
             {
               error (0, errno, "%s", file);
@@ -277,41 +274,38 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
               break;
             }
 
+          bytes += bytes_read;
+
+          char *p = buf;
           char *end = p + bytes_read;
+          uintmax_t plines = lines;
 
-          /* Avoid function call overhead for shorter lines.  */
-          if (check_len)
-            while (p != end)
-              {
+          if (! long_lines)
+            {
+              /* Avoid function call overhead for shorter lines.  */
+              while (p != end)
                 lines += *p++ == '\n';
-                /* If there are more than 150 chars in the first 10 lines,
-                   then use memchr, where system specific optimizations
-                   may outweigh function call overhead.
-                   FIXME: This line length was determined in 2015, on both
-                   x86_64 and ppc64, but it's worth re-evaluating in future with
-                   newer compilers, CPUs, or memchr() implementations etc.  */
-                if (lines <= 10)
-                  {
-                    if (p - buf > 150)
-                      {
-                        long_lines = true;
-                        break;
-                      }
-                  }
-              }
-          else if (! long_lines)
-            while (p != end)
-              lines += *p++ == '\n';
-
-          /* memchr is more efficient with longer lines.  */
-          while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
+            }
+          else
             {
-              ++p;
-              ++lines;
+              /* memchr is more efficient with longer lines.  */
+              while ((p = memchr (p, '\n', end - p)))
+                {
+                  ++p;
+                  ++lines;
+                }
             }
 
-          bytes += bytes_read;
-          check_len = false;
+          /* If the average line length in the block is >= 15, then use
+             memchr for the next block, where system specific optimizations
+             may outweigh function call overhead.
+             FIXME: This line length was determined in 2015, on both
+             x86_64 and ppc64, but it's worth re-evaluating in future with
+             newer compilers, CPUs, or memchr() implementations etc.  */
+          if (lines - plines <= bytes_read / 15)
+            long_lines = true;
+          else
+            long_lines = false;
         }
     }
 #if MB_LEN_MAX > 1
-- 
cgit v1.2.3-70-g09d2