From 1025243b6a0c8b8830b2d3676a97dae83c74d284 Mon Sep 17 00:00:00 2001 From: Kristoffer Brånemyr Date: Wed, 18 Mar 2015 15:32:19 +0000 Subject: wc: speedup counting of short lines Using a test file generated with: yes | head -n100M > 2x100M.txt before> time wc -l 2x100M.txt real 0.842s user 0.810s sys 0.033s after> time wc -l 2x100M.txt real 0.142s user 0.111s sys 0.031s * src/wc.c (wc): Split the loop that deals with -l into 3. The first is used at the start of the input to determine if the average line length is < 15, and if so the second loop is used to look for '\n' internally to wc. For longer lines, memchr is used as before to take advantage of system specific optimizations which may outweigh function call overhead. Note the first 2 loops could be combined, though in testing, GCC 4.9.2 at least wasn't sophisticated enough to separate the loops based on the "check_len" invariant. Note also __builtin_memchr() isn't significant here as GCC currently only applies constant folding with that. * NEWS: Mention the improvement. --- src/wc.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'src') diff --git a/src/wc.c b/src/wc.c index 8cb5163dc..ceb48ed89 100644 --- a/src/wc.c +++ b/src/wc.c @@ -264,6 +264,8 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) { /* Use a separate loop when counting only lines or lines and bytes -- but not chars or words. */ + bool long_lines = false; + bool check_len = true; while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) { char *p = buf; @@ -275,12 +277,41 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) break; } + char *end = p + bytes_read; + + /* Avoid function call overhead for shorter lines. */ + if (check_len) + while (p != end) + { + lines += *p++ == '\n'; + /* If there are more than 150 chars in the first 10 lines, + then use memchr, where system specific optimizations + may outweigh function call overhead.
+ FIXME: This line length was determined in 2015, on both + x86_64 and ppc64, but it's worth re-evaluating in future with + newer compilers, CPUs, or memchr() implementations etc. */ + if (lines <= 10) + { + if (p - buf > 150) + { + long_lines = true; + break; + } + } + } + else if (! long_lines) + while (p != end) + lines += *p++ == '\n'; + + /* memchr is more efficient with longer lines. */ while ((p = memchr (p, '\n', (buf + bytes_read) - p))) { ++p; ++lines; } + bytes += bytes_read; + check_len = false; } } #if MB_LEN_MAX > 1 -- cgit v1.2.3-70-g09d2