diff options
author | Pádraig Brady <P@draigBrady.com> | 2015-03-23 11:54:19 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2015-03-24 10:23:29 +0000 |
commit | 56ff7a67601e79d9f7bf2fb946204a5482aa9302 (patch) | |
tree | 1a3e003b85b50592d134de49b7cbcf88391e1926 /src | |
parent | 01fb984887ce6fdd71d563e57c6b59155a1f4cb7 (diff) | |
download | coreutils-56ff7a67601e79d9f7bf2fb946204a5482aa9302.tar.xz |
wc: use a more adaptive wc -l implementation
* src/wc.c (wc): Allow any block to select the count implementation,
rather than just using the first 10 lines. This also simplifies
the code from 3 loops to 2.
Diffstat (limited to 'src')
-rw-r--r-- | src/wc.c | 58 |
1 file changed, 26 insertions, 32 deletions
```diff
@@ -265,11 +265,8 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
       /* Use a separate loop when counting only lines or lines and bytes --
          but not chars or words.  */
       bool long_lines = false;
-      bool check_len = true;
       while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
         {
-          char *p = buf;
-
           if (bytes_read == SAFE_READ_ERROR)
             {
               error (0, errno, "%s", file);
@@ -277,41 +274,38 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
               break;
             }
 
+          bytes += bytes_read;
+
+          char *p = buf;
           char *end = p + bytes_read;
+          uintmax_t plines = lines;
 
-          /* Avoid function call overhead for shorter lines.  */
-          if (check_len)
-            while (p != end)
-              {
+          if (! long_lines)
+            {
+              /* Avoid function call overhead for shorter lines.  */
+              while (p != end)
                 lines += *p++ == '\n';
-                /* If there are more than 150 chars in the first 10 lines,
-                   then use memchr, where system specific optimizations
-                   may outweigh function call overhead.
-                   FIXME: This line length was determined in 2015, on both
-                   x86_64 and ppc64, but it's worth re-evaluating in future with
-                   newer compilers, CPUs, or memchr() implementations etc.  */
-                if (lines <= 10)
-                  {
-                    if (p - buf > 150)
-                      {
-                        long_lines = true;
-                        break;
-                      }
-                  }
-              }
-          else if (! long_lines)
-            while (p != end)
-              lines += *p++ == '\n';
-
-          /* memchr is more efficient with longer lines.  */
-          while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
+            }
+          else
             {
-              ++p;
-              ++lines;
+              /* memchr is more efficient with longer lines.  */
+              while ((p = memchr (p, '\n', end - p)))
+                {
+                  ++p;
+                  ++lines;
+                }
             }
 
-          bytes += bytes_read;
-          check_len = false;
+          /* If the average line length in the block is >= 15, then use
+             memchr for the next block, where system specific optimizations
+             may outweigh function call overhead.
+             FIXME: This line length was determined in 2015, on both
+             x86_64 and ppc64, but it's worth re-evaluating in future with
+             newer compilers, CPUs, or memchr() implementations etc.  */
+          if (lines - plines <= bytes_read / 15)
+            long_lines = true;
+          else
+            long_lines = false;
         }
     }
 #if MB_LEN_MAX > 1
```