summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS | 2
-rw-r--r-- src/wc.c | 31
2 files changed, 33 insertions, 0 deletions
diff --git a/NEWS b/NEWS
index 1a5123574..81031c6b0 100644
--- a/NEWS
+++ b/NEWS
@@ -94,6 +94,8 @@ GNU coreutils NEWS -*- outline -*-
stat and tail now know about IBRIX. stat -f --format=%T now reports the file
system type, and tail -f uses polling for files on IBRIX file systems.
+ wc -l processes short lines much more efficiently.
+
References from --help and the man pages of utilities have been corrected
in various cases, and more direct links to the corresponding online
documentation are provided.
diff --git a/src/wc.c b/src/wc.c
index 8cb5163dc..ceb48ed89 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -264,6 +264,8 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
{
/* Use a separate loop when counting only lines or lines and bytes --
but not chars or words. */
+ bool long_lines = false;
+ bool check_len = true;
while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
char *p = buf;
@@ -275,12 +277,41 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
break;
}
+ char *end = p + bytes_read;
+
+ /* Avoid function call overhead for shorter lines. */
+ if (check_len)
+ while (p != end)
+ {
+ lines += *p++ == '\n';
+ /* If there are more than 150 chars in the first 10 lines,
+ then use memchr, where system specific optimizations
+ may outweigh function call overhead.
+ FIXME: This line length was determined in 2015, on both
+ x86_64 and ppc64, but it's worth re-evaluating in future with
+ newer compilers, CPUs, or memchr() implementations etc. */
+ if (lines <= 10)
+ {
+ if (p - buf > 150)
+ {
+ long_lines = true;
+ break;
+ }
+ }
+ }
+ else if (! long_lines)
+ while (p != end)
+ lines += *p++ == '\n';
+
+ /* memchr is more efficient with longer lines. */
while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
{
++p;
++lines;
}
+
bytes += bytes_read;
+ check_len = false;
}
}
#if MB_LEN_MAX > 1