diff options
author | Pádraig Brady <P@draigBrady.com> | 2015-10-22 14:34:08 +0100 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2015-10-27 17:25:12 +0000 |
commit | 9459d9d8112fe7816022665b5016c2014bb625f3 (patch) | |
tree | 80486d9f4331e149f6e7ef7381acbac1613ffb31 /src/system.h | |
parent | 6796698c9945d87236ffcc939137d0919ef04931 (diff) | |
download | coreutils-9459d9d8112fe7816022665b5016c2014bb625f3.tar.xz |
copy,dd: simplify and optimize NUL bytes detection
* src/factor.c: Move LIKELY() definition to...
* src/system.h: ...here.
(is_nul): Reimplement with a version that doesn't
require a sentinel after the buffer, and which calls
down to the (system-optimized) memcmp.
Performance is analyzed at http://rusty.ozlabs.org/?p=560
* src/dd.c (alloc_obuf): Simplify the is_nul() call, since
the sentinel no longer needs to be written.
* src/copy.c (sparse_copy): Likewise.
(copy_reg): Simplify the buffer allocation by no longer
accounting for the sentinel in the buffer size calculation.
Diffstat (limited to 'src/system.h')
-rw-r--r-- | src/system.h | 72 |
1 file changed, 54 insertions, 18 deletions
diff --git a/src/system.h b/src/system.h index 8f6a2ea84..1cd6bdb44 100644 --- a/src/system.h +++ b/src/system.h @@ -427,6 +427,15 @@ enum # define ATTRIBUTE_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__)) #endif +#ifdef __GNUC__ +# define LIKELY(cond) __builtin_expect ((cond), 1) +# define UNLIKELY(cond) __builtin_expect ((cond), 0) +#else +# define LIKELY(cond) (cond) +# define UNLIKELY(cond) (cond) +#endif + + #if defined strdupa # define ASSIGN_STRDUPA(DEST, S) \ do { DEST = strdupa (S); } while (0) @@ -487,27 +496,54 @@ ptr_align (void const *ptr, size_t alignment) } /* Return whether the buffer consists entirely of NULs. - Note the word after the buffer must be non NUL. */ + Based on memeqzero in CCAN by Rusty Russell under CC0 (Public domain). */ static inline bool _GL_ATTRIBUTE_PURE -is_nul (void const *buf, size_t bufsize) +is_nul (void const *buf, size_t length) { - typedef uintptr_t word; - void const *vp; - char const *cbuf = buf; - word const *wp = buf; - - /* Find first nonzero *word*, or the word with the sentinel. */ - while (*wp++ == 0) - continue; - - /* Find the first nonzero *byte*, or the sentinel. */ - vp = wp - 1; - char const *cp = vp; - while (*cp++ == 0) - continue; - - return cbuf + bufsize < cp; + const unsigned char *p = buf; +/* Using possibly unaligned access for the first 16 bytes + saves about 30-40 cycles, though it is strictly undefined behavior + and so would need __attribute__ ((__no_sanitize_undefined__)) + to avoid -fsanitize=undefined warnings. + Considering coreutils is mainly concerned with relatively + large buffers, we'll just use the defined behavior. */ +#if 0 && _STRING_ARCH_unaligned + unsigned long word; +#else + unsigned char word; +#endif + + if (! length) + return true; + + /* Check len bytes not aligned on a word. */ + while (UNLIKELY (length & (sizeof word - 1))) + { + if (*p) + return false; + p++; + length--; + if (! length) + return true; + } + + /* Check up to 16 bytes a word at a time. 
*/ + for (;;) + { + memcpy (&word, p, sizeof word); + if (word) + return false; + p += sizeof word; + length -= sizeof word; + if (! length) + return true; + if (UNLIKELY (length & 15) == 0) + break; + } + + /* Now we know first 16 bytes are NUL, memcmp with self. */ + return memcmp (buf, p, length) == 0; } /* If 10*Accum + Digit_val is larger than the maximum value for Type, |