summaryrefslogtreecommitdiff
path: root/src/system.h
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com> 2015-10-22 14:34:08 +0100
committer: Pádraig Brady <P@draigBrady.com> 2015-10-27 17:25:12 +0000
commit: 9459d9d8112fe7816022665b5016c2014bb625f3 (patch)
tree: 80486d9f4331e149f6e7ef7381acbac1613ffb31 /src/system.h
parent: 6796698c9945d87236ffcc939137d0919ef04931 (diff)
download: coreutils-9459d9d8112fe7816022665b5016c2014bb625f3.tar.xz
copy,dd: simplify and optimize NUL bytes detection
* src/factor.c: Move LIKELY() definition to...
* src/system.h: ...here.
(is_nul): Reimplement with a version that doesn't require a sentinel after the buffer, and which calls down to (the system optimized) memcmp. Performance analyzed at http://rusty.ozlabs.org/?p=560
* src/dd.c (alloc_obuf): Simplify the is_nul() call by not needing to write the sentinel.
* src/copy.c (sparse_copy): Likewise.
(copy_reg): Simplify the buffer allocation by avoiding consideration of the sentinel in the buffer size calculation.
Diffstat (limited to 'src/system.h')
-rw-r--r-- src/system.h 72
1 files changed, 54 insertions, 18 deletions
diff --git a/src/system.h b/src/system.h
index 8f6a2ea84..1cd6bdb44 100644
--- a/src/system.h
+++ b/src/system.h
@@ -427,6 +427,15 @@ enum
# define ATTRIBUTE_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
#endif
+#ifdef __GNUC__ /* Annotate COND via __builtin_expect with expected value 1 (LIKELY) or 0 (UNLIKELY).  */
+# define LIKELY(cond) __builtin_expect ((cond), 1)
+# define UNLIKELY(cond) __builtin_expect ((cond), 0)
+#else /* No __GNUC__: both macros expand to the parenthesized COND with no annotation.  */
+# define LIKELY(cond) (cond)
+# define UNLIKELY(cond) (cond)
+#endif
+
+
#if defined strdupa
# define ASSIGN_STRDUPA(DEST, S) \
do { DEST = strdupa (S); } while (0)
@@ -487,27 +496,54 @@ ptr_align (void const *ptr, size_t alignment)
}
/* Return whether the buffer consists entirely of NULs.
- Note the word after the buffer must be non NUL. */
+ Based on memeqzero in CCAN by Rusty Russell under CC0 (Public domain). */
static inline bool _GL_ATTRIBUTE_PURE
-is_nul (void const *buf, size_t bufsize)
+is_nul (void const *buf, size_t length)
{
- typedef uintptr_t word;
- void const *vp;
- char const *cbuf = buf;
- word const *wp = buf;
-
- /* Find first nonzero *word*, or the word with the sentinel. */
- while (*wp++ == 0)
- continue;
-
- /* Find the first nonzero *byte*, or the sentinel. */
- vp = wp - 1;
- char const *cp = vp;
- while (*cp++ == 0)
- continue;
-
- return cbuf + bufsize < cp;
+ const unsigned char *p = buf;
+/* Select the type of WORD used for the initial scan.
+   Using possibly unaligned word access for the first 16 bytes
+   saves about 30-40 cycles, though it is strictly undefined behavior
+   and so would need __attribute__ ((__no_sanitize_undefined__))
+   to avoid -fsanitize=undefined warnings.
+   Considering coreutils is mainly concerned with relatively
+   large buffers, that variant is disabled by the constant 0 in the
+   #if below, and the byte-sized WORD with defined behavior is used. */
+#if 0 && _STRING_ARCH_unaligned
+ unsigned long word;
+#else
+ unsigned char word;
+#endif
+
+ if (! length)
+ return true;
+
+ /* Check bytes singly until LENGTH is a multiple of sizeof word.
+    With the byte-sized WORD selected above the mask is 0,
+    so this loop body never runs. */
+ while (UNLIKELY (length & (sizeof word - 1)))
+ {
+ if (*p)
+ return false;
+ p++;
+ length--;
+ if (! length)
+ return true;
+ }
+
+ /* Check one word at a time until LENGTH reaches 0 or becomes a
+    multiple of 16; at least one word is always checked.
+    Note that the comparison with 0 sits outside the UNLIKELY argument,
+    so the hint covers only the masked length. */
+ for (;;)
+ {
+ memcpy (&word, p, sizeof word);
+ if (word)
+ return false;
+ p += sizeof word;
+ length -= sizeof word;
+ if (! length)
+ return true;
+ if (UNLIKELY (length & 15) == 0)
+ break;
+ }
+
+ /* Every byte before P is now known to be NUL, so comparing the buffer
+    with itself at that offset checks the remaining LENGTH bytes. */
+ return memcmp (buf, p, length) == 0;
}
/* If 10*Accum + Digit_val is larger than the maximum value for Type,