summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPádraig Brady <P@draigBrady.com>2015-10-22 14:34:08 +0100
committerPádraig Brady <P@draigBrady.com>2015-10-27 17:25:12 +0000
commit9459d9d8112fe7816022665b5016c2014bb625f3 (patch)
tree80486d9f4331e149f6e7ef7381acbac1613ffb31 /src
parent6796698c9945d87236ffcc939137d0919ef04931 (diff)
downloadcoreutils-9459d9d8112fe7816022665b5016c2014bb625f3.tar.xz
copy,dd: simplify and optimize NUL bytes detection
* src/factor.c: Move LIKELY() definition to... * src/system.h: ...here. (is_nul): Reimplement with a version that doesn't require a sentinel after the buffer, and which calls down to (the system optimized) memcmp. Performance analyzed at http://rusty.ozlabs.org/?p=560 * src/dd.c (alloc_obuf): Simplify the is_nul() call by not needing to write the sentinel. * src/copy.c (sparse_copy): Likewise. (copy_reg): Simplify the buffer allocation by avoiding consideration of the sentinel in the buffer size calculation.
Diffstat (limited to 'src')
-rw-r--r--src/copy.c22
-rw-r--r--src/dd.c6
-rw-r--r--src/factor.c8
-rw-r--r--src/system.h72
4 files changed, 58 insertions, 50 deletions
diff --git a/src/copy.c b/src/copy.c
index 5fe69ea3c..edf022ef1 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -245,17 +245,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
csize = MIN (csize, n_read);
if (hole_size && csize)
- {
- /* Setup sentinel required by is_nul(). */
- typedef uintptr_t word;
- word isnul_tmp;
- memcpy (&isnul_tmp, cbuf + csize, sizeof (word));
- memset (cbuf + csize, 1, sizeof (word));
-
- make_hole = is_nul (cbuf, csize);
-
- memcpy (cbuf + csize, &isnul_tmp, sizeof (word));
- }
+ make_hole = is_nul (cbuf, csize);
bool transition = (make_hole != prev_hole) && psize;
bool last_chunk = (n_read == csize && ! make_hole) || ! csize;
@@ -1201,11 +1191,8 @@ copy_reg (char const *src_name, char const *dst_name,
if (data_copy_required)
{
- typedef uintptr_t word;
-
/* Choose a suitable buffer size; it may be adjusted later. */
- size_t buf_alignment = lcm (getpagesize (), sizeof (word));
- size_t buf_alignment_slop = sizeof (word) + buf_alignment - 1;
+ size_t buf_alignment = getpagesize ();
size_t buf_size = io_blksize (sb);
size_t hole_size = ST_BLKSIZE (sb);
@@ -1236,7 +1223,7 @@ copy_reg (char const *src_name, char const *dst_name,
{
/* Compute the least common multiple of the input and output
buffer sizes, adjusting for outlandish values. */
- size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment_slop;
+ size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment;
size_t blcm = buffer_lcm (io_blksize (src_open_sb), buf_size,
blcm_max);
@@ -1254,8 +1241,7 @@ copy_reg (char const *src_name, char const *dst_name,
buf_size = blcm;
}
- /* Make a buffer with space for a sentinel at the end. */
- buf_alloc = xmalloc (buf_size + buf_alignment_slop);
+ buf_alloc = xmalloc (buf_size + buf_alignment);
buf = ptr_align (buf_alloc, buf_alignment);
if (sparse_src)
diff --git a/src/dd.c b/src/dd.c
index 06beabf78..a1485f609 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -20,7 +20,6 @@
#define SWAB_ALIGN_OFFSET 2
-#include <assert.h>
#include <sys/types.h>
#include <signal.h>
#include <getopt.h>
@@ -728,11 +727,6 @@ alloc_obuf (void)
alloc_ibuf ();
obuf = ibuf;
}
-
- /* Write a sentinel to the slop after the buffer,
- to allow efficient checking for NUL blocks. */
- assert (sizeof (uintptr_t) <= OUTPUT_BLOCK_SLOP);
- memset (obuf + output_blocksize, 1, sizeof (uintptr_t));
}
static void
diff --git a/src/factor.c b/src/factor.c
index e4ce4a5ea..ee578bfe5 100644
--- a/src/factor.c
+++ b/src/factor.c
@@ -705,14 +705,6 @@ static bool flag_prove_primality = PROVE_PRIMALITY;
/* Number of Miller-Rabin tests to run when not proving primality. */
#define MR_REPS 25
-#ifdef __GNUC__
-# define LIKELY(cond) __builtin_expect ((cond), 1)
-# define UNLIKELY(cond) __builtin_expect ((cond), 0)
-#else
-# define LIKELY(cond) (cond)
-# define UNLIKELY(cond) (cond)
-#endif
-
static void
factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
unsigned int off)
diff --git a/src/system.h b/src/system.h
index 8f6a2ea84..1cd6bdb44 100644
--- a/src/system.h
+++ b/src/system.h
@@ -427,6 +427,15 @@ enum
# define ATTRIBUTE_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
#endif
+#ifdef __GNUC__
+# define LIKELY(cond) __builtin_expect ((cond), 1)
+# define UNLIKELY(cond) __builtin_expect ((cond), 0)
+#else
+# define LIKELY(cond) (cond)
+# define UNLIKELY(cond) (cond)
+#endif
+
+
#if defined strdupa
# define ASSIGN_STRDUPA(DEST, S) \
do { DEST = strdupa (S); } while (0)
@@ -487,27 +496,54 @@ ptr_align (void const *ptr, size_t alignment)
}
/* Return whether the buffer consists entirely of NULs.
- Note the word after the buffer must be non NUL. */
+ Based on memeqzero in CCAN by Rusty Russell under CC0 (Public domain). */
static inline bool _GL_ATTRIBUTE_PURE
-is_nul (void const *buf, size_t bufsize)
+is_nul (void const *buf, size_t length)
{
- typedef uintptr_t word;
- void const *vp;
- char const *cbuf = buf;
- word const *wp = buf;
-
- /* Find first nonzero *word*, or the word with the sentinel. */
- while (*wp++ == 0)
- continue;
-
- /* Find the first nonzero *byte*, or the sentinel. */
- vp = wp - 1;
- char const *cp = vp;
- while (*cp++ == 0)
- continue;
-
- return cbuf + bufsize < cp;
+ const unsigned char *p = buf;
+/* Using possibly unaligned access for the first 16 bytes
+ saves about 30-40 cycles, though it is strictly undefined behavior
+ and so would need __attribute__ ((__no_sanitize_undefined__))
+ to avoid -fsanitize=undefined warnings.
+ Considering coreutils is mainly concerned with relatively
+ large buffers, we'll just use the defined behavior. */
+#if 0 && _STRING_ARCH_unaligned
+ unsigned long word;
+#else
+ unsigned char word;
+#endif
+
+ if (! length)
+ return true;
+
+ /* Check len bytes not aligned on a word. */
+ while (UNLIKELY (length & (sizeof word - 1)))
+ {
+ if (*p)
+ return false;
+ p++;
+ length--;
+ if (! length)
+ return true;
+ }
+
+ /* Check up to 16 bytes a word at a time. */
+ for (;;)
+ {
+ memcpy (&word, p, sizeof word);
+ if (word)
+ return false;
+ p += sizeof word;
+ length -= sizeof word;
+ if (! length)
+ return true;
+ if (UNLIKELY (length & 15) == 0)
+ break;
+ }
+
+ /* Now we know first 16 bytes are NUL, memcmp with self. */
+ return memcmp (buf, p, length) == 0;
}
/* If 10*Accum + Digit_val is larger than the maximum value for Type,