/* Copyright (C) 1991, 1993, 1995, 1997, 1998, 2003 Free Software
   Foundation, Inc.
   Contributed by Torbjorn Granlund (tege@sics.se).

   NOTE: The canonical source of this file is maintained with the GNU C
   Library.  Bugs can be reported to bug-glibc@prep.ai.mit.edu.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <stdint.h>
#include <string.h>

#undef memcmp

#ifdef _LIBC

# include <memcopy.h>
# include <endian.h>

# if __BYTE_ORDER == __BIG_ENDIAN
#  define WORDS_BIGENDIAN
# endif

#else /* Not in the GNU C library.  */

# include <sys/types.h>

/* Type to use for aligned memory operations.
   This should normally be the biggest type supported by a single load
   and store.  Must be an unsigned type.  */
# define op_t unsigned long int
# define OPSIZ (sizeof(op_t))

/* Threshold value for when to enter the unrolled loops.  */
# define OP_T_THRES 16

/* Type to use for unaligned operations.  */
typedef unsigned char byte;

/* MERGE builds one word out of two adjacent aligned words W0 and W1,
   discarding SH_1 bits of W0 and taking the complementary SH_2 bits
   from W1.  The shift directions depend on the byte order.  */
# ifndef WORDS_BIGENDIAN
#  define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
# else
#  define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
# endif

#endif /* In the GNU C library.  */

/* CMP_LT_OR_GT orders two words that are already known to differ.  On
   big-endian machines numeric word order equals memory byte order, so a
   plain comparison is enough; otherwise the bytes are inspected one at
   a time in memory order.  */
#ifdef WORDS_BIGENDIAN
# define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
#else
# define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
#endif

/* BE VERY CAREFUL IF YOU CHANGE THIS CODE!  */

/* The strategy of this memcmp is:

   1. Compare bytes until one of the block pointers is aligned.

   2. Compare using memcmp_common_alignment or
      memcmp_not_common_alignment, regarding the alignment of the other
      block after the initial byte operations.  The maximum number of
      full words (of type op_t) are compared in this way.

   3. Compare the few remaining bytes.  */

#ifndef WORDS_BIGENDIAN
/* memcmp_bytes -- Compare A and B bytewise in the byte order of the
   machine.  A and B are known to be different.  This is needed only on
   little-endian machines.

   Returns the difference between the first pair of differing bytes,
   computed in `int' so that the sign does not depend on how an
   out-of-range unsigned value converts to `int'.  */
# ifdef __GNUC__
__inline
# endif
static int
memcmp_bytes (long unsigned int a, long unsigned int b)
{
  /* Walk the object representations of the two by-value copies.  Plain
     byte pointers are used, so no pointer/integer conversion is
     needed.  */
  const byte *srcp1 = (const byte *) &a;
  const byte *srcp2 = (const byte *) &b;
  int a0, b0;

  do
    {
      a0 = *srcp1++;
      b0 = *srcp2++;
    }
  while (a0 == b0);     /* Terminates because A != B.  */
  return a0 - b0;
}
#endif

/* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
   `op_t' objects (not LEN bytes!).  Both SRCP1 and SRCP2 should be
   aligned for memory operations on `op_t's.

   SRCP1 and SRCP2 are addresses held as integers; the entry code below
   biases them backwards so that the four-way unrolled loop can be
   entered at the label matching LEN % 4.  Returns 0 when all LEN words
   are equal, otherwise the result of CMP_LT_OR_GT on the first
   differing pair.  */
#ifdef __GNUC__
__inline
#endif
static int
memcmp_common_alignment (uintptr_t srcp1, uintptr_t srcp2, size_t len)
{
  op_t a0, a1;
  op_t b0, b1;

  switch (len % 4)
    {
    default: /* Avoid warning about uninitialized local variables.  */
    case 2:
      a0 = ((op_t *) srcp1)[0];
      b0 = ((op_t *) srcp2)[0];
      srcp1 -= 2 * OPSIZ;
      srcp2 -= 2 * OPSIZ;
      len += 2;
      goto do1;
    case 3:
      a1 = ((op_t *) srcp1)[0];
      b1 = ((op_t *) srcp2)[0];
      srcp1 -= OPSIZ;
      srcp2 -= OPSIZ;
      len += 1;
      goto do2;
    case 0:
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        return 0;
      a0 = ((op_t *) srcp1)[0];
      b0 = ((op_t *) srcp2)[0];
      goto do3;
    case 1:
      a1 = ((op_t *) srcp1)[0];
      b1 = ((op_t *) srcp2)[0];
      srcp1 += OPSIZ;
      srcp2 += OPSIZ;
      len -= 1;
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        goto do0;
      /* Fall through.  */
    }

  /* Each step loads the next pair of words before comparing the pair
     loaded by the previous step.  */
  do
    {
      a0 = ((op_t *) srcp1)[0];
      b0 = ((op_t *) srcp2)[0];
      if (a1 != b1)
        return CMP_LT_OR_GT (a1, b1);

    do3:
      a1 = ((op_t *) srcp1)[1];
      b1 = ((op_t *) srcp2)[1];
      if (a0 != b0)
        return CMP_LT_OR_GT (a0, b0);

    do2:
      a0 = ((op_t *) srcp1)[2];
      b0 = ((op_t *) srcp2)[2];
      if (a1 != b1)
        return CMP_LT_OR_GT (a1, b1);

    do1:
      a1 = ((op_t *) srcp1)[3];
      b1 = ((op_t *) srcp2)[3];
      if (a0 != b0)
        return CMP_LT_OR_GT (a0, b0);

      srcp1 += 4 * OPSIZ;
      srcp2 += 4 * OPSIZ;
      len -= 4;
    }
  while (len != 0);

  /* This is the right position for do0.  Please don't move
     it into the loop.  */
 do0:
  if (a1 != b1)
    return CMP_LT_OR_GT (a1, b1);
  return 0;
}

/* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with
   LEN `op_t' objects (not LEN bytes!).  SRCP2 should be aligned for
   memory operations on `op_t', but SRCP1 *should be unaligned*.

   SRCP1 must really be unaligned: for an aligned SRCP1 the computed
   SHR would equal the full width of `op_t', and MERGE would then shift
   by the width of the type.  Returns 0 when all LEN words are equal,
   otherwise the result of CMP_LT_OR_GT on the first differing pair.  */
#ifdef __GNUC__
__inline
#endif
static int
memcmp_not_common_alignment (uintptr_t srcp1, uintptr_t srcp2, size_t len)
{
  op_t a0, a1, a2, a3;
  op_t b0, b1, b2, b3;
  op_t x;
  int shl, shr;
  uintptr_t misalignment;

  /* Calculate how to shift a word read at the memory operation
     aligned srcp1 to make it aligned for comparison.  */
  misalignment = srcp1 % OPSIZ;
  shl = 8 * (int) misalignment;
  shr = 8 * (int) OPSIZ - shl;

  /* Make SRCP1 aligned by rounding it down to the beginning of the
     `op_t' it points in the middle of.  */
  srcp1 -= misalignment;

  switch (len % 4)
    {
    default: /* Avoid warning about uninitialized local variables.  */
    case 2:
      a1 = ((op_t *) srcp1)[0];
      a2 = ((op_t *) srcp1)[1];
      b2 = ((op_t *) srcp2)[0];
      srcp1 -= 1 * OPSIZ;
      srcp2 -= 2 * OPSIZ;
      len += 2;
      goto do1;
    case 3:
      a0 = ((op_t *) srcp1)[0];
      a1 = ((op_t *) srcp1)[1];
      b1 = ((op_t *) srcp2)[0];
      srcp2 -= 1 * OPSIZ;
      len += 1;
      goto do2;
    case 0:
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        return 0;
      a3 = ((op_t *) srcp1)[0];
      a0 = ((op_t *) srcp1)[1];
      b0 = ((op_t *) srcp2)[0];
      srcp1 += 1 * OPSIZ;
      goto do3;
    case 1:
      a2 = ((op_t *) srcp1)[0];
      a3 = ((op_t *) srcp1)[1];
      b3 = ((op_t *) srcp2)[0];
      srcp1 += 2 * OPSIZ;
      srcp2 += 1 * OPSIZ;
      len -= 1;
      if (OP_T_THRES <= 3 * OPSIZ && len == 0)
        goto do0;
      /* Fall through.  */
    }

  /* Each step loads the next words, then merges the two previously
     loaded SRCP1 words into one word lined up with the pending SRCP2
     word and compares them.  */
  do
    {
      a0 = ((op_t *) srcp1)[0];
      b0 = ((op_t *) srcp2)[0];
      x = MERGE(a2, shl, a3, shr);
      if (x != b3)
        return CMP_LT_OR_GT (x, b3);

    do3:
      a1 = ((op_t *) srcp1)[1];
      b1 = ((op_t *) srcp2)[1];
      x = MERGE(a3, shl, a0, shr);
      if (x != b0)
        return CMP_LT_OR_GT (x, b0);

    do2:
      a2 = ((op_t *) srcp1)[2];
      b2 = ((op_t *) srcp2)[2];
      x = MERGE(a0, shl, a1, shr);
      if (x != b1)
        return CMP_LT_OR_GT (x, b1);

    do1:
      a3 = ((op_t *) srcp1)[3];
      b3 = ((op_t *) srcp2)[3];
      x = MERGE(a1, shl, a2, shr);
      if (x != b2)
        return CMP_LT_OR_GT (x, b2);

      srcp1 += 4 * OPSIZ;
      srcp2 += 4 * OPSIZ;
      len -= 4;
    }
  while (len != 0);

  /* This is the right position for do0.  Please don't move
     it into the loop.  */
 do0:
  x = MERGE(a2, shl, a3, shr);
  if (x != b3)
    return CMP_LT_OR_GT (x, b3);
  return 0;
}

/* rpl_memcmp -- Compare the first LEN bytes of the blocks at S1 and S2.

   Returns 0 if the blocks are equal.  Otherwise the result is nonzero
   and its sign is that of the difference between the first pair of
   differing bytes, each read as `unsigned char'.  A LEN of zero yields
   0 without reading either block.  */
int
rpl_memcmp (const void *s1, const void *s2, size_t len)
{
  /* Addresses are kept as integers so that alignment can be tested
     directly.  uintptr_t is used because `long int' can be narrower
     than a pointer.  */
  uintptr_t srcp1 = (uintptr_t) s1;
  uintptr_t srcp2 = (uintptr_t) s2;
  int a0;
  int b0;
  int res;

  if (len >= OP_T_THRES)
    {
      /* There are at least some bytes to compare.  No need to test
         for LEN == 0 in this alignment loop.  */
      while (srcp2 % OPSIZ != 0)
        {
          a0 = ((const byte *) srcp1)[0];
          b0 = ((const byte *) srcp2)[0];
          srcp1 += 1;
          srcp2 += 1;
          res = a0 - b0;
          if (res != 0)
            return res;
          len -= 1;
        }

      /* SRCP2 is now aligned for memory operations on `op_t'.
         SRCP1 alignment determines if we can do a simple,
         aligned compare or need to shuffle bits.  */

      if (srcp1 % OPSIZ == 0)
        res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ);
      else
        res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ);
      if (res != 0)
        return res;

      /* Number of bytes remaining in the interval [0..OPSIZ-1].  */
      srcp1 += len & -OPSIZ;
      srcp2 += len & -OPSIZ;
      len %= OPSIZ;
    }

  /* There are just a few bytes to compare.  Use byte memory operations.  */
  while (len != 0)
    {
      a0 = ((const byte *) srcp1)[0];
      b0 = ((const byte *) srcp2)[0];
      srcp1 += 1;
      srcp2 += 1;
      res = a0 - b0;
      if (res != 0)
        return res;
      len -= 1;
    }

  return 0;
}

/* NOTE(review): this alias names `memcmp', while the function defined
   above is `rpl_memcmp' and `memcmp' has been #undef'd.  weak_alias is
   not defined in this file, so whether this branch is ever active here
   could not be confirmed -- verify before relying on it.  */
#ifdef weak_alias
# undef bcmp
weak_alias (memcmp, bcmp)
#endif