summaryrefslogtreecommitdiff
path: root/lib/regex.c
diff options
context:
space:
mode:
authorJim Meyering <jim@meyering.net>2001-02-17 07:14:31 +0000
committerJim Meyering <jim@meyering.net>2001-02-17 07:14:31 +0000
commit4bcbd9c6aa71cc754a2ca3fa3a2e855849a0b83f (patch)
treef7c49ad4bad22121f3213880eb43f30b1c79d1ea /lib/regex.c
parent98c5eb08be7cb4ce71389611197f39cce80c98e8 (diff)
downloadcoreutils-4bcbd9c6aa71cc754a2ca3fa3a2e855849a0b83f.tar.xz
Update from libc.
Diffstat (limited to 'lib/regex.c')
-rw-r--r--lib/regex.c96
1 files changed, 56 insertions, 40 deletions
diff --git a/lib/regex.c b/lib/regex.c
index b8c1818b1..79c3f0b91 100644
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -62,7 +62,7 @@
# define US_CHAR_TYPE wchar_t/* unsigned character type */
# define COMPILED_BUFFER_VAR wc_buffer
# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
-# define CHAR_CLASS_SIZE (sizeof(wctype_t)/sizeof(CHAR_TYPE)+1)
+# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_TYPE)+1)
# define PUT_CHAR(c) \
do { \
if (MC_CUR_MAX == 1) \
@@ -289,6 +289,8 @@ extern char *re_syntax_table;
static char re_syntax_table[CHAR_SET_SIZE];
+static void init_syntax_once PARAMS ((void));
+
static void
init_syntax_once ()
{
@@ -1173,7 +1175,7 @@ printchar (c)
static size_t convert_mbs_to_wcs (CHAR_TYPE *dest, const unsigned char* src,
size_t len, int *offset_buffer,
- int *is_binary);
+ char *is_binary);
static size_t
convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
CHAR_TYPE *dest;
@@ -1190,7 +1192,7 @@ convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
= {0, 3, 4, 6}
*/
int *offset_buffer;
- int *is_binary;
+ char *is_binary;
{
wchar_t *pdest = dest;
const unsigned char *psrc = src;
@@ -1905,7 +1907,8 @@ static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
#ifndef TRANSLATE
# ifdef MBS_SUPPORT
# define TRANSLATE(d) \
- (translate && (sizeof(d) <= 1)? (char) translate[(unsigned char) (d)] : (d))
+ ((translate && ((US_CHAR_TYPE) (d)) <= 0xff) \
+ ? (char) translate[(unsigned char) (d)] : (d))
#else
# define TRANSLATE(d) \
(translate ? (char) translate[(unsigned char) (d)] : (d))
@@ -2282,9 +2285,9 @@ regex_compile (pattern, size, syntax, bufp)
/* offset buffer for optimizatoin. See convert_mbs_to_wc. */
int *mbs_offset = NULL;
/* It hold whether each wchar_t is binary data or not. */
- int *is_binary = NULL;
+ char *is_binary = NULL;
/* A flag whether exactn is handling binary data or not. */
- int is_exactn_bin = FALSE;
+ char is_exactn_bin = FALSE;
#endif /* MBS_SUPPORT */
/* A random temporary spot in PATTERN. */
@@ -2344,7 +2347,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Initialize the wchar_t PATTERN and offset_buffer. */
p = pend = pattern = TALLOC(csize, CHAR_TYPE);
mbs_offset = TALLOC(csize + 1, int);
- is_binary = TALLOC(csize + 1, int);
+ is_binary = TALLOC(csize + 1, char);
if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
{
if (pattern) free(pattern);
@@ -2806,6 +2809,8 @@ regex_compile (pattern, size, syntax, bufp)
if (c == ':' && *p == ']')
{
wctype_t wt;
+ uintptr_t alignedp;
+
/* Query the character class as wctype_t. */
wt = IS_CHAR_CLASS (str);
if (wt == 0)
@@ -2823,9 +2828,14 @@ regex_compile (pattern, size, syntax, bufp)
b += CHAR_CLASS_SIZE;
/* Move data which follow character classes
not to violate the data. */
- insert_space(CHAR_CLASS_SIZE, laststart + 6, b - 1);
+ insert_space(CHAR_CLASS_SIZE,
+ laststart + 6 + laststart[1],
+ b - 1);
+ alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
+ + __alignof__(wctype_t) - 1)
+ & ~(uintptr_t)(__alignof__(wctype_t) - 1);
/* Store the character class. */
- *((wctype_t*)(laststart + 6)) = wt;
+ *((wctype_t*)alignedp) = wt;
/* Update length of char_classes */
laststart[1] += CHAR_CLASS_SIZE;
@@ -5050,9 +5060,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
}
else /* Searching backwards. */
{
- register char c = (size1 == 0 || startpos >= size1
- ? string2[startpos - size1]
- : string1[startpos]);
+ register CHAR_TYPE c = (size1 == 0 || startpos >= size1
+ ? string2[startpos - size1]
+ : string1[startpos]);
if (!fastmap[(unsigned char) TRANSLATE (c)])
goto advance;
@@ -5186,8 +5196,6 @@ weak_alias (__re_search_2, re_search_2)
FREE_VAR (string2); \
FREE_VAR (mbs_offset1); \
FREE_VAR (mbs_offset2); \
- FREE_VAR (is_binary1); \
- FREE_VAR (is_binary2); \
} while (0)
# else /* not MBS_SUPPORT */
# define FREE_VARIABLES() \
@@ -5205,17 +5213,16 @@ weak_alias (__re_search_2, re_search_2)
} while (0)
# endif /* MBS_SUPPORT */
#else
+# define FREE_VAR(var) if (var) free (var); var = NULL
# ifdef MBS_SUPPORT
# define FREE_VARIABLES() \
do { \
- if (string1) free (string1); \
- if (string2) free (string2); \
- if (mbs_offset1) free (mbs_offset1); \
- if (mbs_offset2) free (mbs_offset2); \
- if (is_binary1) free (is_binary1); \
- if (is_binary2) free (is_binary2); \
+ FREE_VAR (string1); \
+ FREE_VAR (string2); \
+ FREE_VAR (mbs_offset1); \
+ FREE_VAR (mbs_offset2); \
} while (0)
-# eles
+# else
# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
# endif /* MBS_SUPPORT */
#endif /* not MATCH_MAY_ALLOCATE */
@@ -5304,10 +5311,14 @@ weak_alias (__re_match_2, re_match_2)
#endif
#ifdef MBS_SUPPORT
+
+static int count_mbs_length PARAMS ((int *, int));
+
/* This check the substring (from 0, to length) of the multibyte string,
to which offset_buffer correspond. And count how many wchar_t_characters
the substring occupy. We use offset_buffer to optimization.
See convert_mbs_to_wcs. */
+
static int
count_mbs_length(offset_buffer, length)
int *offset_buffer;
@@ -5365,7 +5376,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* offset buffer for optimizatoin. See convert_mbs_to_wc. */
int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
/* They hold whether each wchar_t is binary data or not. */
- int *is_binary1 = NULL, *is_binary2 = NULL;
+ char *is_binary = NULL;
#endif /* MBS_SUPPORT */
/* Just past the end of the corresponding string. */
@@ -5544,38 +5555,39 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
fill them with converted string. */
if (csize1 != 0)
{
- string1 = TALLOC (csize1 + 1, CHAR_TYPE);
- mbs_offset1 = TALLOC (csize1 + 1, int);
- is_binary1 = TALLOC (csize1 + 1, int);
- if (!string1 || !mbs_offset1 || !is_binary1)
+ string1 = REGEX_TALLOC (csize1 + 1, CHAR_TYPE);
+ mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
+ is_binary = REGEX_TALLOC (csize1 + 1, char);
+ if (!string1 || !mbs_offset1 || !is_binary)
{
- if (string1) free(string1);
- if (mbs_offset1) free(mbs_offset1);
- if (is_binary1) free(is_binary1);
+ FREE_VAR (string1);
+ FREE_VAR (mbs_offset1);
+ FREE_VAR (is_binary);
return -2;
}
size1 = convert_mbs_to_wcs(string1, cstring1, csize1,
- mbs_offset1, is_binary1);
+ mbs_offset1, is_binary);
string1[size1] = L'\0'; /* for a sentinel */
+ FREE_VAR (is_binary);
}
if (csize2 != 0)
{
string2 = REGEX_TALLOC (csize2 + 1, CHAR_TYPE);
mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
- is_binary2 = TALLOC (csize2 + 1, int);
- if (!string2 || !mbs_offset2 || !is_binary2)
+ is_binary = REGEX_TALLOC (csize2 + 1, char);
+ if (!string2 || !mbs_offset2 || !is_binary)
{
- if (string1) free(string1);
- if (mbs_offset1) free(mbs_offset1);
- if (is_binary1) free(is_binary1);
- if (string2) free(string2);
- if (mbs_offset2) free(mbs_offset2);
- if (is_binary2) free(is_binary2);
+ FREE_VAR (string1);
+ FREE_VAR (mbs_offset1);
+ FREE_VAR (string2);
+ FREE_VAR (mbs_offset2);
+ FREE_VAR (is_binary);
return -2;
}
size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
- mbs_offset2, is_binary2);
+ mbs_offset2, is_binary);
string2[size2] = L'\0'; /* for a sentinel */
+ FREE_VAR (is_binary);
}
/* We need to cast pattern to (wchar_t*), because we casted this compiled
@@ -6000,7 +6012,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* match with char_class? */
for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
{
- wctype_t wctype = *((wctype_t*)workp);
+ wctype_t wctype;
+ uintptr_t alignedp = ((uintptr_t)workp
+ + __alignof__(wctype_t) - 1)
+ & ~(uintptr_t)(__alignof__(wctype_t) - 1);
+ wctype = *((wctype_t*)alignedp);
workp += CHAR_CLASS_SIZE;
if (iswctype((wint_t)c, wctype))
goto char_set_matched;