summaryrefslogtreecommitdiff
path: root/lib/regex.c
diff options
context:
space:
mode:
authorJim Meyering <jim@meyering.net>2001-08-12 12:50:16 +0000
committerJim Meyering <jim@meyering.net>2001-08-12 12:50:16 +0000
commit5bd7048dfea765e3931da12336ddff424b47ef7f (patch)
tree9d36aa8e7753cc5f2901c94e71b4188cc9002088 /lib/regex.c
parent43f4c1dfd650939d502457ce8d6c0c9e1ddf69fc (diff)
downloadcoreutils-5bd7048dfea765e3931da12336ddff424b47ef7f.tar.xz
update from libc
Diffstat (limited to 'lib/regex.c')
-rw-r--r--lib/regex.c161
1 files changed, 112 insertions, 49 deletions
diff --git a/lib/regex.c b/lib/regex.c
index c20977cd9..83a6820b5 100644
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -405,9 +405,6 @@ typedef char boolean;
static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
reg_syntax_t syntax,
struct re_pattern_buffer *bufp));
-static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
- reg_syntax_t syntax,
- struct re_pattern_buffer *bufp));
static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
const char *string1, int size1,
@@ -415,6 +412,19 @@ static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
int pos,
struct re_registers *regs,
int stop));
+static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int startpos, int range,
+ struct re_registers *regs, int stop));
+static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
+
+#ifdef MBS_SUPPORT
+static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp));
+
+
static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
const char *cstring1, int csize1,
const char *cstring2, int csize2,
@@ -424,19 +434,13 @@ static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
wchar_t *string1, int size1,
wchar_t *string2, int size2,
int *mbs_offset1, int *mbs_offset2));
-static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
- const char *string1, int size1,
- const char *string2, int size2,
- int startpos, int range,
- struct re_registers *regs, int stop));
static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
const char *string1, int size1,
const char *string2, int size2,
int startpos, int range,
struct re_registers *regs, int stop));
-static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
-
+#endif
/* These are the command codes that appear in compiled regular
expressions. Some opcodes are followed by argument bytes. A
@@ -606,29 +610,31 @@ typedef enum
# define PREFIX(name) byte_##name
# define ARG_PREFIX(name) name
# define PUT_CHAR(c) putchar (c)
-#elif defined WCHAR
-# define CHAR_T wchar_t
-# define UCHAR_T wchar_t
-# define COMPILED_BUFFER_VAR wc_buffer
-# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
-# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
-# define PREFIX(name) wcs_##name
-# define ARG_PREFIX(name) c##name
-/* Should we use wide stream?? */
-# define PUT_CHAR(c) printf ("%C", c);
-# define TRUE 1
-# define FALSE 0
#else
-# ifdef MBS_SUPPORT
-# define WCHAR
+# ifdef WCHAR
+# define CHAR_T wchar_t
+# define UCHAR_T wchar_t
+# define COMPILED_BUFFER_VAR wc_buffer
+# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
+# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
+# define PREFIX(name) wcs_##name
+# define ARG_PREFIX(name) c##name
+/* Should we use wide stream?? */
+# define PUT_CHAR(c) printf ("%C", c);
+# define TRUE 1
+# define FALSE 0
+# else
+# ifdef MBS_SUPPORT
+# define WCHAR
+# define INSIDE_RECURSION
+# include "regex.c"
+# undef INSIDE_RECURSION
+# endif
+# define BYTE
# define INSIDE_RECURSION
# include "regex.c"
# undef INSIDE_RECURSION
# endif
-# define BYTE
-# define INSIDE_RECURSION
-# include "regex.c"
-# undef INSIDE_RECURSION
#endif
#ifdef INSIDE_RECURSION
@@ -3048,7 +3054,7 @@ PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp)
/* First compare the hashing value. */
if (symb_table[2 * elem] == hash
&& c1 == extra[symb_table[2 * elem + 1]]
- && memcmp (str,
+ && memcmp (char_str,
&extra[symb_table[2 * elem + 1]
+ 1], c1) == 0)
{
@@ -3068,7 +3074,7 @@ PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp)
in the table. */
idx += 1 + extra[idx];
/* Adjust for the alignment. */
- idx = (idx + 3) & ~4;
+ idx = (idx + 3) & ~3;
str[0] = (wchar_t) idx + 4;
}
@@ -5071,16 +5077,35 @@ weak_alias (__re_search_2, re_search_2)
#endif
#ifdef WCHAR
-# define FREE_WCS_BUFFERS() \
- do { \
- FREE_VAR (string1); \
- FREE_VAR (string2); \
- FREE_VAR (mbs_offset1); \
- FREE_VAR (mbs_offset2); \
+# define MAX_ALLOCA_SIZE 2000
+
+# define FREE_WCS_BUFFERS() \
+ do { \
+ if (size1 > MAX_ALLOCA_SIZE) \
+ { \
+ free (wcs_string1); \
+ free (mbs_offset1); \
+ } \
+ else \
+ { \
+ FREE_VAR (wcs_string1); \
+ FREE_VAR (mbs_offset1); \
+ } \
+ if (size2 > MAX_ALLOCA_SIZE) \
+ { \
+ free (wcs_string2); \
+ free (mbs_offset2); \
+ } \
+ else \
+ { \
+ FREE_VAR (wcs_string2); \
+ FREE_VAR (mbs_offset2); \
+ } \
} while (0)
#endif
+
static int
PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
regs, stop)
@@ -5155,36 +5180,72 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
fill them with converted string. */
if (size1 != 0)
{
- wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
- mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
- is_binary = REGEX_TALLOC (size1 + 1, char);
+ if (size1 > MAX_ALLOCA_SIZE)
+ {
+ wcs_string1 = TALLOC (size1 + 1, CHAR_T);
+ mbs_offset1 = TALLOC (size1 + 1, int);
+ is_binary = TALLOC (size1 + 1, char);
+ }
+ else
+ {
+ wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
+ mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
+ is_binary = REGEX_TALLOC (size1 + 1, char);
+ }
if (!wcs_string1 || !mbs_offset1 || !is_binary)
{
- FREE_VAR (wcs_string1);
- FREE_VAR (mbs_offset1);
- FREE_VAR (is_binary);
+ if (size1 > MAX_ALLOCA_SIZE)
+ {
+ free (wcs_string1);
+ free (mbs_offset1);
+ free (is_binary);
+ }
+ else
+ {
+ FREE_VAR (wcs_string1);
+ FREE_VAR (mbs_offset1);
+ FREE_VAR (is_binary);
+ }
return -2;
}
wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
mbs_offset1, is_binary);
wcs_string1[wcs_size1] = L'\0'; /* for a sentinel */
- FREE_VAR (is_binary);
+ if (size1 > MAX_ALLOCA_SIZE)
+ free (is_binary);
+ else
+ FREE_VAR (is_binary);
}
if (size2 != 0)
{
- wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
- mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
- is_binary = REGEX_TALLOC (size2 + 1, char);
+ if (size2 > MAX_ALLOCA_SIZE)
+ {
+ wcs_string2 = TALLOC (size2 + 1, CHAR_T);
+ mbs_offset2 = TALLOC (size2 + 1, int);
+ is_binary = TALLOC (size2 + 1, char);
+ }
+ else
+ {
+ wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
+ mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
+ is_binary = REGEX_TALLOC (size2 + 1, char);
+ }
if (!wcs_string2 || !mbs_offset2 || !is_binary)
{
FREE_WCS_BUFFERS ();
- FREE_VAR (is_binary);
+ if (size2 > MAX_ALLOCA_SIZE)
+ free (is_binary);
+ else
+ FREE_VAR (is_binary);
return -2;
}
wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
mbs_offset2, is_binary);
wcs_string2[wcs_size2] = L'\0'; /* for a sentinel */
- FREE_VAR (is_binary);
+ if (size2 > MAX_ALLOCA_SIZE)
+ free (is_binary);
+ else
+ FREE_VAR (is_binary);
}
#endif /* WCHAR */
@@ -5346,7 +5407,9 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
/* Use internationalized API instead of SYNTAX. */
# define WORDCHAR_P(d) \
(iswalnum ((wint_t)((d) == end1 ? *string2 \
- : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0)
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0 \
+ || ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else /* BYTE */
# define WORDCHAR_P(d) \
(SYNTAX ((d) == end1 ? *string2 \