diff options
author | Jim Meyering <jim@meyering.net> | 2002-06-22 10:11:14 +0000 |
---|---|---|
committer | Jim Meyering <jim@meyering.net> | 2002-06-22 10:11:14 +0000 |
commit | 64a1747ac51e4aeb96c267951ca0030407f7960a (patch) | |
tree | 8651b8205c98723cab5780b25fbf2dd2810ba3f5 | |
parent | 2e53304940be4bb31ac1c8c072c681f52b7b622e (diff) | |
download | coreutils-64a1747ac51e4aeb96c267951ca0030407f7960a.tar.xz |
Update from diffutils-2.8.2.
-rw-r--r-- | lib/fnmatch.c | 519 |
1 files changed, 336 insertions, 183 deletions
diff --git a/lib/fnmatch.c b/lib/fnmatch.c index 9bff8c220..1b498e5f3 100644 --- a/lib/fnmatch.c +++ b/lib/fnmatch.c @@ -1,4 +1,5 @@ -/* Copyright 1991, 1992, 1993, 1996, 1997, 2000 Free Software Foundation, Inc. +/* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2000, 2001, + 2002 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -10,9 +11,9 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #if HAVE_CONFIG_H # include <config.h> @@ -23,208 +24,360 @@ # define _GNU_SOURCE 1 #endif +#ifdef __GNUC__ +# define alloca __builtin_alloca +# define HAVE_ALLOCA 1 +#else +# if defined HAVE_ALLOCA_H || defined _LIBC +# include <alloca.h> +# else +# ifdef _AIX + # pragma alloca +# else +# ifndef alloca +char *alloca (); +# endif +# endif +# endif +#endif + +#if ! defined __builtin_expect && __GNUC__ < 3 +# define __builtin_expect(expr, expected) (expr) +#endif + +#include <assert.h> #include <errno.h> #include <fnmatch.h> #include <ctype.h> -#if defined STDC_HEADERS || !defined isascii -# define IN_CTYPE_DOMAIN(c) 1 +#if HAVE_STRING_H || defined _LIBC +# include <string.h> #else -# define IN_CTYPE_DOMAIN(c) isascii (c) +# if HAVE_STRINGS_H +# include <strings.h> +# endif #endif -#define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c)) +#if defined STDC_HEADERS || defined _LIBC +# include <stddef.h> +# include <stdlib.h> +#endif +/* For platform which support the ISO C amendement 1 functionality we + support user defined character classes. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ +# include <wchar.h> +# include <wctype.h> +#endif -#ifndef errno -extern int errno; +/* We need some of the locale data (the collation sequence information) + but there is no interface to get this information in general. Therefore + we support a correct implementation only in glibc. */ +#ifdef _LIBC +# include "../locale/localeinfo.h" +# include "../locale/elem-hash.h" +# include "../locale/coll-lookup.h" +# include <shlib-compat.h> + +# define CONCAT(a,b) __CONCAT(a,b) +# define mbsinit __mbsinit +# define mbsrtowcs __mbsrtowcs +# define fnmatch __fnmatch +extern int fnmatch (const char *pattern, const char *string, int flags); #endif -/* Match STRING against the filename pattern PATTERN, returning zero if - it matches, nonzero if not. */ -int -fnmatch (const char *pattern, const char *string, int flags) -{ - register const char *p = pattern, *n = string; - register char c; +/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set. */ +#define NO_LEADING_PERIOD(flags) \ + ((flags & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD)) + +/* Comment out all this code if we are using the GNU C Library, are not + actually compiling the library itself, and have not detected a bug + in the library. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if defined _LIBC || !defined __GNU_LIBRARY__ || !HAVE_FNMATCH_GNU + + +# if defined STDC_HEADERS || !defined isascii +# define ISASCII(c) 1 +# else +# define ISASCII(c) isascii(c) +# endif + +# ifdef isblank +# define ISBLANK(c) (ISASCII (c) && isblank (c)) +# else +# define ISBLANK(c) ((c) == ' ' || (c) == '\t') +# endif +# ifdef isgraph +# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +# else +# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +# endif + +# define ISPRINT(c) (ISASCII (c) && isprint (c)) +# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +# define ISALNUM(c) (ISASCII (c) && isalnum (c)) +# define ISALPHA(c) (ISASCII (c) && isalpha (c)) +# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +# define ISLOWER(c) (ISASCII (c) && islower (c)) +# define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +# define ISSPACE(c) (ISASCII (c) && isspace (c)) +# define ISUPPER(c) (ISASCII (c) && isupper (c)) +# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) + +# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +/* The GNU C library provides support for user-defined character classes + and the functions from ISO C amendement 1. */ +# ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementation might still have this + problem. Use a reasonable default value. */ +# define CHAR_CLASS_MAX_LENGTH 256 +# endif + +# ifdef _LIBC +# define IS_CHAR_CLASS(string) __wctype (string) +# else +# define IS_CHAR_CLASS(string) wctype (string) +# endif + +# ifdef _LIBC +# define ISWCTYPE(WC, WT) __iswctype (WC, WT) +# else +# define ISWCTYPE(WC, WT) iswctype (WC, WT) +# endif + +# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC +/* In this case we are implementing the multibyte character handling. */ +# define HANDLE_MULTIBYTE 1 +# endif + +# else +# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ + +# define IS_CHAR_CLASS(string) \ + (STREQ (string, "alpha") || STREQ (string, "upper") \ + || STREQ (string, "lower") || STREQ (string, "digit") \ + || STREQ (string, "alnum") || STREQ (string, "xdigit") \ + || STREQ (string, "space") || STREQ (string, "print") \ + || STREQ (string, "punct") || STREQ (string, "graph") \ + || STREQ (string, "cntrl") || STREQ (string, "blank")) +# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +# if !defined _LIBC && !defined getenv && !HAVE_DECL_GETENV +extern char *getenv (); +# endif + +# ifndef errno +extern int errno; +# endif + +/* Global variable. */ +static int posixly_correct; + +# ifndef internal_function +/* Inside GNU libc we mark some function in a special way. In other + environments simply ignore the marking. */ +# define internal_function +# endif /* Note that this evaluates C many times. */ -#define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER ((unsigned char) (c)) \ - ? tolower ((unsigned char) (c)) \ - : (c)) +# ifdef _LIBC +# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c)) +# else +# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c)) +# endif +# define CHAR char +# define UCHAR unsigned char +# define INT int +# define FCT internal_fnmatch +# define EXT ext_match +# define END end_pattern +# define L(CS) CS +# ifdef _LIBC +# define BTOWC(C) __btowc (C) +# else +# define BTOWC(C) btowc (C) +# endif +# define STRLEN(S) strlen (S) +# define STRCAT(D, S) strcat (D, S) +# ifdef _LIBC +# define MEMPCPY(D, S, N) __mempcpy (D, S, N) +# else +# if HAVE_MEMPCPY +# define MEMPCPY(D, S, N) mempcpy (D, S, N) +# else +# define MEMPCPY(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N))) +# endif +# endif +# define MEMCHR(S, C, N) memchr (S, C, N) +# define STRCOLL(S1, S2) strcoll (S1, S2) +# include "fnmatch_loop.c" + + +# if HANDLE_MULTIBYTE +# define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c)) +# define CHAR wchar_t +# define UCHAR wint_t +# define INT wint_t +# define FCT internal_fnwmatch +# define EXT ext_wmatch +# define END end_wpattern +# define L(CS) L##CS +# define BTOWC(C) (C) +# ifdef _LIBC +# define STRLEN(S) __wcslen (S) +# define STRCAT(D, S) __wcscat (D, S) +# define MEMPCPY(D, S, N) __wmempcpy (D, S, N) +# else +# define STRLEN(S) wcslen (S) +# define STRCAT(D, S) wcscat (D, S) +# if HAVE_WMEMPCPY +# define MEMPCPY(D, S, N) wmempcpy (D, S, N) +# else +# define MEMPCPY(D, S, N) (wmemcpy (D, S, N) + (N)) +# endif +# endif +# define MEMCHR(S, C, N) wmemchr (S, C, N) +# define STRCOLL(S1, S2) wcscoll (S1, S2) +# define WIDE_CHAR_VERSION 1 + +# undef IS_CHAR_CLASS +/* We have to convert the wide character string in a multibyte string. But + we know that the character class names consist of alphanumeric characters + from the portable character set, and since the wide character encoding + for a member of the portable character set is the same code point as + its single-byte encoding, we can use a simplified method to convert the + string to a multibyte character string. */ +static wctype_t +is_char_class (const wchar_t *wcs) +{ + char s[CHAR_CLASS_MAX_LENGTH + 1]; + char *cp = s; - while ((c = *p++) != '\0') + do { - c = FOLD (c); - - switch (c) + /* Test for a printable character from the portable character set. */ +# ifdef _LIBC + if (*wcs < 0x20 || *wcs > 0x7e + || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60) + return (wctype_t) 0; +# else + switch (*wcs) { - case '?': - if (*n == '\0') - return FNM_NOMATCH; - else if ((flags & FNM_FILE_NAME) && *n == '/') - return FNM_NOMATCH; - else if ((flags & FNM_PERIOD) && *n == '.' && - (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/'))) - return FNM_NOMATCH; + case L' ': case L'!': case L'"': case L'#': case L'%': + case L'&': case L'\'': case L'(': case L')': case L'*': + case L'+': case L',': case L'-': case L'.': case L'/': + case L'0': case L'1': case L'2': case L'3': case L'4': + case L'5': case L'6': case L'7': case L'8': case L'9': + case L':': case L';': case L'<': case L'=': case L'>': + case L'?': + case L'A': case L'B': case L'C': case L'D': case L'E': + case L'F': case L'G': case L'H': case L'I': case L'J': + case L'K': case L'L': case L'M': case L'N': case L'O': + case L'P': case L'Q': case L'R': case L'S': case L'T': + case L'U': case L'V': case L'W': case L'X': case L'Y': + case L'Z': + case L'[': case L'\\': case L']': case L'^': case L'_': + case L'a': case L'b': case L'c': case L'd': case L'e': + case L'f': case L'g': case L'h': case L'i': case L'j': + case L'k': case L'l': case L'm': case L'n': case L'o': + case L'p': case L'q': case L'r': case L's': case L't': + case L'u': case L'v': case L'w': case L'x': case L'y': + case L'z': case L'{': case L'|': case L'}': case L'~': break; - - case '\\': - if (!(flags & FNM_NOESCAPE)) - { - c = *p++; - if (c == '\0') - /* Trailing \ loses. */ - return FNM_NOMATCH; - c = FOLD (c); - } - if (FOLD (*n) != c) - return FNM_NOMATCH; - break; - - case '*': - if ((flags & FNM_PERIOD) && *n == '.' && - (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/'))) - return FNM_NOMATCH; - - for (c = *p++; c == '?' || c == '*'; c = *p++) - { - if (c == '?') - { - /* A ? needs to match one character. */ - if (*n == '\0' || (*n == '/' && (flags & FNM_FILE_NAME))) - /* There isn't another character; no match. */ - return FNM_NOMATCH; - else - /* One character of the string is consumed in matching - this ? wildcard, so *??? won't match if there are - less than three characters. */ - ++n; - } - } - - if (c == '\0') - { - if ((flags & (FNM_FILE_NAME | FNM_LEADING_DIR)) == FNM_FILE_NAME) - for (; *n != '\0'; n++) - if (*n == '/') - return FNM_NOMATCH; - return 0; - } - - { - char c1 = (!(flags & FNM_NOESCAPE) && c == '\\') ? *p : c; - c1 = FOLD (c1); - for (--p; *n != '\0'; ++n) - if ((c == '[' || FOLD (*n) == c1) && - fnmatch (p, n, flags & ~FNM_PERIOD) == 0) - return 0; - else if (*n == '/' && (flags & FNM_FILE_NAME)) - break; - return FNM_NOMATCH; - } - - case '[': - { - /* Nonzero if the sense of the character class is inverted. */ - register int not; - - if (*n == '\0') - return FNM_NOMATCH; - - if ((flags & FNM_PERIOD) && *n == '.' && - (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/'))) - return FNM_NOMATCH; - - not = (*p == '!' || *p == '^'); - if (not) - ++p; - - c = *p++; - for (;;) - { - register char cstart = c, cend = c; - - if (!(flags & FNM_NOESCAPE) && c == '\\') - { - if (*p == '\0') - return FNM_NOMATCH; - cstart = cend = *p++; - } - - cstart = cend = FOLD (cstart); - - if (c == '\0') - /* [ (unterminated) loses. */ - return FNM_NOMATCH; - - c = *p++; - c = FOLD (c); - - if ((flags & FNM_FILE_NAME) && c == '/') - /* [/] can never match. */ - return FNM_NOMATCH; - - if (c == '-' && *p != ']') - { - cend = *p++; - if (!(flags & FNM_NOESCAPE) && cend == '\\') - cend = *p++; - if (cend == '\0') - return FNM_NOMATCH; - cend = FOLD (cend); - - c = *p++; - } - - if (FOLD (*n) >= cstart && FOLD (*n) <= cend) - goto matched; - - if (c == ']') - break; - } - if (!not) - return FNM_NOMATCH; - break; - - matched:; - /* Skip the rest of the [...] that already matched. */ - while (c != ']') - { - if (c == '\0') - /* [... (unterminated) loses. */ - return FNM_NOMATCH; - - c = *p++; - if (!(flags & FNM_NOESCAPE) && c == '\\') - { - if (*p == '\0') - return FNM_NOMATCH; - /* XXX 1003.2d11 is unclear if this is right. */ - ++p; - } - } - if (not) - return FNM_NOMATCH; - } - break; - default: - if (c != FOLD (*n)) - return FNM_NOMATCH; + return (wctype_t) 0; } +# endif + + /* Avoid overrunning the buffer. */ + if (cp == s + CHAR_CLASS_MAX_LENGTH) + return (wctype_t) 0; - ++n; + *cp++ = (char) *wcs++; } + while (*wcs != L'\0'); - if (*n == '\0') - return 0; + *cp = '\0'; + +# ifdef _LIBC + return __wctype (s); +# else + return wctype (s); +# endif +} +# define IS_CHAR_CLASS(string) is_char_class (string) - if ((flags & FNM_LEADING_DIR) && *n == '/') - /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ - return 0; +# include "fnmatch_loop.c" +# endif - return FNM_NOMATCH; -#undef FOLD +int +fnmatch (pattern, string, flags) + const char *pattern; + const char *string; + int flags; +{ +# if HANDLE_MULTIBYTE + if (__builtin_expect (MB_CUR_MAX, 1) != 1) + { + mbstate_t ps; + size_t n; + wchar_t *wpattern; + wchar_t *wstring; + + /* Convert the strings into wide characters. */ + memset (&ps, '\0', sizeof (ps)); + n = mbsrtowcs (NULL, &pattern, 0, &ps); + if (__builtin_expect (n, 0) == (size_t) -1) + /* Something wrong. + XXX Do we have to set `errno' to something which mbsrtows hasn't + already done? */ + return -1; + wpattern = (wchar_t *) alloca ((n + 1) * sizeof (wchar_t)); + assert (mbsinit (&ps)); + (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps); + + assert (mbsinit (&ps)); + n = mbsrtowcs (NULL, &string, 0, &ps); + if (__builtin_expect (n, 0) == (size_t) -1) + /* Something wrong. + XXX Do we have to set `errno' to something which mbsrtows hasn't + already done? */ + return -1; + wstring = (wchar_t *) alloca ((n + 1) * sizeof (wchar_t)); + assert (mbsinit (&ps)); + (void) mbsrtowcs (wstring, &string, n + 1, &ps); + + return internal_fnwmatch (wpattern, wstring, wstring + n, + flags & FNM_PERIOD, flags); + } +# endif /* mbstate_t and mbsrtowcs or _LIBC. */ + + return internal_fnmatch (pattern, string, string + strlen (string), + flags & FNM_PERIOD, flags); } + +# ifdef _LIBC +# undef fnmatch +versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3); +# if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3) +strong_alias (__fnmatch, __fnmatch_old) +compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0); +# endif +# endif + +#endif /* _LIBC or not __GNU_LIBRARY__. */ |