summaryrefslogtreecommitdiff
path: root/lib/fnmatch.c
diff options
context:
space:
mode:
authorJim Meyering <jim@meyering.net>2002-06-22 10:11:14 +0000
committerJim Meyering <jim@meyering.net>2002-06-22 10:11:14 +0000
commit64a1747ac51e4aeb96c267951ca0030407f7960a (patch)
tree8651b8205c98723cab5780b25fbf2dd2810ba3f5 /lib/fnmatch.c
parent2e53304940be4bb31ac1c8c072c681f52b7b622e (diff)
downloadcoreutils-64a1747ac51e4aeb96c267951ca0030407f7960a.tar.xz
Update from diffutils-2.8.2.
Diffstat (limited to 'lib/fnmatch.c')
-rw-r--r--lib/fnmatch.c519
1 files changed, 336 insertions, 183 deletions
diff --git a/lib/fnmatch.c b/lib/fnmatch.c
index 9bff8c220..1b498e5f3 100644
--- a/lib/fnmatch.c
+++ b/lib/fnmatch.c
@@ -1,4 +1,5 @@
-/* Copyright 1991, 1992, 1993, 1996, 1997, 2000 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -10,9 +11,9 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation,
- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#if HAVE_CONFIG_H
# include <config.h>
@@ -23,208 +24,360 @@
# define _GNU_SOURCE 1
#endif
+#ifdef __GNUC__
+# define alloca __builtin_alloca
+# define HAVE_ALLOCA 1
+#else
+# if defined HAVE_ALLOCA_H || defined _LIBC
+# include <alloca.h>
+# else
+# ifdef _AIX
+ # pragma alloca
+# else
+# ifndef alloca
+char *alloca ();
+# endif
+# endif
+# endif
+#endif
+
+#if ! defined __builtin_expect && __GNUC__ < 3
+# define __builtin_expect(expr, expected) (expr)
+#endif
+
+#include <assert.h>
#include <errno.h>
#include <fnmatch.h>
#include <ctype.h>
-#if defined STDC_HEADERS || !defined isascii
-# define IN_CTYPE_DOMAIN(c) 1
+#if HAVE_STRING_H || defined _LIBC
+# include <string.h>
#else
-# define IN_CTYPE_DOMAIN(c) isascii (c)
+# if HAVE_STRINGS_H
+# include <strings.h>
+# endif
#endif
-#define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
+#if defined STDC_HEADERS || defined _LIBC
+# include <stddef.h>
+# include <stdlib.h>
+#endif
+/* For platform which support the ISO C amendement 1 functionality we
+ support user defined character classes. */
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
+# include <wchar.h>
+# include <wctype.h>
+#endif
-#ifndef errno
-extern int errno;
+/* We need some of the locale data (the collation sequence information)
+ but there is no interface to get this information in general. Therefore
+ we support a correct implementation only in glibc. */
+#ifdef _LIBC
+# include "../locale/localeinfo.h"
+# include "../locale/elem-hash.h"
+# include "../locale/coll-lookup.h"
+# include <shlib-compat.h>
+
+# define CONCAT(a,b) __CONCAT(a,b)
+# define mbsinit __mbsinit
+# define mbsrtowcs __mbsrtowcs
+# define fnmatch __fnmatch
+extern int fnmatch (const char *pattern, const char *string, int flags);
#endif
-/* Match STRING against the filename pattern PATTERN, returning zero if
- it matches, nonzero if not. */
-int
-fnmatch (const char *pattern, const char *string, int flags)
-{
- register const char *p = pattern, *n = string;
- register char c;
+/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set. */
+#define NO_LEADING_PERIOD(flags) \
+ ((flags & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))
+
+/* Comment out all this code if we are using the GNU C Library, are not
+ actually compiling the library itself, and have not detected a bug
+ in the library. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#if defined _LIBC || !defined __GNU_LIBRARY__ || !HAVE_FNMATCH_GNU
+
+
+# if defined STDC_HEADERS || !defined isascii
+# define ISASCII(c) 1
+# else
+# define ISASCII(c) isascii(c)
+# endif
+
+# ifdef isblank
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
+# else
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+# endif
+# ifdef isgraph
+# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# else
+# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# endif
+
+# define ISPRINT(c) (ISASCII (c) && isprint (c))
+# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+# define ISALNUM(c) (ISASCII (c) && isalnum (c))
+# define ISALPHA(c) (ISASCII (c) && isalpha (c))
+# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+# define ISLOWER(c) (ISASCII (c) && islower (c))
+# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+# define ISSPACE(c) (ISASCII (c) && isspace (c))
+# define ISUPPER(c) (ISASCII (c) && isupper (c))
+# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+/* The GNU C library provides support for user-defined character classes
+ and the functions from ISO C amendement 1. */
+# ifdef CHARCLASS_NAME_MAX
+# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen but some implementation might still have this
+ problem. Use a reasonable default value. */
+# define CHAR_CLASS_MAX_LENGTH 256
+# endif
+
+# ifdef _LIBC
+# define IS_CHAR_CLASS(string) __wctype (string)
+# else
+# define IS_CHAR_CLASS(string) wctype (string)
+# endif
+
+# ifdef _LIBC
+# define ISWCTYPE(WC, WT) __iswctype (WC, WT)
+# else
+# define ISWCTYPE(WC, WT) iswctype (WC, WT)
+# endif
+
+# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC
+/* In this case we are implementing the multibyte character handling. */
+# define HANDLE_MULTIBYTE 1
+# endif
+
+# else
+# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+# define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+# endif
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+# if !defined _LIBC && !defined getenv && !HAVE_DECL_GETENV
+extern char *getenv ();
+# endif
+
+# ifndef errno
+extern int errno;
+# endif
+
+/* Global variable. */
+static int posixly_correct;
+
+# ifndef internal_function
+/* Inside GNU libc we mark some function in a special way. In other
+ environments simply ignore the marking. */
+# define internal_function
+# endif
/* Note that this evaluates C many times. */
-#define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER ((unsigned char) (c)) \
- ? tolower ((unsigned char) (c)) \
- : (c))
+# ifdef _LIBC
+# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
+# else
+# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
+# endif
+# define CHAR char
+# define UCHAR unsigned char
+# define INT int
+# define FCT internal_fnmatch
+# define EXT ext_match
+# define END end_pattern
+# define L(CS) CS
+# ifdef _LIBC
+# define BTOWC(C) __btowc (C)
+# else
+# define BTOWC(C) btowc (C)
+# endif
+# define STRLEN(S) strlen (S)
+# define STRCAT(D, S) strcat (D, S)
+# ifdef _LIBC
+# define MEMPCPY(D, S, N) __mempcpy (D, S, N)
+# else
+# if HAVE_MEMPCPY
+# define MEMPCPY(D, S, N) mempcpy (D, S, N)
+# else
+# define MEMPCPY(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N)))
+# endif
+# endif
+# define MEMCHR(S, C, N) memchr (S, C, N)
+# define STRCOLL(S1, S2) strcoll (S1, S2)
+# include "fnmatch_loop.c"
+
+
+# if HANDLE_MULTIBYTE
+# define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c))
+# define CHAR wchar_t
+# define UCHAR wint_t
+# define INT wint_t
+# define FCT internal_fnwmatch
+# define EXT ext_wmatch
+# define END end_wpattern
+# define L(CS) L##CS
+# define BTOWC(C) (C)
+# ifdef _LIBC
+# define STRLEN(S) __wcslen (S)
+# define STRCAT(D, S) __wcscat (D, S)
+# define MEMPCPY(D, S, N) __wmempcpy (D, S, N)
+# else
+# define STRLEN(S) wcslen (S)
+# define STRCAT(D, S) wcscat (D, S)
+# if HAVE_WMEMPCPY
+# define MEMPCPY(D, S, N) wmempcpy (D, S, N)
+# else
+# define MEMPCPY(D, S, N) (wmemcpy (D, S, N) + (N))
+# endif
+# endif
+# define MEMCHR(S, C, N) wmemchr (S, C, N)
+# define STRCOLL(S1, S2) wcscoll (S1, S2)
+# define WIDE_CHAR_VERSION 1
+
+# undef IS_CHAR_CLASS
+/* We have to convert the wide character string in a multibyte string. But
+ we know that the character class names consist of alphanumeric characters
+ from the portable character set, and since the wide character encoding
+ for a member of the portable character set is the same code point as
+ its single-byte encoding, we can use a simplified method to convert the
+ string to a multibyte character string. */
+static wctype_t
+is_char_class (const wchar_t *wcs)
+{
+ char s[CHAR_CLASS_MAX_LENGTH + 1];
+ char *cp = s;
- while ((c = *p++) != '\0')
+ do
{
- c = FOLD (c);
-
- switch (c)
+ /* Test for a printable character from the portable character set. */
+# ifdef _LIBC
+ if (*wcs < 0x20 || *wcs > 0x7e
+ || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60)
+ return (wctype_t) 0;
+# else
+ switch (*wcs)
{
- case '?':
- if (*n == '\0')
- return FNM_NOMATCH;
- else if ((flags & FNM_FILE_NAME) && *n == '/')
- return FNM_NOMATCH;
- else if ((flags & FNM_PERIOD) && *n == '.' &&
- (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/')))
- return FNM_NOMATCH;
+ case L' ': case L'!': case L'"': case L'#': case L'%':
+ case L'&': case L'\'': case L'(': case L')': case L'*':
+ case L'+': case L',': case L'-': case L'.': case L'/':
+ case L'0': case L'1': case L'2': case L'3': case L'4':
+ case L'5': case L'6': case L'7': case L'8': case L'9':
+ case L':': case L';': case L'<': case L'=': case L'>':
+ case L'?':
+ case L'A': case L'B': case L'C': case L'D': case L'E':
+ case L'F': case L'G': case L'H': case L'I': case L'J':
+ case L'K': case L'L': case L'M': case L'N': case L'O':
+ case L'P': case L'Q': case L'R': case L'S': case L'T':
+ case L'U': case L'V': case L'W': case L'X': case L'Y':
+ case L'Z':
+ case L'[': case L'\\': case L']': case L'^': case L'_':
+ case L'a': case L'b': case L'c': case L'd': case L'e':
+ case L'f': case L'g': case L'h': case L'i': case L'j':
+ case L'k': case L'l': case L'm': case L'n': case L'o':
+ case L'p': case L'q': case L'r': case L's': case L't':
+ case L'u': case L'v': case L'w': case L'x': case L'y':
+ case L'z': case L'{': case L'|': case L'}': case L'~':
break;
-
- case '\\':
- if (!(flags & FNM_NOESCAPE))
- {
- c = *p++;
- if (c == '\0')
- /* Trailing \ loses. */
- return FNM_NOMATCH;
- c = FOLD (c);
- }
- if (FOLD (*n) != c)
- return FNM_NOMATCH;
- break;
-
- case '*':
- if ((flags & FNM_PERIOD) && *n == '.' &&
- (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/')))
- return FNM_NOMATCH;
-
- for (c = *p++; c == '?' || c == '*'; c = *p++)
- {
- if (c == '?')
- {
- /* A ? needs to match one character. */
- if (*n == '\0' || (*n == '/' && (flags & FNM_FILE_NAME)))
- /* There isn't another character; no match. */
- return FNM_NOMATCH;
- else
- /* One character of the string is consumed in matching
- this ? wildcard, so *??? won't match if there are
- less than three characters. */
- ++n;
- }
- }
-
- if (c == '\0')
- {
- if ((flags & (FNM_FILE_NAME | FNM_LEADING_DIR)) == FNM_FILE_NAME)
- for (; *n != '\0'; n++)
- if (*n == '/')
- return FNM_NOMATCH;
- return 0;
- }
-
- {
- char c1 = (!(flags & FNM_NOESCAPE) && c == '\\') ? *p : c;
- c1 = FOLD (c1);
- for (--p; *n != '\0'; ++n)
- if ((c == '[' || FOLD (*n) == c1) &&
- fnmatch (p, n, flags & ~FNM_PERIOD) == 0)
- return 0;
- else if (*n == '/' && (flags & FNM_FILE_NAME))
- break;
- return FNM_NOMATCH;
- }
-
- case '[':
- {
- /* Nonzero if the sense of the character class is inverted. */
- register int not;
-
- if (*n == '\0')
- return FNM_NOMATCH;
-
- if ((flags & FNM_PERIOD) && *n == '.' &&
- (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/')))
- return FNM_NOMATCH;
-
- not = (*p == '!' || *p == '^');
- if (not)
- ++p;
-
- c = *p++;
- for (;;)
- {
- register char cstart = c, cend = c;
-
- if (!(flags & FNM_NOESCAPE) && c == '\\')
- {
- if (*p == '\0')
- return FNM_NOMATCH;
- cstart = cend = *p++;
- }
-
- cstart = cend = FOLD (cstart);
-
- if (c == '\0')
- /* [ (unterminated) loses. */
- return FNM_NOMATCH;
-
- c = *p++;
- c = FOLD (c);
-
- if ((flags & FNM_FILE_NAME) && c == '/')
- /* [/] can never match. */
- return FNM_NOMATCH;
-
- if (c == '-' && *p != ']')
- {
- cend = *p++;
- if (!(flags & FNM_NOESCAPE) && cend == '\\')
- cend = *p++;
- if (cend == '\0')
- return FNM_NOMATCH;
- cend = FOLD (cend);
-
- c = *p++;
- }
-
- if (FOLD (*n) >= cstart && FOLD (*n) <= cend)
- goto matched;
-
- if (c == ']')
- break;
- }
- if (!not)
- return FNM_NOMATCH;
- break;
-
- matched:;
- /* Skip the rest of the [...] that already matched. */
- while (c != ']')
- {
- if (c == '\0')
- /* [... (unterminated) loses. */
- return FNM_NOMATCH;
-
- c = *p++;
- if (!(flags & FNM_NOESCAPE) && c == '\\')
- {
- if (*p == '\0')
- return FNM_NOMATCH;
- /* XXX 1003.2d11 is unclear if this is right. */
- ++p;
- }
- }
- if (not)
- return FNM_NOMATCH;
- }
- break;
-
default:
- if (c != FOLD (*n))
- return FNM_NOMATCH;
+ return (wctype_t) 0;
}
+# endif
+
+ /* Avoid overrunning the buffer. */
+ if (cp == s + CHAR_CLASS_MAX_LENGTH)
+ return (wctype_t) 0;
- ++n;
+ *cp++ = (char) *wcs++;
}
+ while (*wcs != L'\0');
- if (*n == '\0')
- return 0;
+ *cp = '\0';
+
+# ifdef _LIBC
+ return __wctype (s);
+# else
+ return wctype (s);
+# endif
+}
+# define IS_CHAR_CLASS(string) is_char_class (string)
- if ((flags & FNM_LEADING_DIR) && *n == '/')
- /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
- return 0;
+# include "fnmatch_loop.c"
+# endif
- return FNM_NOMATCH;
-#undef FOLD
+int
+fnmatch (pattern, string, flags)
+ const char *pattern;
+ const char *string;
+ int flags;
+{
+# if HANDLE_MULTIBYTE
+ if (__builtin_expect (MB_CUR_MAX, 1) != 1)
+ {
+ mbstate_t ps;
+ size_t n;
+ wchar_t *wpattern;
+ wchar_t *wstring;
+
+ /* Convert the strings into wide characters. */
+ memset (&ps, '\0', sizeof (ps));
+ n = mbsrtowcs (NULL, &pattern, 0, &ps);
+ if (__builtin_expect (n, 0) == (size_t) -1)
+ /* Something wrong.
+ XXX Do we have to set `errno' to something which mbsrtows hasn't
+ already done? */
+ return -1;
+ wpattern = (wchar_t *) alloca ((n + 1) * sizeof (wchar_t));
+ assert (mbsinit (&ps));
+ (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
+
+ assert (mbsinit (&ps));
+ n = mbsrtowcs (NULL, &string, 0, &ps);
+ if (__builtin_expect (n, 0) == (size_t) -1)
+ /* Something wrong.
+ XXX Do we have to set `errno' to something which mbsrtows hasn't
+ already done? */
+ return -1;
+ wstring = (wchar_t *) alloca ((n + 1) * sizeof (wchar_t));
+ assert (mbsinit (&ps));
+ (void) mbsrtowcs (wstring, &string, n + 1, &ps);
+
+ return internal_fnwmatch (wpattern, wstring, wstring + n,
+ flags & FNM_PERIOD, flags);
+ }
+# endif /* mbstate_t and mbsrtowcs or _LIBC. */
+
+ return internal_fnmatch (pattern, string, string + strlen (string),
+ flags & FNM_PERIOD, flags);
}
+
+# ifdef _LIBC
+# undef fnmatch
+versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3);
+# if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3)
+strong_alias (__fnmatch, __fnmatch_old)
+compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0);
+# endif
+# endif
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */