fnmatch: Replace BSD implementation with musl one.

The BSD implementation was under the Advertising Clause,
so we might as well take the opportunity to replace the
implementation entirely with musl's.

Header also rewritten to be a Haiku one; the constants
are left unchanged of course.
This commit is contained in:
Augustin Cavalier 2020-07-03 15:09:33 -04:00
parent 6996e5b271
commit 657f041aee
6 changed files with 352 additions and 277 deletions

View File

@ -1,62 +1,31 @@
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fnmatch.h 8.1 (Berkeley) 6/2/93
/*
* Copyright 2020, Haiku, Inc. All rights reserved.
* Distributed under the terms of the MIT License.
*/
#ifndef _FNMATCH_H_
#define _FNMATCH_H_
#ifndef _FNMATCH_H
#define _FNMATCH_H
#define FNM_NOMATCH 1 /* Match failed. */
#define FNM_NOESCAPE 0x01
#define FNM_PATHNAME 0x02
#define FNM_PERIOD 0x04
#define FNM_NOESCAPE 0x01 /* Disable backslash escaping. */
#define FNM_PATHNAME 0x02 /* Slash must be matched by slash. */
#define FNM_PERIOD 0x04 /* Period must be matched by period. */
#define FNM_LEADING_DIR 0x08
#define FNM_CASEFOLD 0x10
#define FNM_IGNORECASE FNM_CASEFOLD
#define FNM_FILE_NAME FNM_PATHNAME
#define FNM_LEADING_DIR 0x08 /* Ignore /<tail> after a match. */
#define FNM_CASEFOLD 0x10 /* Case insensitive search. */
#define FNM_IGNORECASE FNM_CASEFOLD
#define FNM_FILE_NAME FNM_PATHNAME
#define FNM_NOMATCH 1
#ifdef __cplusplus
extern "C" {
#endif
extern int fnmatch(const char *pattern, const char *string, int flags);
extern int fnmatch(const char *, const char *, int);
#ifdef __cplusplus
}
#endif
#endif /* _FNMATCH_H_ */
#endif /* _FNMATCH_H */

View File

@ -23,7 +23,6 @@ for architectureObject in [ MultiArchSubDirSetup ] {
dirent.c
errno.c
fcntl.cpp
fnmatch.c
fts.c
ftw.c
glob.c

View File

@ -1,230 +0,0 @@
/*
* Copyright (c) 1989, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Guido van Rossum.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
* Compares a filename or pathname to a pattern.
*/
#include <ctype.h>
#include <fnmatch.h>
#include <string.h>
#include <stdio.h>
//#include "collate.h"
/* End-of-string sentinel used while scanning both pattern and string. */
#define EOS '\0'
/* Result codes returned by rangematch(). */
#define RANGE_MATCH 1
#define RANGE_NOMATCH 0
#define RANGE_ERROR (-1)
/* Forward declaration; rangematch() is defined after fnmatch(). */
static int rangematch(const char *, char, int, char **);
/*
 * fnmatch() -- match `string` against the shell wildcard `pattern`.
 *
 * `flags` is a bitwise OR of:
 *   FNM_NOESCAPE     backslash in the pattern is an ordinary character
 *   FNM_PATHNAME     '/' in string is never matched by '?', '*' or '[...]'
 *   FNM_PERIOD       a leading '.' in string must be matched by a literal '.'
 *   FNM_LEADING_DIR  a match may be followed by "/<anything>" in string
 *   FNM_CASEFOLD     literal characters are compared case-insensitively
 *
 * Returns 0 when the string matches and FNM_NOMATCH otherwise.
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	const char *stringstart;
	char *newp;
	char c, test;

	for (stringstart = string;;)
		switch (c = *pattern++) {
		case EOS:
			/* Pattern exhausted: accept a trailing "/..." only on request. */
			if ((flags & FNM_LEADING_DIR) && *string == '/')
				return (0);
			return (*string == EOS ? 0 : FNM_NOMATCH);
		case '?':
			if (*string == EOS)
				return (FNM_NOMATCH);
			if (*string == '/' && (flags & FNM_PATHNAME))
				return (FNM_NOMATCH);
			/* A leading period is never matched by a wildcard. */
			if (*string == '.' && (flags & FNM_PERIOD) &&
			    (string == stringstart ||
			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
				return (FNM_NOMATCH);
			++string;
			break;
		case '*':
			c = *pattern;
			/* Collapse multiple stars. */
			while (c == '*')
				c = *++pattern;

			if (*string == '.' && (flags & FNM_PERIOD) &&
			    (string == stringstart ||
			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
				return (FNM_NOMATCH);

			/* Optimize for pattern with * at end or before /. */
			if (c == EOS) {
				if (flags & FNM_PATHNAME)
					return ((flags & FNM_LEADING_DIR) ||
					    strchr(string, '/') == NULL ?
					    0 : FNM_NOMATCH);
				else
					return (0);
			} else if (c == '/' && flags & FNM_PATHNAME) {
				if ((string = strchr(string, '/')) == NULL)
					return (FNM_NOMATCH);
				break;
			}

			/*
			 * General case, use recursion.
			 * NOTE(review): FNM_PERIOD is masked out for the whole
			 * recursive match, so with FNM_PATHNAME a period that
			 * follows a later '/' is no longer protected (e.g.
			 * "*b/?x" matches "ab/.x"). Behaviour left unchanged.
			 */
			while ((test = *string) != EOS) {
				if (!fnmatch(pattern, string, flags & ~FNM_PERIOD))
					return (0);
				if (test == '/' && flags & FNM_PATHNAME)
					break;
				++string;
			}
			return (FNM_NOMATCH);
		case '[':
			if (*string == EOS)
				return (FNM_NOMATCH);
			if (*string == '/' && (flags & FNM_PATHNAME))
				return (FNM_NOMATCH);
			if (*string == '.' && (flags & FNM_PERIOD) &&
			    (string == stringstart ||
			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
				return (FNM_NOMATCH);

			switch (rangematch(pattern, *string, flags, &newp)) {
			case RANGE_ERROR:
				/* Unterminated bracket: treat '[' as a literal. */
				goto norm;
			case RANGE_MATCH:
				pattern = newp;
				break;
			case RANGE_NOMATCH:
				return (FNM_NOMATCH);
			}
			++string;
			break;
		case '\\':
			if (!(flags & FNM_NOESCAPE)) {
				/* A trailing backslash stands for itself. */
				if ((c = *pattern++) == EOS) {
					c = '\\';
					--pattern;
				}
			}
			/* FALLTHROUGH */
		default:
		norm:
			if (c == *string)
				;
			else if ((flags & FNM_CASEFOLD) &&
			    (tolower((unsigned char)c) ==
			    tolower((unsigned char)*string)))
				;
			else
				return (FNM_NOMATCH);
			string++;
			break;
		}
	/* NOTREACHED */
}
/*
 * Match the single character `test` against a bracket expression.
 *
 * `pattern` points just past the opening '['. Returns RANGE_MATCH or
 * RANGE_NOMATCH once the closing ']' has been found (and then stores the
 * position following that ']' in *newp), or RANGE_ERROR when the pattern
 * ends before a closing ']' is seen.
 */
static int
rangematch(const char *pattern, char test, int flags, char **newp)
{
	int negate, ok;
	char c, c2;

	/*
	 * A bracket expression starting with an unquoted circumflex
	 * character produces unspecified results (IEEE 1003.2-1992,
	 * 3.13.2). This implementation treats it like '!', for
	 * consistency with the regular expression syntax.
	 * J.T. Conklin (conklin@ngai.kaleida.com)
	 */
	if ( (negate = (*pattern == '!' || *pattern == '^')) )
		++pattern;

	if (flags & FNM_CASEFOLD)
		test = tolower((unsigned char)test);

	/*
	 * A right bracket shall lose its special meaning and represent
	 * itself in a bracket expression if it occurs first in the list.
	 * -- POSIX.2 2.8.3.2
	 * (The do/while below tests for ']' only after the first member
	 * has been consumed.)
	 */
	ok = 0;
	c = *pattern++;
	do {
		if (c == '\\' && !(flags & FNM_NOESCAPE))
			c = *pattern++;

		if (c == EOS)
			return (RANGE_ERROR);

		/* With FNM_PATHNAME a bracket expression never matches '/'. */
		if (c == '/' && (flags & FNM_PATHNAME))
			return (RANGE_NOMATCH);

		if (flags & FNM_CASEFOLD)
			c = tolower((unsigned char)c);

		if (*pattern == '-'
		    && (c2 = *(pattern+1)) != EOS && c2 != ']') {
			/* Range "c-c2". */
			pattern += 2;
			if (c2 == '\\' && !(flags & FNM_NOESCAPE))
				c2 = *pattern++;
			if (c2 == EOS)
				return (RANGE_ERROR);

			if (flags & FNM_CASEFOLD)
				c2 = tolower((unsigned char)c2);

			/* ToDo: collation-aware comparison is disabled; the
			 * plain char values are compared instead. */
			if (c <= test && test <= c2)
				ok = 1;
		} else if (c == test)
			ok = 1;
	} while ((c = *pattern++) != ']');

	*newp = (char *)pattern;
	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
}

View File

@ -8,6 +8,7 @@ for architectureObject in [ MultiArchSubDirSetup ] {
MergeObjectFromObjects <$(architecture)>posix_musl.o :
:
<$(architecture)>posix_musl_math.o
<$(architecture)>posix_musl_regex.o
<$(architecture)>posix_musl_string.o
;
}
@ -18,4 +19,5 @@ for arch in $(TARGET_ARCHS) {
HaikuSubInclude math $(arch) ;
}
HaikuSubInclude regex ;
HaikuSubInclude string ;

View File

@ -0,0 +1,15 @@
# Build rules for the sources in src/system/libroot/posix/musl/regex.
SubDir HAIKU_TOP src system libroot posix musl regex ;
# Add the "include" and "internal" directories one level up to the
# header search paths.
SubDirSysHdrs [ FDirName $(SUBDIR) .. include ] ;
UseHeaders [ FDirName $(SUBDIR) .. internal ] ;
local architectureObject ;
for architectureObject in [ MultiArchSubDirSetup ] {
on $(architectureObject) {
local architecture = $(TARGET_PACKAGING_ARCH) ;
# Object name carries the architecture as grist; presumably one object
# is produced per packaging architecture -- TODO confirm.
MergeObject <$(architecture)>posix_musl_regex.o :
fnmatch.c
;
}
}

View File

@ -0,0 +1,320 @@
/*
* An implementation of what I call the "Sea of Stars" algorithm for
* POSIX fnmatch(). The basic idea is that we factor the pattern into
* a head component (which we match first and can reject without ever
* measuring the length of the string), an optional tail component
* (which only exists if the pattern contains at least one star), and
* an optional "sea of stars", a set of star-separated components
* between the head and tail. After the head and tail matches have
* been removed from the input string, the components in the "sea of
* stars" are matched sequentially by searching for their first
* occurrence past the end of the previous match.
*
* - Rich Felker, April 2012
*/
#include <string.h>
#include <fnmatch.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#define END 0
#define UNMATCHABLE -2
#define BRACKET -3
#define QUESTION -4
#define STAR -5
/*
 * Decode the next character of the string being matched.
 *
 * str  - current position in the string
 * n    - number of bytes that may be examined
 * step - receives the number of bytes the character occupies
 *
 * Returns the character value, 0 when n is zero (with *step = 0), or -1
 * for a byte sequence that mbtowc() rejects (with *step = 1 so the caller
 * can skip the offending byte).
 */
static int str_next(const char *str, size_t n, size_t *step)
{
	wchar_t decoded;
	int len;

	if (n == 0) {
		*step = 0;
		return 0;
	}

	/* Plain ASCII bytes (including NUL) need no decoding. */
	if (str[0] < 128U) {
		*step = 1;
		return str[0];
	}

	len = mbtowc(&decoded, str, n);
	if (len < 0) {
		*step = 1;
		return -1;
	}

	*step = len;
	return decoded;
}
/*
 * Decode the next token of the pattern.
 *
 * pat   - current position in the pattern
 * m     - number of pattern bytes that may be examined
 * step  - receives the number of pattern bytes the token occupies
 * flags - FNM_* flags; only FNM_NOESCAPE is consulted here
 *
 * Returns END, STAR, QUESTION, BRACKET or UNMATCHABLE, or otherwise the
 * value of the literal character to match. A '[' that does not start a
 * properly terminated bracket expression is returned as the literal '['.
 */
static int pat_next(const char *pat, size_t m, size_t *step, int flags)
{
	int escaped = 0;

	if (m == 0 || pat[0] == '\0') {
		*step = 0;
		return END;
	}

	*step = 1;

	if (!(flags & FNM_NOESCAPE) && pat[0] == '\\' && pat[1] != '\0') {
		/* The backslash and the character after it form one token;
		 * the character is always taken literally. */
		*step = 2;
		pat++;
		escaped = 1;
	} else if (pat[0] == '[') {
		size_t pos = 1;

		/* Optional negation, then an optional literal ']' as the
		 * first member. */
		if (pos < m && (pat[pos] == '^' || pat[pos] == '!'))
			pos++;
		if (pos < m && pat[pos] == ']')
			pos++;

		/* Look for the closing ']', stepping over [:class:],
		 * [.coll.] and [=equiv=] elements as a whole. */
		while (pos < m && pat[pos] != '\0' && pat[pos] != ']') {
			if (pos + 1 < m && pat[pos + 1] != '\0' && pat[pos] == '['
				&& (pat[pos + 1] == ':' || pat[pos + 1] == '.'
					|| pat[pos + 1] == '=')) {
				int closer = pat[pos + 1];

				pos += 2;
				if (pos < m && pat[pos] != '\0')
					pos++;
				while (pos < m && pat[pos] != '\0'
					&& (pat[pos - 1] != closer || pat[pos] != ']'))
					pos++;
				if (pos == m || pat[pos] == '\0')
					break;
			}
			pos++;
		}

		if (pos == m || pat[pos] == '\0') {
			/* No terminator: the '[' stands for itself. */
			*step = 1;
			return '[';
		}
		*step = pos + 1;
		return BRACKET;
	} else if (pat[0] == '*') {
		return STAR;
	} else if (pat[0] == '?') {
		return QUESTION;
	}

	/* Literal character, possibly multibyte. */
	if (pat[0] < 128U)
		return pat[0];

	{
		wchar_t decoded;
		int len = mbtowc(&decoded, pat, m);

		if (len < 0) {
			*step = 0;
			return UNMATCHABLE;
		}
		*step = len + escaped;
		return decoded;
	}
}
/*
 * Return the opposite-case counterpart of k: the upper-case form when
 * towupper() changes k, otherwise whatever towlower() yields (which is
 * k itself for characters without case).
 */
static int casefold(int k)
{
	int upper = towupper(k);

	if (upper != k)
		return upper;
	return towlower(k);
}
/*
 * Test one character against a bracket expression.
 *
 * p     - points at the opening '[' of an expression that pat_next() has
 *         already reported as BRACKET (so a closing ']' is known to exist)
 * k     - the character from the string
 * kfold - the case-folded counterpart of k, or k itself when case
 *         folding is not in effect
 *
 * Returns nonzero when the expression matches k or kfold (taking a leading
 * '!' or '^' negation into account) and 0 otherwise. An undecodable
 * multibyte sequence inside the expression yields 0.
 */
static int match_bracket(const char *p, int k, int kfold)
{
	wchar_t wc;
	int inv = 0;
	p++;
	if (*p=='^' || *p=='!') {
		inv = 1;
		p++;
	}
	/* A ']' or '-' in first position is an ordinary member. */
	if (*p==']') {
		if (k==']') return !inv;
		p++;
	} else if (*p=='-') {
		if (k=='-') return !inv;
		p++;
	}
	/* wc always holds the most recently seen member; it serves as the
	 * lower end when a '-' introduces a range. */
	wc = p[-1];
	for (; *p != ']'; p++) {
		if (p[0]=='-' && p[1]!=']') {
			wchar_t wc2;
			int l = mbtowc(&wc2, p+1, 4);
			if (l < 0) return 0;
			if (wc <= wc2)
				if ((unsigned)k-wc <= wc2-wc ||
				    (unsigned)kfold-wc <= wc2-wc)
					return !inv;
			/* Stay on the '-': the loop increment then lands on
			 * the first byte of the range end point, which is
			 * decoded again as an ordinary member. Skipping l-1
			 * bytes here would leave p in the middle of a
			 * multibyte end point and make the next mbtowc()
			 * fail. */
			continue;
		}
		if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
			const char *p0 = p+2;
			int z = p[1];
			p+=3;
			/* Advance to the ']' that closes the inner element. */
			while (p[-1]!=z || p[0]!=']') p++;
			/* Only character classes are evaluated; collating
			 * symbols and equivalence classes match nothing. */
			if (z == ':' && p-1-p0 < 16) {
				char buf[16];
				memcpy(buf, p0, p-1-p0);
				buf[p-1-p0] = 0;
				if (iswctype(k, wctype(buf)) ||
				    iswctype(kfold, wctype(buf)))
					return !inv;
			}
			continue;
		}
		if (*p < 128U) {
			wc = (unsigned char)*p;
		} else {
			int l = mbtowc(&wc, p, 4);
			if (l < 0) return 0;
			p += l-1;
		}
		if (wc==k || wc==kfold) return !inv;
	}
	return inv;
}
/*
 * Match the pattern segment pat[0..m) against the string segment
 * str[0..n). Either length may be passed as (size_t)-1 when it is not
 * known; the segment then extends to the terminating NUL.
 *
 * Only FNM_PERIOD, FNM_NOESCAPE and FNM_CASEFOLD are acted upon here;
 * '/' receives no special treatment in this function.
 *
 * Returns 0 on a match and FNM_NOMATCH otherwise.
 */
static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
{
	const char *p, *ptail, *endpat;
	const char *s, *stail, *endstr;
	size_t pinc, sinc, tailcnt=0;
	int c, k, kfold;

	/* A leading period is only matched by a literal period. */
	if (flags & FNM_PERIOD) {
		if (*str == '.' && *pat != '.')
			return FNM_NOMATCH;
	}

	/* Head: match token by token up to the first '*'. If the pattern
	 * contains no '*' at all the result is decided inside this loop. */
	for (;;) {
		switch ((c = pat_next(pat, m, &pinc, flags))) {
		case UNMATCHABLE:
			return FNM_NOMATCH;
		case STAR:
			pat++;
			m--;
			break;
		default:
			k = str_next(str, n, &sinc);
			if (k <= 0)
				return (c==END) ? 0 : FNM_NOMATCH;
			str += sinc;
			n -= sinc;
			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
			if (c == BRACKET) {
				if (!match_bracket(pat, k, kfold))
					return FNM_NOMATCH;
			} else if (c != QUESTION && k != c && kfold != c) {
				return FNM_NOMATCH;
			}
			pat+=pinc;
			m-=pinc;
			continue;
		}
		break;
	}

	/* Compute real pat length if it was initially unknown/-1 */
	m = strnlen(pat, m);
	endpat = pat + m;

	/* Find the last * in pat and count chars needed after it */
	for (p=ptail=pat; p<endpat; p+=pinc) {
		switch (pat_next(p, endpat-p, &pinc, flags)) {
		case UNMATCHABLE:
			return FNM_NOMATCH;
		case STAR:
			tailcnt=0;
			ptail = p+1;
			break;
		default:
			tailcnt++;
			break;
		}
	}

	/* Past this point we need not check for UNMATCHABLE in pat,
	 * because all of pat has already been parsed once. */

	/* Compute real str length if it was initially unknown/-1 */
	n = strnlen(str, n);
	endstr = str + n;
	if (n < tailcnt) return FNM_NOMATCH;

	/* Find the final tailcnt chars of str, accounting for UTF-8.
	 * On illegal sequences we may get it wrong, but in that case
	 * we necessarily have a matching failure anyway.
	 * In a single-byte locale (MB_CUR_MAX == 1) every byte is one
	 * character, so bytes in the 0x80..0xBF range must not be
	 * skipped as if they were UTF-8 continuation bytes. */
	for (s=endstr; s>str && tailcnt; tailcnt--) {
		if (s[-1] < 128U || MB_CUR_MAX==1) s--;
		else while ((unsigned char)*--s-0x80U<0x40 && s>str);
	}
	if (tailcnt) return FNM_NOMATCH;
	stail = s;

	/* Check that the pat and str tails match */
	p = ptail;
	for (;;) {
		c = pat_next(p, endpat-p, &pinc, flags);
		p += pinc;
		if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
			if (c != END) return FNM_NOMATCH;
			break;
		}
		s += sinc;
		kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
		if (c == BRACKET) {
			if (!match_bracket(p-pinc, k, kfold))
				return FNM_NOMATCH;
		} else if (c != QUESTION && k != c && kfold != c) {
			return FNM_NOMATCH;
		}
	}

	/* We're all done with the tails now, so throw them out */
	endstr = stail;
	endpat = ptail;

	/* Match pattern components until there are none left */
	while (pat<endpat) {
		p = pat;
		s = str;
		for (;;) {
			c = pat_next(p, endpat-p, &pinc, flags);
			p += pinc;
			/* Encountering * completes/commits a component */
			if (c == STAR) {
				pat = p;
				str = s;
				break;
			}
			k = str_next(s, endstr-s, &sinc);
			/* String exhausted before the component matched. */
			if (!k)
				return FNM_NOMATCH;
			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
			if (c == BRACKET) {
				if (!match_bracket(p-pinc, k, kfold))
					break;
			} else if (c != QUESTION && k != c && kfold != c) {
				break;
			}
			s += sinc;
		}
		if (c == STAR) continue;
		/* If we failed, advance str, by 1 char if it's a valid
		 * char, or past all invalid bytes otherwise. */
		k = str_next(str, endstr-str, &sinc);
		if (k > 0) str += sinc;
		else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
	}

	return 0;
}
/*
 * fnmatch() -- match the string str against the shell pattern pat.
 *
 * With FNM_PATHNAME the pattern and the string are split at '/' and
 * matched component by component. With FNM_LEADING_DIR a match of a
 * leading part of str that is followed by '/' is accepted as well.
 * All other work is delegated to fnmatch_internal().
 *
 * Returns 0 on a match and FNM_NOMATCH otherwise.
 */
int fnmatch(const char *pat, const char *str, int flags)
{
	const char *seg_end, *pat_end;
	size_t adv;
	int tok;

	if (flags & FNM_PATHNAME) {
		/* This loop is only ever left through a return. */
		for (;;) {
			/* End of the current string component. */
			seg_end = str;
			while (*seg_end && *seg_end != '/')
				seg_end++;

			/* End of the current pattern component. */
			pat_end = pat;
			for (;;) {
				tok = pat_next(pat_end, -1, &adv, flags);
				if (tok == END || tok == '/')
					break;
				pat_end += adv;
			}

			/* Both must stop on the same delimiter, unless the
			 * pattern is finished and trailing directories of
			 * the string may be ignored. */
			if (tok != *seg_end
				&& (!*seg_end || !(flags & FNM_LEADING_DIR)))
				return FNM_NOMATCH;
			if (fnmatch_internal(pat, pat_end - pat, str,
					seg_end - str, flags))
				return FNM_NOMATCH;
			if (!tok)
				return 0;

			str = seg_end + 1;
			pat = pat_end + adv;
		}
	}

	if (flags & FNM_LEADING_DIR) {
		/* Try every prefix of str that ends right before a '/'. */
		for (seg_end = str; *seg_end; seg_end++) {
			if (*seg_end == '/'
				&& !fnmatch_internal(pat, -1, str,
					seg_end - str, flags))
				return 0;
		}
	}

	return fnmatch_internal(pat, -1, str, -1, flags);
}