Replace regex files with multibyte-aware version from FreeBSD.
* Makefile.in (install-headers): Remove extra command to install regex.h. (uninstall-headers): Remove extra command to uninstall regex.h. * nlsfuncs.cc (collate_lcid): Make externally available to allow access to collation internals from regex functions. (collate_charset): Ditto. * wchar.h: Add __cplusplus guards to make C-clean. * include/regex.h: New file, replacing regex/regex.h. Remove UCB advertising clause. * regex/COPYRIGHT: Accommodate BSD license. Remove UCB advertising clause. * regex/cclass.h: Remove. * regex/cname.h: New file from FreeBSD. * regex/engine.c: Ditto. (NONCHAR): Tweak for Cygwin. * regex/engine.ih: Remove. * regex/mkh: Remove. * regex/regcomp.c: New file from FreeBSD. Tweak slightly for Cygwin. Import required collate internals from nlsfuncs.cc. (p_ere_exp): Add GNU-specific \< and \> handling for word boundaries. (p_simp_re): Ditto. (__collate_range_cmp): Define. (p_b_term): Use Cygwin-specific collate internals. (findmust): Ditto. * regex/regcomp.ih: Remove. * regex/regerror.c: New file from FreeBSD. Fix a few compiler warnings. * regex/regerror.ih: Remove. * regex/regex.7: New file from FreeBSD. Remove UCB advertising clause. * regex/regex.h: Remove. Replaced by include/regex.h. * regex/regexec.c: New file from FreeBSD. Fix a few compiler warnings. * regex/regfree.c: New file from FreeBSD. * regex/tests: Remove. * regex/utils.h: New file from FreeBSD.
This commit is contained in:
parent
c8f7d3cb48
commit
e1e595a649
|
@ -1,3 +1,40 @@
|
||||||
|
2010-02-04 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
|
Replace regex files with multibyte-aware version from FreeBSD.
|
||||||
|
* Makefile.in (install-headers): Remove extra command to install
|
||||||
|
regex.h.
|
||||||
|
(uninstall-headers): Remove extra command to uninstall regex.h.
|
||||||
|
* nlsfuncs.cc (collate_lcid): Make externally available to allow
|
||||||
|
access to collation internals from regex functions.
|
||||||
|
(collate_charset): Ditto.
|
||||||
|
* wchar.h: Add __cplusplus guards to make C-clean.
|
||||||
|
* include/regex.h: New file, replacing regex/regex.h. Remove UCB
|
||||||
|
advertising clause.
|
||||||
|
* regex/COPYRIGHT: Accommodate BSD license. Remove UCB advertising
|
||||||
|
clause.
|
||||||
|
* regex/cclass.h: Remove.
|
||||||
|
* regex/cname.h: New file from FreeBSD.
|
||||||
|
* regex/engine.c: Ditto.
|
||||||
|
(NONCHAR): Tweak for Cygwin.
|
||||||
|
* regex/engine.ih: Remove.
|
||||||
|
* regex/mkh: Remove.
|
||||||
|
* regex/regcomp.c: New file from FreeBSD. Tweak slightly for Cygwin.
|
||||||
|
Import required collate internals from nlsfuncs.cc.
|
||||||
|
(p_ere_exp): Add GNU-specific \< and \> handling for word boundaries.
|
||||||
|
(p_simp_re): Ditto.
|
||||||
|
(__collate_range_cmp): Define.
|
||||||
|
(p_b_term): Use Cygwin-specific collate internals.
|
||||||
|
(findmust): Ditto.
|
||||||
|
* regex/regcomp.ih: Remove.
|
||||||
|
* regex/regerror.c: New file from FreeBSD. Fix a few compiler warnings.
|
||||||
|
* regex/regerror.ih: Remove.
|
||||||
|
* regex/regex.7: New file from FreeBSD. Remove UCB advertising clause.
|
||||||
|
* regex/regex.h: Remove. Replaced by include/regex.h.
|
||||||
|
* regex/regexec.c: New file from FreeBSD. Fix a few compiler warnings.
|
||||||
|
* regex/regfree.c: New file from FreeBSD.
|
||||||
|
* regex/tests: Remove.
|
||||||
|
* regex/utils.h: New file from FreeBSD.
|
||||||
|
|
||||||
2010-02-03 Christopher Faylor <me+cygwin@cgf.cx>
|
2010-02-03 Christopher Faylor <me+cygwin@cgf.cx>
|
||||||
|
|
||||||
* sigproc.cc (get_proc_lock): Fix error message typo.
|
* sigproc.cc (get_proc_lock): Fix error message typo.
|
||||||
|
|
|
@ -329,7 +329,6 @@ install-headers:
|
||||||
$(INSTALL_DATA) $$i $(DESTDIR)$(tooldir)/$$sub/`basename $$i` ; \
|
$(INSTALL_DATA) $$i $(DESTDIR)$(tooldir)/$$sub/`basename $$i` ; \
|
||||||
done ; \
|
done ; \
|
||||||
done ; \
|
done ; \
|
||||||
$(INSTALL_DATA) regex/regex.h $(DESTDIR)$(tooldir)/include/regex.h
|
|
||||||
|
|
||||||
install-man:
|
install-man:
|
||||||
@$(MKDIRP) $(DESTDIR)$(mandir)/man2 $(DESTDIR)$(mandir)/man3 $(DESTDIR)$(mandir)/man5 $(DESTDIR)$(mandir)/man7
|
@$(MKDIRP) $(DESTDIR)$(mandir)/man2 $(DESTDIR)$(mandir)/man3 $(DESTDIR)$(mandir)/man5 $(DESTDIR)$(mandir)/man7
|
||||||
|
@ -364,7 +363,6 @@ uninstall-headers:
|
||||||
rm -f $(tooldir)/$$sub/`basename $$i` ; \
|
rm -f $(tooldir)/$$sub/`basename $$i` ; \
|
||||||
done ; \
|
done ; \
|
||||||
done ; \
|
done ; \
|
||||||
rm -f $(tooldir)/include/regex.h
|
|
||||||
|
|
||||||
uninstall-man:
|
uninstall-man:
|
||||||
cd $(srcdir); \
|
cd $(srcdir); \
|
||||||
|
|
|
@ -0,0 +1,130 @@
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1992 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer of the University of Toronto.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regex.h 8.2 (Berkeley) 1/3/94
|
||||||
|
* $FreeBSD$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _REGEX_H_
|
||||||
|
#define _REGEX_H_
|
||||||
|
|
||||||
|
#include <sys/cdefs.h>
|
||||||
|
#include <sys/_types.h>
|
||||||
|
|
||||||
|
/* types */
|
||||||
|
#ifdef __CYGWIN__
|
||||||
|
typedef _off_t regoff_t;
|
||||||
|
|
||||||
|
#define __need_size_t
|
||||||
|
#include <stddef.h>
|
||||||
|
#else /* !__CYGWIN__ */
|
||||||
|
typedef __off_t regoff_t;
|
||||||
|
|
||||||
|
#ifndef _SIZE_T_DECLARED
|
||||||
|
typedef __size_t size_t;
|
||||||
|
#define _SIZE_T_DECLARED
|
||||||
|
#endif
|
||||||
|
#endif /* !__CYGWIN__ */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int re_magic;
|
||||||
|
size_t re_nsub; /* number of parenthesized subexpressions */
|
||||||
|
#ifdef __CYGWIN__
|
||||||
|
const char *re_endp; /* end pointer for REG_PEND */
|
||||||
|
#else
|
||||||
|
__const char *re_endp; /* end pointer for REG_PEND */
|
||||||
|
#endif
|
||||||
|
struct re_guts *re_g; /* none of your business :-) */
|
||||||
|
} regex_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
regoff_t rm_so; /* start of match */
|
||||||
|
regoff_t rm_eo; /* end of match */
|
||||||
|
} regmatch_t;
|
||||||
|
|
||||||
|
/* regcomp() flags */
|
||||||
|
#define REG_BASIC 0000
|
||||||
|
#define REG_EXTENDED 0001
|
||||||
|
#define REG_ICASE 0002
|
||||||
|
#define REG_NOSUB 0004
|
||||||
|
#define REG_NEWLINE 0010
|
||||||
|
#define REG_NOSPEC 0020
|
||||||
|
#define REG_PEND 0040
|
||||||
|
#define REG_DUMP 0200
|
||||||
|
|
||||||
|
/* regerror() flags */
|
||||||
|
#define REG_ENOSYS (-1)
|
||||||
|
#ifdef __CYGWIN__
|
||||||
|
#define REG_NOERROR 0 /* GNU extension */
|
||||||
|
#endif
|
||||||
|
#define REG_NOMATCH 1
|
||||||
|
#define REG_BADPAT 2
|
||||||
|
#define REG_ECOLLATE 3
|
||||||
|
#define REG_ECTYPE 4
|
||||||
|
#define REG_EESCAPE 5
|
||||||
|
#define REG_ESUBREG 6
|
||||||
|
#define REG_EBRACK 7
|
||||||
|
#define REG_EPAREN 8
|
||||||
|
#define REG_EBRACE 9
|
||||||
|
#define REG_BADBR 10
|
||||||
|
#define REG_ERANGE 11
|
||||||
|
#define REG_ESPACE 12
|
||||||
|
#define REG_BADRPT 13
|
||||||
|
#define REG_EMPTY 14
|
||||||
|
#define REG_ASSERT 15
|
||||||
|
#define REG_INVARG 16
|
||||||
|
#define REG_ILLSEQ 17
|
||||||
|
#define REG_ATOI 255 /* convert name to number (!) */
|
||||||
|
#define REG_ITOA 0400 /* convert number to name (!) */
|
||||||
|
|
||||||
|
/* regexec() flags */
|
||||||
|
#define REG_NOTBOL 00001
|
||||||
|
#define REG_NOTEOL 00002
|
||||||
|
#define REG_STARTEND 00004
|
||||||
|
#define REG_TRACE 00400 /* tracing of execution */
|
||||||
|
#define REG_LARGE 01000 /* force large representation */
|
||||||
|
#define REG_BACKR 02000 /* force use of backref code */
|
||||||
|
|
||||||
|
__BEGIN_DECLS
|
||||||
|
int regcomp(regex_t * __restrict, const char * __restrict, int);
|
||||||
|
size_t regerror(int, const regex_t * __restrict, char * __restrict, size_t);
|
||||||
|
/*
|
||||||
|
* XXX fourth parameter should be `regmatch_t [__restrict]', but isn't because
|
||||||
|
* of a bug in GCC 3.2 (when -std=c99 is specified) which perceives this as a
|
||||||
|
* syntax error.
|
||||||
|
*/
|
||||||
|
int regexec(const regex_t * __restrict, const char * __restrict, size_t,
|
||||||
|
regmatch_t * __restrict, int);
|
||||||
|
void regfree(regex_t *);
|
||||||
|
__END_DECLS
|
||||||
|
|
||||||
|
#endif /* !_REGEX_H_ */
|
|
@ -638,9 +638,9 @@ __set_lc_monetary_from_win (const char *name,
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static LCID collate_lcid = 0;
|
LCID collate_lcid = 0;
|
||||||
static mbtowc_p collate_mbtowc = __ascii_mbtowc;
|
static mbtowc_p collate_mbtowc = __ascii_mbtowc;
|
||||||
static char collate_charset[ENCODING_LEN + 1] = "ASCII";
|
char collate_charset[ENCODING_LEN + 1] = "ASCII";
|
||||||
|
|
||||||
/* Called from newlib's setlocale() if category is LC_COLLATE. Stores
|
/* Called from newlib's setlocale() if category is LC_COLLATE. Stores
|
||||||
LC_COLLATE locale information. This is subsequently accessed by the
|
LC_COLLATE locale information. This is subsequently accessed by the
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved.
|
Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved.
|
||||||
This software is not subject to any license of the American Telephone
|
This software is not subject to any license of the American Telephone
|
||||||
and Telegraph Company or of the Regents of the University of California.
|
and Telegraph Company or of the Regents of the University of California.
|
||||||
|
|
||||||
|
@ -18,3 +18,35 @@ to the following restrictions:
|
||||||
ever read sources, credits must appear in the documentation.
|
ever read sources, credits must appear in the documentation.
|
||||||
|
|
||||||
4. This notice may not be removed or altered.
|
4. This notice may not be removed or altered.
|
||||||
|
|
||||||
|
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94
|
||||||
|
*/
|
||||||
|
|
|
@ -1,31 +0,0 @@
|
||||||
/* character-class table */
|
|
||||||
static struct cclass {
|
|
||||||
const char *name;
|
|
||||||
const char *chars;
|
|
||||||
const char *multis;
|
|
||||||
} cclasses[] = {
|
|
||||||
{"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
|
||||||
0123456789", ""},
|
|
||||||
{"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
|
|
||||||
""},
|
|
||||||
{"blank", " \t", ""},
|
|
||||||
{"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
|
|
||||||
\25\26\27\30\31\32\33\34\35\36\37\177", ""},
|
|
||||||
{"digit", "0123456789", ""},
|
|
||||||
{"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
|
||||||
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
|
||||||
""},
|
|
||||||
{"lower", "abcdefghijklmnopqrstuvwxyz",
|
|
||||||
""},
|
|
||||||
{"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
|
||||||
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
|
|
||||||
""},
|
|
||||||
{"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
|
||||||
""},
|
|
||||||
{"space", "\t\n\v\f\r ", ""},
|
|
||||||
{"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
|
||||||
""},
|
|
||||||
{"xdigit", "0123456789ABCDEFabcdef",
|
|
||||||
""},
|
|
||||||
{NULL, 0, ""}
|
|
||||||
};
|
|
|
@ -1,82 +1,118 @@
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)cname.h 8.3 (Berkeley) 3/20/94
|
||||||
|
* $FreeBSD: src/lib/libc/regex/cname.h,v 1.4 2007/01/09 00:28:04 imp Exp $
|
||||||
|
*/
|
||||||
|
|
||||||
/* character-name table */
|
/* character-name table */
|
||||||
static struct cname {
|
static struct cname {
|
||||||
const char *name;
|
const char *name;
|
||||||
char code;
|
char code;
|
||||||
} cnames[] = {
|
} cnames[] = {
|
||||||
{"NUL", '\0'},
|
{"NUL", '\0'},
|
||||||
{"SOH", '\001'},
|
{"SOH", '\001'},
|
||||||
{"STX", '\002'},
|
{"STX", '\002'},
|
||||||
{"ETX", '\003'},
|
{"ETX", '\003'},
|
||||||
{"EOT", '\004'},
|
{"EOT", '\004'},
|
||||||
{"ENQ", '\005'},
|
{"ENQ", '\005'},
|
||||||
{"ACK", '\006'},
|
{"ACK", '\006'},
|
||||||
{"BEL", '\007'},
|
{"BEL", '\007'},
|
||||||
{"alert", '\007'},
|
{"alert", '\007'},
|
||||||
{"BS", '\010'},
|
{"BS", '\010'},
|
||||||
{"backspace", '\b'},
|
{"backspace", '\b'},
|
||||||
{"HT", '\011'},
|
{"HT", '\011'},
|
||||||
{"tab", '\t'},
|
{"tab", '\t'},
|
||||||
{"LF", '\012'},
|
{"LF", '\012'},
|
||||||
{"newline", '\n'},
|
{"newline", '\n'},
|
||||||
{"VT", '\013'},
|
{"VT", '\013'},
|
||||||
{"vertical-tab", '\v'},
|
{"vertical-tab", '\v'},
|
||||||
{"FF", '\014'},
|
{"FF", '\014'},
|
||||||
{"form-feed", '\f'},
|
{"form-feed", '\f'},
|
||||||
{"CR", '\015'},
|
{"CR", '\015'},
|
||||||
{"carriage-return", '\r'},
|
{"carriage-return", '\r'},
|
||||||
{"SO", '\016'},
|
{"SO", '\016'},
|
||||||
{"SI", '\017'},
|
{"SI", '\017'},
|
||||||
{"DLE", '\020'},
|
{"DLE", '\020'},
|
||||||
{"DC1", '\021'},
|
{"DC1", '\021'},
|
||||||
{"DC2", '\022'},
|
{"DC2", '\022'},
|
||||||
{"DC3", '\023'},
|
{"DC3", '\023'},
|
||||||
{"DC4", '\024'},
|
{"DC4", '\024'},
|
||||||
{"NAK", '\025'},
|
{"NAK", '\025'},
|
||||||
{"SYN", '\026'},
|
{"SYN", '\026'},
|
||||||
{"ETB", '\027'},
|
{"ETB", '\027'},
|
||||||
{"CAN", '\030'},
|
{"CAN", '\030'},
|
||||||
{"EM", '\031'},
|
{"EM", '\031'},
|
||||||
{"SUB", '\032'},
|
{"SUB", '\032'},
|
||||||
{"ESC", '\033'},
|
{"ESC", '\033'},
|
||||||
{"IS4", '\034'},
|
{"IS4", '\034'},
|
||||||
{"FS", '\034'},
|
{"FS", '\034'},
|
||||||
{"IS3", '\035'},
|
{"IS3", '\035'},
|
||||||
{"GS", '\035'},
|
{"GS", '\035'},
|
||||||
{"IS2", '\036'},
|
{"IS2", '\036'},
|
||||||
{"RS", '\036'},
|
{"RS", '\036'},
|
||||||
{"IS1", '\037'},
|
{"IS1", '\037'},
|
||||||
{"US", '\037'},
|
{"US", '\037'},
|
||||||
{"space", ' '},
|
{"space", ' '},
|
||||||
{"exclamation-mark", '!'},
|
{"exclamation-mark", '!'},
|
||||||
{"quotation-mark", '"'},
|
{"quotation-mark", '"'},
|
||||||
{"number-sign", '#'},
|
{"number-sign", '#'},
|
||||||
{"dollar-sign", '$'},
|
{"dollar-sign", '$'},
|
||||||
{"percent-sign", '%'},
|
{"percent-sign", '%'},
|
||||||
{"ampersand", '&'},
|
{"ampersand", '&'},
|
||||||
{"apostrophe", '\''},
|
{"apostrophe", '\''},
|
||||||
{"left-parenthesis", '('},
|
{"left-parenthesis", '('},
|
||||||
{"right-parenthesis", ')'},
|
{"right-parenthesis", ')'},
|
||||||
{"asterisk", '*'},
|
{"asterisk", '*'},
|
||||||
{"plus-sign", '+'},
|
{"plus-sign", '+'},
|
||||||
{"comma", ','},
|
{"comma", ','},
|
||||||
{"hyphen", '-'},
|
{"hyphen", '-'},
|
||||||
{"hyphen-minus", '-'},
|
{"hyphen-minus", '-'},
|
||||||
{"period", '.'},
|
{"period", '.'},
|
||||||
{"full-stop", '.'},
|
{"full-stop", '.'},
|
||||||
{"slash", '/'},
|
{"slash", '/'},
|
||||||
{"solidus", '/'},
|
{"solidus", '/'},
|
||||||
{"zero", '0'},
|
{"zero", '0'},
|
||||||
{"one", '1'},
|
{"one", '1'},
|
||||||
{"two", '2'},
|
{"two", '2'},
|
||||||
{"three", '3'},
|
{"three", '3'},
|
||||||
{"four", '4'},
|
{"four", '4'},
|
||||||
{"five", '5'},
|
{"five", '5'},
|
||||||
{"six", '6'},
|
{"six", '6'},
|
||||||
{"seven", '7'},
|
{"seven", '7'},
|
||||||
{"eight", '8'},
|
{"eight", '8'},
|
||||||
{"nine", '9'},
|
{"nine", '9'},
|
||||||
{"colon", ':'},
|
{"colon", ':'},
|
||||||
{"semicolon", ';'},
|
{"semicolon", ';'},
|
||||||
{"less-than-sign", '<'},
|
{"less-than-sign", '<'},
|
||||||
{"equals-sign", '='},
|
{"equals-sign", '='},
|
||||||
{"greater-than-sign", '>'},
|
{"greater-than-sign", '>'},
|
||||||
|
@ -85,12 +121,12 @@ static struct cname {
|
||||||
{"left-square-bracket", '['},
|
{"left-square-bracket", '['},
|
||||||
{"backslash", '\\'},
|
{"backslash", '\\'},
|
||||||
{"reverse-solidus", '\\'},
|
{"reverse-solidus", '\\'},
|
||||||
{"right-square-bracket", ']'},
|
{"right-square-bracket",']'},
|
||||||
{"circumflex", '^'},
|
{"circumflex", '^'},
|
||||||
{"circumflex-accent", '^'},
|
{"circumflex-accent", '^'},
|
||||||
{"underscore", '_'},
|
{"underscore", '_'},
|
||||||
{"low-line", '_'},
|
{"low-line", '_'},
|
||||||
{"grave-accent", '`'},
|
{"grave-accent", '`'},
|
||||||
{"left-brace", '{'},
|
{"left-brace", '{'},
|
||||||
{"left-curly-bracket", '{'},
|
{"left-curly-bracket", '{'},
|
||||||
{"vertical-line", '|'},
|
{"vertical-line", '|'},
|
||||||
|
|
|
@ -1,3 +1,41 @@
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)engine.c 8.5 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/cdefs.h>
|
||||||
|
__FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds Exp $");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The matching engine and friends. This file is #included by regexec.c
|
* The matching engine and friends. This file is #included by regexec.c
|
||||||
* after suitable #defines of a variety of macros used herein, so that
|
* after suitable #defines of a variety of macros used herein, so that
|
||||||
|
@ -27,25 +65,77 @@
|
||||||
#define at lat
|
#define at lat
|
||||||
#define match lmat
|
#define match lmat
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef MNAMES
|
||||||
|
#define matcher mmatcher
|
||||||
|
#define fast mfast
|
||||||
|
#define slow mslow
|
||||||
|
#define dissect mdissect
|
||||||
|
#define backref mbackref
|
||||||
|
#define step mstep
|
||||||
|
#define print mprint
|
||||||
|
#define at mat
|
||||||
|
#define match mmat
|
||||||
|
#endif
|
||||||
|
|
||||||
/* another structure passed up and down to avoid zillions of parameters */
|
/* another structure passed up and down to avoid zillions of parameters */
|
||||||
struct match {
|
struct match {
|
||||||
struct re_guts *g;
|
struct re_guts *g;
|
||||||
int eflags;
|
int eflags;
|
||||||
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
|
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
|
||||||
char *offp; /* offsets work from here */
|
const char *offp; /* offsets work from here */
|
||||||
char *beginp; /* start of string -- virtual NUL precedes */
|
const char *beginp; /* start of string -- virtual NUL precedes */
|
||||||
char *endp; /* end of string -- virtual NUL here */
|
const char *endp; /* end of string -- virtual NUL here */
|
||||||
char *coldp; /* can be no match starting before here */
|
const char *coldp; /* can be no match starting before here */
|
||||||
char **lastpos; /* [nplus+1] */
|
const char **lastpos; /* [nplus+1] */
|
||||||
STATEVARS;
|
STATEVARS;
|
||||||
states st; /* current states */
|
states st; /* current states */
|
||||||
states fresh; /* states for a fresh start */
|
states fresh; /* states for a fresh start */
|
||||||
states tmp; /* temporary */
|
states tmp; /* temporary */
|
||||||
states empty; /* empty set of states */
|
states empty; /* empty set of states */
|
||||||
|
mbstate_t mbs; /* multibyte conversion state */
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "engine.ih"
|
/* ========= begin header generated by ./mkh ========= */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* === engine.c === */
|
||||||
|
static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||||
|
static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||||
|
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
|
||||||
|
static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||||
|
static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||||
|
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
|
||||||
|
#define MAX_RECURSION 100
|
||||||
|
#define BOL (OUT-1)
|
||||||
|
#define EOL (BOL-1)
|
||||||
|
#define BOLEOL (BOL-2)
|
||||||
|
#define NOTHING (BOL-3)
|
||||||
|
#define BOW (BOL-4)
|
||||||
|
#define EOW (BOL-5)
|
||||||
|
#define BADCHAR (BOL-6)
|
||||||
|
#ifdef __CYGWIN__
|
||||||
|
/* In contrast to BSD, wint_t on Cygwin is unsigned. This breaks this test,
|
||||||
|
unless the compared values are cast to signed. */
|
||||||
|
#define NONCHAR(c) ((int)(c) <= (int)OUT)
|
||||||
|
#else
|
||||||
|
#define NONCHAR(c) ((c) <= OUT)
|
||||||
|
#endif
|
||||||
|
#ifdef REDEBUG
|
||||||
|
static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
|
||||||
|
#endif
|
||||||
|
#ifdef REDEBUG
|
||||||
|
static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||||
|
#endif
|
||||||
|
#ifdef REDEBUG
|
||||||
|
static const char *pchar(int ch);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/* ========= end header generated by ./mkh ========= */
|
||||||
|
|
||||||
#ifdef REDEBUG
|
#ifdef REDEBUG
|
||||||
#define SP(t, s, c) print(m, t, s, c, stdout)
|
#define SP(t, s, c) print(m, t, s, c, stdout)
|
||||||
|
@ -59,26 +149,32 @@ struct match {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
- matcher - the actual matching engine
|
- matcher - the actual matching engine
|
||||||
== static int matcher(register struct re_guts *g, char *string, \
|
== static int matcher(struct re_guts *g, const char *string, \
|
||||||
== size_t nmatch, regmatch_t pmatch[], int eflags);
|
== size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||||
*/
|
*/
|
||||||
static int /* 0 success, REG_NOMATCH failure */
|
static int /* 0 success, REG_NOMATCH failure */
|
||||||
matcher(g, string, nmatch, pmatch, eflags)
|
matcher(struct re_guts *g,
|
||||||
register struct re_guts *g;
|
const char *string,
|
||||||
char *string;
|
size_t nmatch,
|
||||||
size_t nmatch;
|
regmatch_t pmatch[],
|
||||||
regmatch_t pmatch[];
|
int eflags)
|
||||||
int eflags;
|
|
||||||
{
|
{
|
||||||
register char *endp;
|
const char *endp;
|
||||||
register size_t i;
|
int i;
|
||||||
struct match mv;
|
struct match mv;
|
||||||
register struct match *m = &mv;
|
struct match *m = &mv;
|
||||||
register char *dp;
|
const char *dp;
|
||||||
const register sopno gf = g->firststate+1; /* +1 for OEND */
|
const sopno gf = g->firststate+1; /* +1 for OEND */
|
||||||
const register sopno gl = g->laststate;
|
const sopno gl = g->laststate;
|
||||||
char *start;
|
const char *start;
|
||||||
char *stop;
|
const char *stop;
|
||||||
|
/* Boyer-Moore algorithm variables */
|
||||||
|
const char *pp;
|
||||||
|
int cj, mj;
|
||||||
|
const char *mustfirst;
|
||||||
|
const char *mustlast;
|
||||||
|
int *matchjump;
|
||||||
|
int *charjump;
|
||||||
|
|
||||||
/* simplify the situation where possible */
|
/* simplify the situation where possible */
|
||||||
if (g->cflags&REG_NOSUB)
|
if (g->cflags&REG_NOSUB)
|
||||||
|
@ -95,12 +191,46 @@ int eflags;
|
||||||
|
|
||||||
/* prescreening; this does wonders for this rather slow code */
|
/* prescreening; this does wonders for this rather slow code */
|
||||||
if (g->must != NULL) {
|
if (g->must != NULL) {
|
||||||
for (dp = start; dp < stop; dp++)
|
if (g->charjump != NULL && g->matchjump != NULL) {
|
||||||
if (*dp == g->must[0] && stop - dp >= g->mlen &&
|
mustfirst = g->must;
|
||||||
memcmp(dp, g->must, (size_t)g->mlen) == 0)
|
mustlast = g->must + g->mlen - 1;
|
||||||
break;
|
charjump = g->charjump;
|
||||||
if (dp == stop) /* we didn't find g->must */
|
matchjump = g->matchjump;
|
||||||
return(REG_NOMATCH);
|
pp = mustlast;
|
||||||
|
for (dp = start+g->mlen-1; dp < stop;) {
|
||||||
|
/* Fast skip non-matches */
|
||||||
|
while (dp < stop && charjump[(int)*dp])
|
||||||
|
dp += charjump[(int)*dp];
|
||||||
|
|
||||||
|
if (dp >= stop)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Greedy matcher */
|
||||||
|
/* We depend on not being used for
|
||||||
|
* strings of length 1
|
||||||
|
*/
|
||||||
|
while (*--dp == *--pp && pp != mustfirst);
|
||||||
|
|
||||||
|
if (*dp == *pp)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Jump to next possible match */
|
||||||
|
mj = matchjump[pp - mustfirst];
|
||||||
|
cj = charjump[(int)*dp];
|
||||||
|
dp += (cj < mj ? mj : cj);
|
||||||
|
pp = mustlast;
|
||||||
|
}
|
||||||
|
if (pp != mustfirst)
|
||||||
|
return(REG_NOMATCH);
|
||||||
|
} else {
|
||||||
|
for (dp = start; dp < stop; dp++)
|
||||||
|
if (*dp == g->must[0] &&
|
||||||
|
stop - dp >= g->mlen &&
|
||||||
|
memcmp(dp, g->must, (size_t)g->mlen) == 0)
|
||||||
|
break;
|
||||||
|
if (dp == stop) /* we didn't find g->must */
|
||||||
|
return(REG_NOMATCH);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* match struct setup */
|
/* match struct setup */
|
||||||
|
@ -117,11 +247,22 @@ int eflags;
|
||||||
SETUP(m->tmp);
|
SETUP(m->tmp);
|
||||||
SETUP(m->empty);
|
SETUP(m->empty);
|
||||||
CLEAR(m->empty);
|
CLEAR(m->empty);
|
||||||
|
ZAPSTATE(&m->mbs);
|
||||||
|
|
||||||
|
/* Adjust start according to moffset, to speed things up */
|
||||||
|
if (g->moffset > -1)
|
||||||
|
start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
|
||||||
|
|
||||||
|
SP("mloop", m->st, *start);
|
||||||
|
|
||||||
/* this loop does only one repetition except for backrefs */
|
/* this loop does only one repetition except for backrefs */
|
||||||
for (;;) {
|
for (;;) {
|
||||||
endp = fast(m, start, stop, gf, gl);
|
endp = fast(m, start, stop, gf, gl);
|
||||||
if (endp == NULL) { /* a miss */
|
if (endp == NULL) { /* a miss */
|
||||||
|
if (m->pmatch != NULL)
|
||||||
|
free((char *)m->pmatch);
|
||||||
|
if (m->lastpos != NULL)
|
||||||
|
free((char *)m->lastpos);
|
||||||
STATETEARDOWN(m);
|
STATETEARDOWN(m);
|
||||||
return(REG_NOMATCH);
|
return(REG_NOMATCH);
|
||||||
}
|
}
|
||||||
|
@ -136,7 +277,8 @@ int eflags;
|
||||||
if (endp != NULL)
|
if (endp != NULL)
|
||||||
break;
|
break;
|
||||||
assert(m->coldp < m->endp);
|
assert(m->coldp < m->endp);
|
||||||
m->coldp++;
|
m->coldp += XMBRTOWC(NULL, m->coldp,
|
||||||
|
m->endp - m->coldp, &m->mbs, 0);
|
||||||
}
|
}
|
||||||
if (nmatch == 1 && !g->backrefs)
|
if (nmatch == 1 && !g->backrefs)
|
||||||
break; /* no further info needed */
|
break; /* no further info needed */
|
||||||
|
@ -156,15 +298,15 @@ int eflags;
|
||||||
dp = dissect(m, m->coldp, endp, gf, gl);
|
dp = dissect(m, m->coldp, endp, gf, gl);
|
||||||
} else {
|
} else {
|
||||||
if (g->nplus > 0 && m->lastpos == NULL)
|
if (g->nplus > 0 && m->lastpos == NULL)
|
||||||
m->lastpos = (char **)malloc((g->nplus+1) *
|
m->lastpos = malloc((g->nplus+1) *
|
||||||
sizeof(char *));
|
sizeof(const char *));
|
||||||
if (g->nplus > 0 && m->lastpos == NULL) {
|
if (g->nplus > 0 && m->lastpos == NULL) {
|
||||||
free(m->pmatch);
|
free(m->pmatch);
|
||||||
STATETEARDOWN(m);
|
STATETEARDOWN(m);
|
||||||
return(REG_ESPACE);
|
return(REG_ESPACE);
|
||||||
}
|
}
|
||||||
NOTE("backref dissect");
|
NOTE("backref dissect");
|
||||||
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
|
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
|
||||||
}
|
}
|
||||||
if (dp != NULL)
|
if (dp != NULL)
|
||||||
break;
|
break;
|
||||||
|
@ -187,7 +329,7 @@ int eflags;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
NOTE("backoff dissect");
|
NOTE("backoff dissect");
|
||||||
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
|
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
|
||||||
}
|
}
|
||||||
assert(dp == NULL || dp == endp);
|
assert(dp == NULL || dp == endp);
|
||||||
if (dp != NULL) /* found a shorter one */
|
if (dp != NULL) /* found a shorter one */
|
||||||
|
@ -195,7 +337,9 @@ int eflags;
|
||||||
|
|
||||||
/* despite initial appearances, there is no match here */
|
/* despite initial appearances, there is no match here */
|
||||||
NOTE("false alarm");
|
NOTE("false alarm");
|
||||||
start = m->coldp + 1; /* recycle starting later */
|
/* recycle starting later */
|
||||||
|
start = m->coldp + XMBRTOWC(NULL, m->coldp,
|
||||||
|
stop - m->coldp, &m->mbs, 0);
|
||||||
assert(start <= stop);
|
assert(start <= stop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -225,30 +369,29 @@ int eflags;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
- dissect - figure out what matched what, no back references
|
- dissect - figure out what matched what, no back references
|
||||||
== static char *dissect(register struct match *m, char *start, \
|
== static const char *dissect(struct match *m, const char *start, \
|
||||||
== char *stop, sopno startst, sopno stopst);
|
== const char *stop, sopno startst, sopno stopst);
|
||||||
*/
|
*/
|
||||||
static char * /* == stop (success) always */
|
static const char * /* == stop (success) always */
|
||||||
dissect(m, start, stop, startst, stopst)
|
dissect(struct match *m,
|
||||||
register struct match *m;
|
const char *start,
|
||||||
char *start;
|
const char *stop,
|
||||||
char *stop;
|
sopno startst,
|
||||||
sopno startst;
|
sopno stopst)
|
||||||
sopno stopst;
|
|
||||||
{
|
{
|
||||||
register int i;
|
int i;
|
||||||
register sopno ss; /* start sop of current subRE */
|
sopno ss; /* start sop of current subRE */
|
||||||
register sopno es; /* end sop of current subRE */
|
sopno es; /* end sop of current subRE */
|
||||||
register char *sp; /* start of string matched by it */
|
const char *sp; /* start of string matched by it */
|
||||||
register char *stp; /* string matched by it cannot pass here */
|
const char *stp; /* string matched by it cannot pass here */
|
||||||
register char *rest; /* start of rest of string */
|
const char *rest; /* start of rest of string */
|
||||||
register char *tail; /* string unmatched by rest of RE */
|
const char *tail; /* string unmatched by rest of RE */
|
||||||
register sopno ssub; /* start sop of subsubRE */
|
sopno ssub; /* start sop of subsubRE */
|
||||||
register sopno esub; /* end sop of subsubRE */
|
sopno esub; /* end sop of subsubRE */
|
||||||
register char *ssp; /* start of string matched by subsubRE */
|
const char *ssp; /* start of string matched by subsubRE */
|
||||||
register char *sep; /* end of string matched by subsubRE */
|
const char *sep; /* end of string matched by subsubRE */
|
||||||
register char *oldssp; /* previous ssp */
|
const char *oldssp; /* previous ssp */
|
||||||
register char *dp;
|
const char *dp;
|
||||||
|
|
||||||
AT("diss", start, stop, startst, stopst);
|
AT("diss", start, stop, startst, stopst);
|
||||||
sp = start;
|
sp = start;
|
||||||
|
@ -273,7 +416,7 @@ sopno stopst;
|
||||||
assert(nope);
|
assert(nope);
|
||||||
break;
|
break;
|
||||||
case OCHAR:
|
case OCHAR:
|
||||||
sp++;
|
sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
|
||||||
break;
|
break;
|
||||||
case OBOL:
|
case OBOL:
|
||||||
case OEOL:
|
case OEOL:
|
||||||
|
@ -282,7 +425,7 @@ sopno stopst;
|
||||||
break;
|
break;
|
||||||
case OANY:
|
case OANY:
|
||||||
case OANYOF:
|
case OANYOF:
|
||||||
sp++;
|
sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
|
||||||
break;
|
break;
|
||||||
case OBACK_:
|
case OBACK_:
|
||||||
case O_BACK:
|
case O_BACK:
|
||||||
|
@ -413,30 +556,31 @@ sopno stopst;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
- backref - figure out what matched what, figuring in back references
|
- backref - figure out what matched what, figuring in back references
|
||||||
== static char *backref(register struct match *m, char *start, \
|
== static const char *backref(struct match *m, const char *start, \
|
||||||
== char *stop, sopno startst, sopno stopst, sopno lev);
|
== const char *stop, sopno startst, sopno stopst, sopno lev);
|
||||||
*/
|
*/
|
||||||
static char * /* == stop (success) or NULL (failure) */
|
static const char * /* == stop (success) or NULL (failure) */
|
||||||
backref(m, start, stop, startst, stopst, lev)
|
backref(struct match *m,
|
||||||
register struct match *m;
|
const char *start,
|
||||||
char *start;
|
const char *stop,
|
||||||
char *stop;
|
sopno startst,
|
||||||
sopno startst;
|
sopno stopst,
|
||||||
sopno stopst;
|
sopno lev, /* PLUS nesting level */
|
||||||
sopno lev; /* PLUS nesting level */
|
int rec)
|
||||||
{
|
{
|
||||||
register int i;
|
int i;
|
||||||
register sopno ss; /* start sop of current subRE */
|
sopno ss; /* start sop of current subRE */
|
||||||
register char *sp; /* start of string matched by it */
|
const char *sp; /* start of string matched by it */
|
||||||
register sopno ssub; /* start sop of subsubRE */
|
sopno ssub; /* start sop of subsubRE */
|
||||||
register sopno esub; /* end sop of subsubRE */
|
sopno esub; /* end sop of subsubRE */
|
||||||
register char *ssp; /* start of string matched by subsubRE */
|
const char *ssp; /* start of string matched by subsubRE */
|
||||||
register char *dp;
|
const char *dp;
|
||||||
register size_t len;
|
size_t len;
|
||||||
register int hard;
|
int hard;
|
||||||
register sop s;
|
sop s;
|
||||||
register regoff_t offsave;
|
regoff_t offsave;
|
||||||
register cset *cs;
|
cset *cs;
|
||||||
|
wint_t wc;
|
||||||
|
|
||||||
AT("back", start, stop, startst, stopst);
|
AT("back", start, stop, startst, stopst);
|
||||||
sp = start;
|
sp = start;
|
||||||
|
@ -446,17 +590,25 @@ sopno lev; /* PLUS nesting level */
|
||||||
for (ss = startst; !hard && ss < stopst; ss++)
|
for (ss = startst; !hard && ss < stopst; ss++)
|
||||||
switch (OP(s = m->g->strip[ss])) {
|
switch (OP(s = m->g->strip[ss])) {
|
||||||
case OCHAR:
|
case OCHAR:
|
||||||
if (sp == stop || *sp++ != (char)OPND(s))
|
if (sp == stop)
|
||||||
|
return(NULL);
|
||||||
|
sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
|
||||||
|
if (wc != OPND(s))
|
||||||
return(NULL);
|
return(NULL);
|
||||||
break;
|
break;
|
||||||
case OANY:
|
case OANY:
|
||||||
if (sp == stop)
|
if (sp == stop)
|
||||||
return(NULL);
|
return(NULL);
|
||||||
sp++;
|
sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
|
||||||
|
if (wc == BADCHAR)
|
||||||
|
return (NULL);
|
||||||
break;
|
break;
|
||||||
case OANYOF:
|
case OANYOF:
|
||||||
|
if (sp == stop)
|
||||||
|
return (NULL);
|
||||||
cs = &m->g->sets[OPND(s)];
|
cs = &m->g->sets[OPND(s)];
|
||||||
if (sp == stop || !CHIN(cs, *sp++))
|
sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
|
||||||
|
if (wc == BADCHAR || !CHIN(cs, wc))
|
||||||
return(NULL);
|
return(NULL);
|
||||||
break;
|
break;
|
||||||
case OBOL:
|
case OBOL:
|
||||||
|
@ -529,6 +681,8 @@ sopno lev; /* PLUS nesting level */
|
||||||
return(NULL);
|
return(NULL);
|
||||||
assert(m->pmatch[i].rm_so != -1);
|
assert(m->pmatch[i].rm_so != -1);
|
||||||
len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
|
len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
|
||||||
|
if (len == 0 && rec++ > MAX_RECURSION)
|
||||||
|
return(NULL);
|
||||||
assert(stop - m->beginp >= len);
|
assert(stop - m->beginp >= len);
|
||||||
if (sp > stop - len)
|
if (sp > stop - len)
|
||||||
return(NULL); /* not enough left to match */
|
return(NULL); /* not enough left to match */
|
||||||
|
@ -537,28 +691,28 @@ sopno lev; /* PLUS nesting level */
|
||||||
return(NULL);
|
return(NULL);
|
||||||
while (m->g->strip[ss] != SOP(O_BACK, i))
|
while (m->g->strip[ss] != SOP(O_BACK, i))
|
||||||
ss++;
|
ss++;
|
||||||
return(backref(m, sp+len, stop, ss+1, stopst, lev));
|
return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
|
||||||
break;
|
break;
|
||||||
case OQUEST_: /* to null or not */
|
case OQUEST_: /* to null or not */
|
||||||
dp = backref(m, sp, stop, ss+1, stopst, lev);
|
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
|
||||||
if (dp != NULL)
|
if (dp != NULL)
|
||||||
return(dp); /* not */
|
return(dp); /* not */
|
||||||
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
|
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
|
||||||
break;
|
break;
|
||||||
case OPLUS_:
|
case OPLUS_:
|
||||||
assert(m->lastpos != NULL);
|
assert(m->lastpos != NULL);
|
||||||
assert(lev+1 <= m->g->nplus);
|
assert(lev+1 <= m->g->nplus);
|
||||||
m->lastpos[lev+1] = sp;
|
m->lastpos[lev+1] = sp;
|
||||||
return(backref(m, sp, stop, ss+1, stopst, lev+1));
|
return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
|
||||||
break;
|
break;
|
||||||
case O_PLUS:
|
case O_PLUS:
|
||||||
if (sp == m->lastpos[lev]) /* last pass matched null */
|
if (sp == m->lastpos[lev]) /* last pass matched null */
|
||||||
return(backref(m, sp, stop, ss+1, stopst, lev-1));
|
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
|
||||||
/* try another pass */
|
/* try another pass */
|
||||||
m->lastpos[lev] = sp;
|
m->lastpos[lev] = sp;
|
||||||
dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
|
dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
|
||||||
if (dp == NULL)
|
if (dp == NULL)
|
||||||
return(backref(m, sp, stop, ss+1, stopst, lev-1));
|
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
|
||||||
else
|
else
|
||||||
return(dp);
|
return(dp);
|
||||||
break;
|
break;
|
||||||
|
@ -567,7 +721,7 @@ sopno lev; /* PLUS nesting level */
|
||||||
esub = ss + OPND(s) - 1;
|
esub = ss + OPND(s) - 1;
|
||||||
assert(OP(m->g->strip[esub]) == OOR1);
|
assert(OP(m->g->strip[esub]) == OOR1);
|
||||||
for (;;) { /* find first matching branch */
|
for (;;) { /* find first matching branch */
|
||||||
dp = backref(m, sp, stop, ssub, esub, lev);
|
dp = backref(m, sp, stop, ssub, esub, lev, rec);
|
||||||
if (dp != NULL)
|
if (dp != NULL)
|
||||||
return(dp);
|
return(dp);
|
||||||
/* that one missed, try next one */
|
/* that one missed, try next one */
|
||||||
|
@ -588,7 +742,7 @@ sopno lev; /* PLUS nesting level */
|
||||||
assert(0 < i && i <= m->g->nsub);
|
assert(0 < i && i <= m->g->nsub);
|
||||||
offsave = m->pmatch[i].rm_so;
|
offsave = m->pmatch[i].rm_so;
|
||||||
m->pmatch[i].rm_so = sp - m->offp;
|
m->pmatch[i].rm_so = sp - m->offp;
|
||||||
dp = backref(m, sp, stop, ss+1, stopst, lev);
|
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
|
||||||
if (dp != NULL)
|
if (dp != NULL)
|
||||||
return(dp);
|
return(dp);
|
||||||
m->pmatch[i].rm_so = offsave;
|
m->pmatch[i].rm_so = offsave;
|
||||||
|
@ -599,7 +753,7 @@ sopno lev; /* PLUS nesting level */
|
||||||
assert(0 < i && i <= m->g->nsub);
|
assert(0 < i && i <= m->g->nsub);
|
||||||
offsave = m->pmatch[i].rm_eo;
|
offsave = m->pmatch[i].rm_eo;
|
||||||
m->pmatch[i].rm_eo = sp - m->offp;
|
m->pmatch[i].rm_eo = sp - m->offp;
|
||||||
dp = backref(m, sp, stop, ss+1, stopst, lev);
|
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
|
||||||
if (dp != NULL)
|
if (dp != NULL)
|
||||||
return(dp);
|
return(dp);
|
||||||
m->pmatch[i].rm_eo = offsave;
|
m->pmatch[i].rm_eo = offsave;
|
||||||
|
@ -613,42 +767,57 @@ sopno lev; /* PLUS nesting level */
|
||||||
/* "can't happen" */
|
/* "can't happen" */
|
||||||
assert(nope);
|
assert(nope);
|
||||||
/* NOTREACHED */
|
/* NOTREACHED */
|
||||||
return((char *)NULL); /* dummy */
|
return "shut up gcc";
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
- fast - step through the string at top speed
|
- fast - step through the string at top speed
|
||||||
== static char *fast(register struct match *m, char *start, \
|
== static const char *fast(struct match *m, const char *start, \
|
||||||
== char *stop, sopno startst, sopno stopst);
|
== const char *stop, sopno startst, sopno stopst);
|
||||||
*/
|
*/
|
||||||
static char * /* where tentative match ended, or NULL */
|
static const char * /* where tentative match ended, or NULL */
|
||||||
fast(m, start, stop, startst, stopst)
|
fast( struct match *m,
|
||||||
register struct match *m;
|
const char *start,
|
||||||
char *start;
|
const char *stop,
|
||||||
char *stop;
|
sopno startst,
|
||||||
sopno startst;
|
sopno stopst)
|
||||||
sopno stopst;
|
|
||||||
{
|
{
|
||||||
register states st = m->st;
|
states st = m->st;
|
||||||
register states fresh = m->fresh;
|
states fresh = m->fresh;
|
||||||
register states tmp = m->tmp;
|
states tmp = m->tmp;
|
||||||
register char *p = start;
|
const char *p = start;
|
||||||
register int c = (start == m->beginp) ? OUT : *(start-1);
|
wint_t c;
|
||||||
register int lastc; /* previous c */
|
wint_t lastc; /* previous c */
|
||||||
register int flagch;
|
wint_t flagch;
|
||||||
register int i;
|
int i;
|
||||||
register char *coldp; /* last p after which no match was underway */
|
const char *coldp; /* last p after which no match was underway */
|
||||||
|
size_t clen;
|
||||||
|
|
||||||
CLEAR(st);
|
CLEAR(st);
|
||||||
SET1(st, startst);
|
SET1(st, startst);
|
||||||
|
SP("fast", st, *p);
|
||||||
st = step(m->g, startst, stopst, st, NOTHING, st);
|
st = step(m->g, startst, stopst, st, NOTHING, st);
|
||||||
ASSIGN(fresh, st);
|
ASSIGN(fresh, st);
|
||||||
SP("start", st, *p);
|
SP("start", st, *p);
|
||||||
coldp = NULL;
|
coldp = NULL;
|
||||||
|
if (start == m->beginp)
|
||||||
|
c = OUT;
|
||||||
|
else {
|
||||||
|
/*
|
||||||
|
* XXX Wrong if the previous character was multi-byte.
|
||||||
|
* Newline never is (in encodings supported by FreeBSD),
|
||||||
|
* so this only breaks the ISWORD tests below.
|
||||||
|
*/
|
||||||
|
c = (uch)*(start - 1);
|
||||||
|
}
|
||||||
for (;;) {
|
for (;;) {
|
||||||
/* next character */
|
/* next character */
|
||||||
lastc = c;
|
lastc = c;
|
||||||
c = (p == m->endp) ? OUT : *p;
|
if (p == m->endp) {
|
||||||
|
clen = 0;
|
||||||
|
c = OUT;
|
||||||
|
} else
|
||||||
|
clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
|
||||||
if (EQ(st, fresh))
|
if (EQ(st, fresh))
|
||||||
coldp = p;
|
coldp = p;
|
||||||
|
|
||||||
|
@ -686,7 +855,7 @@ sopno stopst;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* are we done? */
|
/* are we done? */
|
||||||
if (ISSET(st, stopst) || p == stop)
|
if (ISSET(st, stopst) || p == stop || clen > stop - p)
|
||||||
break; /* NOTE BREAK OUT */
|
break; /* NOTE BREAK OUT */
|
||||||
|
|
||||||
/* no, we must deal with this character */
|
/* no, we must deal with this character */
|
||||||
|
@ -696,39 +865,39 @@ sopno stopst;
|
||||||
st = step(m->g, startst, stopst, tmp, c, st);
|
st = step(m->g, startst, stopst, tmp, c, st);
|
||||||
SP("aft", st, c);
|
SP("aft", st, c);
|
||||||
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
|
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
|
||||||
p++;
|
p += clen;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(coldp != NULL);
|
assert(coldp != NULL);
|
||||||
m->coldp = coldp;
|
m->coldp = coldp;
|
||||||
if (ISSET(st, stopst))
|
if (ISSET(st, stopst))
|
||||||
return(p+1);
|
return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
|
||||||
else
|
else
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
- slow - step through the string more deliberately
|
- slow - step through the string more deliberately
|
||||||
== static char *slow(register struct match *m, char *start, \
|
== static const char *slow(struct match *m, const char *start, \
|
||||||
== char *stop, sopno startst, sopno stopst);
|
== const char *stop, sopno startst, sopno stopst);
|
||||||
*/
|
*/
|
||||||
static char * /* where it ended */
|
static const char * /* where it ended */
|
||||||
slow(m, start, stop, startst, stopst)
|
slow( struct match *m,
|
||||||
register struct match *m;
|
const char *start,
|
||||||
char *start;
|
const char *stop,
|
||||||
char *stop;
|
sopno startst,
|
||||||
sopno startst;
|
sopno stopst)
|
||||||
sopno stopst;
|
|
||||||
{
|
{
|
||||||
register states st = m->st;
|
states st = m->st;
|
||||||
register states empty = m->empty;
|
states empty = m->empty;
|
||||||
register states tmp = m->tmp;
|
states tmp = m->tmp;
|
||||||
register char *p = start;
|
const char *p = start;
|
||||||
register int c = (start == m->beginp) ? OUT : *(start-1);
|
wint_t c;
|
||||||
register int lastc; /* previous c */
|
wint_t lastc; /* previous c */
|
||||||
register int flagch;
|
wint_t flagch;
|
||||||
register int i;
|
int i;
|
||||||
register char *matchp; /* last p at which a match ended */
|
const char *matchp; /* last p at which a match ended */
|
||||||
|
size_t clen;
|
||||||
|
|
||||||
AT("slow", start, stop, startst, stopst);
|
AT("slow", start, stop, startst, stopst);
|
||||||
CLEAR(st);
|
CLEAR(st);
|
||||||
|
@ -736,10 +905,24 @@ sopno stopst;
|
||||||
SP("sstart", st, *p);
|
SP("sstart", st, *p);
|
||||||
st = step(m->g, startst, stopst, st, NOTHING, st);
|
st = step(m->g, startst, stopst, st, NOTHING, st);
|
||||||
matchp = NULL;
|
matchp = NULL;
|
||||||
|
if (start == m->beginp)
|
||||||
|
c = OUT;
|
||||||
|
else {
|
||||||
|
/*
|
||||||
|
* XXX Wrong if the previous character was multi-byte.
|
||||||
|
* Newline never is (in encodings supported by FreeBSD),
|
||||||
|
* so this only breaks the ISWORD tests below.
|
||||||
|
*/
|
||||||
|
c = (uch)*(start - 1);
|
||||||
|
}
|
||||||
for (;;) {
|
for (;;) {
|
||||||
/* next character */
|
/* next character */
|
||||||
lastc = c;
|
lastc = c;
|
||||||
c = (p == m->endp) ? OUT : *p;
|
if (p == m->endp) {
|
||||||
|
c = OUT;
|
||||||
|
clen = 0;
|
||||||
|
} else
|
||||||
|
clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
|
||||||
|
|
||||||
/* is there an EOL and/or BOL between lastc and c? */
|
/* is there an EOL and/or BOL between lastc and c? */
|
||||||
flagch = '\0';
|
flagch = '\0';
|
||||||
|
@ -777,7 +960,7 @@ sopno stopst;
|
||||||
/* are we done? */
|
/* are we done? */
|
||||||
if (ISSET(st, stopst))
|
if (ISSET(st, stopst))
|
||||||
matchp = p;
|
matchp = p;
|
||||||
if (EQ(st, empty) || p == stop)
|
if (EQ(st, empty) || p == stop || clen > stop - p)
|
||||||
break; /* NOTE BREAK OUT */
|
break; /* NOTE BREAK OUT */
|
||||||
|
|
||||||
/* no, we must deal with this character */
|
/* no, we must deal with this character */
|
||||||
|
@ -787,7 +970,7 @@ sopno stopst;
|
||||||
st = step(m->g, startst, stopst, tmp, c, st);
|
st = step(m->g, startst, stopst, tmp, c, st);
|
||||||
SP("saft", st, c);
|
SP("saft", st, c);
|
||||||
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
|
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
|
||||||
p++;
|
p += clen;
|
||||||
}
|
}
|
||||||
|
|
||||||
return(matchp);
|
return(matchp);
|
||||||
|
@ -796,33 +979,31 @@ sopno stopst;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
- step - map set of states reachable before char to set reachable after
|
- step - map set of states reachable before char to set reachable after
|
||||||
== static states step(register struct re_guts *g, sopno start, sopno stop, \
|
== static states step(struct re_guts *g, sopno start, sopno stop, \
|
||||||
== register states bef, int ch, register states aft);
|
== states bef, int ch, states aft);
|
||||||
== #define BOL (OUT+1)
|
== #define BOL (OUT-1)
|
||||||
== #define EOL (BOL+1)
|
== #define EOL (BOL-1)
|
||||||
== #define BOLEOL (BOL+2)
|
== #define BOLEOL (BOL-2)
|
||||||
== #define NOTHING (BOL+3)
|
== #define NOTHING (BOL-3)
|
||||||
== #define BOW (BOL+4)
|
== #define BOW (BOL-4)
|
||||||
== #define EOW (BOL+5)
|
== #define EOW (BOL-5)
|
||||||
== #define CODEMAX (BOL+5) // highest code used
|
== #define BADCHAR (BOL-6)
|
||||||
== #define NONCHAR(c) ((c) > CHAR_MAX)
|
== #define NONCHAR(c) ((c) <= OUT)
|
||||||
== #define NNONCHAR (CODEMAX-CHAR_MAX)
|
|
||||||
*/
|
*/
|
||||||
static states
|
static states
|
||||||
step(g, start, stop, bef, ch, aft)
|
step(struct re_guts *g,
|
||||||
register struct re_guts *g;
|
sopno start, /* start state within strip */
|
||||||
sopno start; /* start state within strip */
|
sopno stop, /* state after stop state within strip */
|
||||||
sopno stop; /* state after stop state within strip */
|
states bef, /* states reachable before */
|
||||||
register states bef; /* states reachable before */
|
wint_t ch, /* character or NONCHAR code */
|
||||||
int ch; /* character or NONCHAR code */
|
states aft) /* states already known reachable after */
|
||||||
register states aft; /* states already known reachable after */
|
|
||||||
{
|
{
|
||||||
register cset *cs;
|
cset *cs;
|
||||||
register sop s;
|
sop s;
|
||||||
register sopno pc;
|
sopno pc;
|
||||||
register onestate here; /* note, macros know this name */
|
onestate here; /* note, macros know this name */
|
||||||
register sopno look;
|
sopno look;
|
||||||
register long i;
|
int i;
|
||||||
|
|
||||||
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
|
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
|
||||||
s = g->strip[pc];
|
s = g->strip[pc];
|
||||||
|
@ -832,8 +1013,8 @@ register states aft; /* states already known reachable after */
|
||||||
break;
|
break;
|
||||||
case OCHAR:
|
case OCHAR:
|
||||||
/* only characters can match */
|
/* only characters can match */
|
||||||
assert(!NONCHAR(ch) || ch != (char)OPND(s));
|
assert(!NONCHAR(ch) || ch != OPND(s));
|
||||||
if (ch == (char)OPND(s))
|
if (ch == OPND(s))
|
||||||
FWD(aft, bef, 1);
|
FWD(aft, bef, 1);
|
||||||
break;
|
break;
|
||||||
case OBOL:
|
case OBOL:
|
||||||
|
@ -900,7 +1081,7 @@ register states aft; /* states already known reachable after */
|
||||||
OP(s = g->strip[pc+look]) != O_CH;
|
OP(s = g->strip[pc+look]) != O_CH;
|
||||||
look += OPND(s))
|
look += OPND(s))
|
||||||
assert(OP(s) == OOR2);
|
assert(OP(s) == OOR2);
|
||||||
FWD(aft, aft, look);
|
FWD(aft, aft, look + 1);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case OOR2: /* propagate OCH_'s marking */
|
case OOR2: /* propagate OCH_'s marking */
|
||||||
|
@ -926,21 +1107,20 @@ register states aft; /* states already known reachable after */
|
||||||
/*
|
/*
|
||||||
- print - print a set of states
|
- print - print a set of states
|
||||||
== #ifdef REDEBUG
|
== #ifdef REDEBUG
|
||||||
== static void print(struct match *m, char *caption, states st, \
|
== static void print(struct match *m, const char *caption, states st, \
|
||||||
== int ch, FILE *d);
|
== int ch, FILE *d);
|
||||||
== #endif
|
== #endif
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
print(m, caption, st, ch, d)
|
print(struct match *m,
|
||||||
struct match *m;
|
const char *caption,
|
||||||
char *caption;
|
states st,
|
||||||
states st;
|
int ch,
|
||||||
int ch;
|
FILE *d)
|
||||||
FILE *d;
|
|
||||||
{
|
{
|
||||||
register struct re_guts *g = m->g;
|
struct re_guts *g = m->g;
|
||||||
register int i;
|
int i;
|
||||||
register int first = 1;
|
int first = 1;
|
||||||
|
|
||||||
if (!(m->eflags&REG_TRACE))
|
if (!(m->eflags&REG_TRACE))
|
||||||
return;
|
return;
|
||||||
|
@ -959,18 +1139,17 @@ FILE *d;
|
||||||
/*
|
/*
|
||||||
- at - print current situation
|
- at - print current situation
|
||||||
== #ifdef REDEBUG
|
== #ifdef REDEBUG
|
||||||
== static void at(struct match *m, char *title, char *start, char *stop, \
|
== static void at(struct match *m, const char *title, const char *start, \
|
||||||
== sopno startst, sopno stopst);
|
== const char *stop, sopno startst, sopno stopst);
|
||||||
== #endif
|
== #endif
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
at(m, title, start, stop, startst, stopst)
|
at( struct match *m,
|
||||||
struct match *m;
|
const char *title,
|
||||||
char *title;
|
const char *start,
|
||||||
char *start;
|
const char *stop,
|
||||||
char *stop;
|
sopno startst,
|
||||||
sopno startst;
|
sopno stopst)
|
||||||
sopno stopst;
|
|
||||||
{
|
{
|
||||||
if (!(m->eflags&REG_TRACE))
|
if (!(m->eflags&REG_TRACE))
|
||||||
return;
|
return;
|
||||||
|
@ -985,7 +1164,7 @@ sopno stopst;
|
||||||
/*
|
/*
|
||||||
- pchar - make a character printable
|
- pchar - make a character printable
|
||||||
== #ifdef REDEBUG
|
== #ifdef REDEBUG
|
||||||
== static char *pchar(int ch);
|
== static const char *pchar(int ch);
|
||||||
== #endif
|
== #endif
|
||||||
*
|
*
|
||||||
* Is this identical to regchar() over in debug.c? Well, yes. But a
|
* Is this identical to regchar() over in debug.c? Well, yes. But a
|
||||||
|
@ -993,13 +1172,12 @@ sopno stopst;
|
||||||
* a matching debug.o, and this is convenient. It all disappears in
|
* a matching debug.o, and this is convenient. It all disappears in
|
||||||
* the non-debug compilation anyway, so it doesn't matter much.
|
* the non-debug compilation anyway, so it doesn't matter much.
|
||||||
*/
|
*/
|
||||||
static char * /* -> representation */
|
static const char * /* -> representation */
|
||||||
pchar(ch)
|
pchar(int ch)
|
||||||
int ch;
|
|
||||||
{
|
{
|
||||||
static char pbuf[10];
|
static char pbuf[10];
|
||||||
|
|
||||||
if (isprint(ch) || ch == ' ')
|
if (isprint((uch)ch) || ch == ' ')
|
||||||
sprintf(pbuf, "%c", ch);
|
sprintf(pbuf, "%c", ch);
|
||||||
else
|
else
|
||||||
sprintf(pbuf, "\\%o", ch);
|
sprintf(pbuf, "\\%o", ch);
|
||||||
|
|
|
@ -1,35 +0,0 @@
|
||||||
/* ========= begin header generated by ./mkh ========= */
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* === engine.c === */
|
|
||||||
static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
|
|
||||||
static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
|
||||||
static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
|
|
||||||
static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
|
||||||
static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
|
||||||
static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
|
|
||||||
#define BOL (OUT+1)
|
|
||||||
#define EOL (BOL+1)
|
|
||||||
#define BOLEOL (BOL+2)
|
|
||||||
#define NOTHING (BOL+3)
|
|
||||||
#define BOW (BOL+4)
|
|
||||||
#define EOW (BOL+5)
|
|
||||||
#define CODEMAX (BOL+5) /* highest code used */
|
|
||||||
#define NONCHAR(c) ((c) > CHAR_MAX)
|
|
||||||
#define NNONCHAR (CODEMAX-CHAR_MAX)
|
|
||||||
#ifdef REDEBUG
|
|
||||||
static void print(struct match *m, char *caption, states st, int ch, FILE *d);
|
|
||||||
#endif
|
|
||||||
#ifdef REDEBUG
|
|
||||||
static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
|
|
||||||
#endif
|
|
||||||
#ifdef REDEBUG
|
|
||||||
static char *pchar(int ch);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
/* ========= end header generated by ./mkh ========= */
|
|
|
@ -1,76 +0,0 @@
|
||||||
#! /bin/sh
|
|
||||||
# mkh - pull headers out of C source
|
|
||||||
PATH=/bin:/usr/bin ; export PATH
|
|
||||||
|
|
||||||
# egrep pattern to pick out marked lines
|
|
||||||
egrep='^ =([ ]|$)'
|
|
||||||
|
|
||||||
# Sed program to process marked lines into lines for the header file.
|
|
||||||
# The markers have already been removed. Two things are done here: removal
|
|
||||||
# of backslashed newlines, and some fudging of comments. The first is done
|
|
||||||
# because -o needs to have prototypes on one line to strip them down.
|
|
||||||
# Getting comments into the output is tricky; we turn C++-style // comments
|
|
||||||
# into /* */ comments, after altering any existing */'s to avoid trouble.
|
|
||||||
peel=' /\\$/N
|
|
||||||
/\\\n[ ]*/s///g
|
|
||||||
/\/\//s;\*/;* /;g
|
|
||||||
/\/\//s;//\(.*\);/*\1 */;'
|
|
||||||
|
|
||||||
for a
|
|
||||||
do
|
|
||||||
case "$a" in
|
|
||||||
-o) # old (pre-function-prototype) compiler
|
|
||||||
# add code to comment out argument lists
|
|
||||||
peel="$peel
|
|
||||||
"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);'
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
-b) # funny Berkeley __P macro
|
|
||||||
peel="$peel
|
|
||||||
"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));'
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
-s) # compiler doesn't like `static foo();'
|
|
||||||
# add code to get rid of the `static'
|
|
||||||
peel="$peel
|
|
||||||
"'/^static[ ][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;'
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
-p) # private declarations
|
|
||||||
egrep='^ ==([ ]|$)'
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
-i) # wrap in #ifndef, argument is name
|
|
||||||
ifndef="$2"
|
|
||||||
shift ; shift
|
|
||||||
;;
|
|
||||||
*) break
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
if test " $ifndef" != " "
|
|
||||||
then
|
|
||||||
echo "#ifndef $ifndef"
|
|
||||||
echo "#define $ifndef /* never again */"
|
|
||||||
fi
|
|
||||||
echo "/* ========= begin header generated by $0 ========= */"
|
|
||||||
echo '#ifdef __cplusplus'
|
|
||||||
echo 'extern "C" {'
|
|
||||||
echo '#endif'
|
|
||||||
for f
|
|
||||||
do
|
|
||||||
echo
|
|
||||||
echo "/* === $f === */"
|
|
||||||
egrep "$egrep" $f | sed 's/^ ==*[ ]//;s/^ ==*$//' | sed "$peel"
|
|
||||||
echo
|
|
||||||
done
|
|
||||||
echo '#ifdef __cplusplus'
|
|
||||||
echo '}'
|
|
||||||
echo '#endif'
|
|
||||||
echo "/* ========= end header generated by $0 ========= */"
|
|
||||||
if test " $ifndef" != " "
|
|
||||||
then
|
|
||||||
echo "#endif"
|
|
||||||
fi
|
|
||||||
exit 0
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,48 +0,0 @@
|
||||||
/* ========= begin header generated by ./mkh ========= */
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* === regcomp.c === */
|
|
||||||
static void p_ere(register struct parse *p, int stop);
|
|
||||||
static void p_ere_exp(register struct parse *p);
|
|
||||||
static void p_str(register struct parse *p);
|
|
||||||
static void p_bre(register struct parse *p, register int end1, register int end2);
|
|
||||||
static int p_simp_re(register struct parse *p, int starordinary);
|
|
||||||
static int p_count(register struct parse *p);
|
|
||||||
static void p_bracket(register struct parse *p);
|
|
||||||
static void p_b_term(register struct parse *p, register cset *cs);
|
|
||||||
static void p_b_cclass(register struct parse *p, register cset *cs);
|
|
||||||
static void p_b_eclass(register struct parse *p, register cset *cs);
|
|
||||||
static char p_b_symbol(register struct parse *p);
|
|
||||||
static char p_b_coll_elem(register struct parse *p, int endc);
|
|
||||||
static char othercase(int ch);
|
|
||||||
static void bothcases(register struct parse *p, int ch);
|
|
||||||
static void ordinary(register struct parse *p, register int ch);
|
|
||||||
static void nonnewline(register struct parse *p);
|
|
||||||
static void repeat(register struct parse *p, sopno start, int from, int to);
|
|
||||||
static int seterr(register struct parse *p, int e);
|
|
||||||
static cset *allocset(register struct parse *p);
|
|
||||||
static void freeset(register struct parse *p, register cset *cs);
|
|
||||||
static int freezeset(register struct parse *p, register cset *cs);
|
|
||||||
static int firstch(register struct parse *p, register cset *cs);
|
|
||||||
static int nch(register struct parse *p, register cset *cs);
|
|
||||||
static void mcadd(register struct parse *p, register cset *cs, register const char *cp);
|
|
||||||
static void mcinvert(register struct parse *p, register cset *cs);
|
|
||||||
static void mccase(register struct parse *p, register cset *cs);
|
|
||||||
static int isinsets(register struct re_guts *g, int c);
|
|
||||||
static int samesets(register struct re_guts *g, int c1, int c2);
|
|
||||||
static void categorize(struct parse *p, register struct re_guts *g);
|
|
||||||
static sopno dupl(register struct parse *p, sopno start, sopno finish);
|
|
||||||
static void doemit(register struct parse *p, sop op, size_t opnd);
|
|
||||||
static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
|
|
||||||
static void dofwd(register struct parse *p, sopno pos, sop value);
|
|
||||||
static void enlarge(register struct parse *p, sopno size);
|
|
||||||
static void stripsnug(register struct parse *p, register struct re_guts *g);
|
|
||||||
static void findmust(register struct parse *p, register struct re_guts *g);
|
|
||||||
static sopno pluscount(register struct parse *p, register struct re_guts *g);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
/* ========= end header generated by ./mkh ========= */
|
|
|
@ -1,17 +1,66 @@
|
||||||
#include "winsup.h"
|
/*-
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regerror.c 8.4 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(LIBC_SCCS) && !defined(lint)
|
||||||
|
static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94";
|
||||||
|
#endif /* LIBC_SCCS and not lint */
|
||||||
|
#include <sys/cdefs.h>
|
||||||
|
__FBSDID("$FreeBSD: src/lib/libc/regex/regerror.c,v 1.11 2007/06/11 03:05:54 delphij Exp $");
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "regex.h"
|
#include <regex.h>
|
||||||
|
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "regerror.ih"
|
|
||||||
|
|
||||||
|
/* ========= begin header generated by ./mkh ========= */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* === regerror.c === */
|
||||||
|
static char *regatoi(const regex_t *preg, char *localbuf);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/* ========= end header generated by ./mkh ========= */
|
||||||
/*
|
/*
|
||||||
= #define REG_OKAY 0
|
|
||||||
= #define REG_NOMATCH 1
|
= #define REG_NOMATCH 1
|
||||||
= #define REG_BADPAT 2
|
= #define REG_BADPAT 2
|
||||||
= #define REG_ECOLLATE 3
|
= #define REG_ECOLLATE 3
|
||||||
|
@ -28,15 +77,20 @@
|
||||||
= #define REG_EMPTY 14
|
= #define REG_EMPTY 14
|
||||||
= #define REG_ASSERT 15
|
= #define REG_ASSERT 15
|
||||||
= #define REG_INVARG 16
|
= #define REG_INVARG 16
|
||||||
|
= #define REG_ILLSEQ 17
|
||||||
= #define REG_ATOI 255 // convert name to number (!)
|
= #define REG_ATOI 255 // convert name to number (!)
|
||||||
= #define REG_ITOA 0400 // convert number to name (!)
|
= #define REG_ITOA 0400 // convert number to name (!)
|
||||||
*/
|
*/
|
||||||
static struct rerr {
|
static struct rerr {
|
||||||
int code;
|
int code;
|
||||||
|
#ifdef __CYGWIN__ /* Avoid whining compiler */
|
||||||
const char *name;
|
const char *name;
|
||||||
const char *explain;
|
const char *explain;
|
||||||
|
#else
|
||||||
|
char *name;
|
||||||
|
char *explain;
|
||||||
|
#endif
|
||||||
} rerrs[] = {
|
} rerrs[] = {
|
||||||
{REG_OKAY, "REG_OKAY", "no errors detected"},
|
|
||||||
{REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match"},
|
{REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match"},
|
||||||
{REG_BADPAT, "REG_BADPAT", "invalid regular expression"},
|
{REG_BADPAT, "REG_BADPAT", "invalid regular expression"},
|
||||||
{REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"},
|
{REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"},
|
||||||
|
@ -53,7 +107,8 @@ static struct rerr {
|
||||||
{REG_EMPTY, "REG_EMPTY", "empty (sub)expression"},
|
{REG_EMPTY, "REG_EMPTY", "empty (sub)expression"},
|
||||||
{REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"},
|
{REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"},
|
||||||
{REG_INVARG, "REG_INVARG", "invalid argument to regex routine"},
|
{REG_INVARG, "REG_INVARG", "invalid argument to regex routine"},
|
||||||
{-1, "", "*** unknown regexp error code ***"},
|
{REG_ILLSEQ, "REG_ILLSEQ", "illegal byte sequence"},
|
||||||
|
{0, "", "*** unknown regexp error code ***"}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -62,27 +117,30 @@ static struct rerr {
|
||||||
*/
|
*/
|
||||||
/* ARGSUSED */
|
/* ARGSUSED */
|
||||||
size_t
|
size_t
|
||||||
regerror(errcode, preg, errbuf, errbuf_size)
|
regerror(int errcode,
|
||||||
int errcode;
|
const regex_t * __restrict preg,
|
||||||
const regex_t *preg;
|
char * __restrict errbuf,
|
||||||
char *errbuf;
|
size_t errbuf_size)
|
||||||
size_t errbuf_size;
|
|
||||||
{
|
{
|
||||||
register struct rerr *r;
|
struct rerr *r;
|
||||||
register size_t len;
|
size_t len;
|
||||||
register int target = errcode &~ REG_ITOA;
|
int target = errcode &~ REG_ITOA;
|
||||||
register const char *s;
|
#ifdef __CYGWIN__ /* Avoid whining compiler */
|
||||||
|
const char *s;
|
||||||
|
#else
|
||||||
|
char *s;
|
||||||
|
#endif
|
||||||
char convbuf[50];
|
char convbuf[50];
|
||||||
|
|
||||||
if (errcode == REG_ATOI)
|
if (errcode == REG_ATOI)
|
||||||
s = regatoi(preg, convbuf);
|
s = regatoi(preg, convbuf);
|
||||||
else {
|
else {
|
||||||
for (r = rerrs; r->code >= 0; r++)
|
for (r = rerrs; r->code != 0; r++)
|
||||||
if (r->code == target)
|
if (r->code == target)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (errcode&REG_ITOA) {
|
if (errcode&REG_ITOA) {
|
||||||
if (r->code >= 0)
|
if (r->code != 0)
|
||||||
(void) strcpy(convbuf, r->name);
|
(void) strcpy(convbuf, r->name);
|
||||||
else
|
else
|
||||||
sprintf(convbuf, "REG_0x%x", target);
|
sprintf(convbuf, "REG_0x%x", target);
|
||||||
|
@ -109,18 +167,23 @@ size_t errbuf_size;
|
||||||
- regatoi - internal routine to implement REG_ATOI
|
- regatoi - internal routine to implement REG_ATOI
|
||||||
== static char *regatoi(const regex_t *preg, char *localbuf);
|
== static char *regatoi(const regex_t *preg, char *localbuf);
|
||||||
*/
|
*/
|
||||||
static const char *
|
static char *
|
||||||
regatoi(preg, localbuf)
|
regatoi(const regex_t *preg, char *localbuf)
|
||||||
const regex_t *preg;
|
|
||||||
char *localbuf;
|
|
||||||
{
|
{
|
||||||
register struct rerr *r;
|
struct rerr *r;
|
||||||
|
|
||||||
for (r = rerrs; r->code >= 0; r++)
|
for (r = rerrs; r->code != 0; r++)
|
||||||
if (strcmp(r->name, preg->re_endp) == 0)
|
if (strcmp(r->name, preg->re_endp) == 0)
|
||||||
break;
|
break;
|
||||||
if (r->code < 0)
|
if (r->code == 0)
|
||||||
|
#ifdef __CYGWIN__ /* Avoid whining compiler */
|
||||||
|
{
|
||||||
|
static const char null[] = "0";
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
#else
|
||||||
return("0");
|
return("0");
|
||||||
|
#endif
|
||||||
|
|
||||||
sprintf(localbuf, "%d", r->code);
|
sprintf(localbuf, "%d", r->code);
|
||||||
return(localbuf);
|
return(localbuf);
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* ========= begin header generated by ./mkh ========= */
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* === regerror.c === */
|
|
||||||
static const char *regatoi(const regex_t *preg, char *localbuf);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
/* ========= end header generated by ./mkh ========= */
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,151 +1,317 @@
|
||||||
.TH REGEX 7 "25 Oct 1995"
|
.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
.BY "Henry Spencer"
|
.\" Copyright (c) 1992, 1993, 1994
|
||||||
.SH NAME
|
.\" The Regents of the University of California. All rights reserved.
|
||||||
regex \- POSIX 1003.2 regular expressions
|
.\"
|
||||||
.SH DESCRIPTION
|
.\" This code is derived from software contributed to Berkeley by
|
||||||
Regular expressions (``RE''s),
|
.\" Henry Spencer.
|
||||||
as defined in POSIX 1003.2, come in two forms:
|
.\"
|
||||||
|
.\" Redistribution and use in source and binary forms, with or without
|
||||||
|
.\" modification, are permitted provided that the following conditions
|
||||||
|
.\" are met:
|
||||||
|
.\" 1. Redistributions of source code must retain the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer.
|
||||||
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer in the
|
||||||
|
.\" documentation and/or other materials provided with the distribution.
|
||||||
|
.\" 4. Neither the name of the University nor the names of its contributors
|
||||||
|
.\" may be used to endorse or promote products derived from this software
|
||||||
|
.\" without specific prior written permission.
|
||||||
|
.\"
|
||||||
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
.\" SUCH DAMAGE.
|
||||||
|
.\"
|
||||||
|
.\" @(#)re_format.7 8.3 (Berkeley) 3/20/94
|
||||||
|
.\" $FreeBSD: src/lib/libc/regex/re_format.7,v 1.12 2008/09/05 17:41:20 keramida Exp $
|
||||||
|
.\"
|
||||||
|
.Dd March 20, 1994
|
||||||
|
.Dt RE_FORMAT 7
|
||||||
|
.Os
|
||||||
|
.Sh NAME
|
||||||
|
.Nm re_format
|
||||||
|
.Nd POSIX 1003.2 regular expressions
|
||||||
|
.Sh DESCRIPTION
|
||||||
|
Regular expressions
|
||||||
|
.Pq Dq RE Ns s ,
|
||||||
|
as defined in
|
||||||
|
.St -p1003.2 ,
|
||||||
|
come in two forms:
|
||||||
modern REs (roughly those of
|
modern REs (roughly those of
|
||||||
.IR egrep ;
|
.Xr egrep 1 ;
|
||||||
1003.2 calls these ``extended'' REs)
|
1003.2 calls these
|
||||||
|
.Dq extended
|
||||||
|
REs)
|
||||||
and obsolete REs (roughly those of
|
and obsolete REs (roughly those of
|
||||||
.IR ed ;
|
.Xr ed 1 ;
|
||||||
1003.2 ``basic'' REs).
|
1003.2
|
||||||
|
.Dq basic
|
||||||
|
REs).
|
||||||
Obsolete REs mostly exist for backward compatibility in some old programs;
|
Obsolete REs mostly exist for backward compatibility in some old programs;
|
||||||
they will be discussed at the end.
|
they will be discussed at the end.
|
||||||
1003.2 leaves some aspects of RE syntax and semantics open;
|
.St -p1003.2
|
||||||
`\(dg' marks decisions on these aspects that
|
leaves some aspects of RE syntax and semantics open;
|
||||||
may not be fully portable to other 1003.2 implementations.
|
`\(dd' marks decisions on these aspects that
|
||||||
.PP
|
may not be fully portable to other
|
||||||
A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR,
|
.St -p1003.2
|
||||||
separated by `|'.
|
implementations.
|
||||||
|
.Pp
|
||||||
|
A (modern) RE is one\(dd or more non-empty\(dd
|
||||||
|
.Em branches ,
|
||||||
|
separated by
|
||||||
|
.Ql \&| .
|
||||||
It matches anything that matches one of the branches.
|
It matches anything that matches one of the branches.
|
||||||
.PP
|
.Pp
|
||||||
A branch is one\(dg or more \fIpieces\fR, concatenated.
|
A branch is one\(dd or more
|
||||||
|
.Em pieces ,
|
||||||
|
concatenated.
|
||||||
It matches a match for the first, followed by a match for the second, etc.
|
It matches a match for the first, followed by a match for the second, etc.
|
||||||
.PP
|
.Pp
|
||||||
A piece is an \fIatom\fR possibly followed
|
A piece is an
|
||||||
by a single\(dg `*', `+', `?', or \fIbound\fR.
|
.Em atom
|
||||||
An atom followed by `*' matches a sequence of 0 or more matches of the atom.
|
possibly followed
|
||||||
An atom followed by `+' matches a sequence of 1 or more matches of the atom.
|
by a single\(dd
|
||||||
An atom followed by `?' matches a sequence of 0 or 1 matches of the atom.
|
.Ql \&* ,
|
||||||
.PP
|
.Ql \&+ ,
|
||||||
A \fIbound\fR is `{' followed by an unsigned decimal integer,
|
.Ql \&? ,
|
||||||
possibly followed by `,'
|
or
|
||||||
|
.Em bound .
|
||||||
|
An atom followed by
|
||||||
|
.Ql \&*
|
||||||
|
matches a sequence of 0 or more matches of the atom.
|
||||||
|
An atom followed by
|
||||||
|
.Ql \&+
|
||||||
|
matches a sequence of 1 or more matches of the atom.
|
||||||
|
An atom followed by
|
||||||
|
.Ql ?\&
|
||||||
|
matches a sequence of 0 or 1 matches of the atom.
|
||||||
|
.Pp
|
||||||
|
A
|
||||||
|
.Em bound
|
||||||
|
is
|
||||||
|
.Ql \&{
|
||||||
|
followed by an unsigned decimal integer,
|
||||||
|
possibly followed by
|
||||||
|
.Ql \&,
|
||||||
possibly followed by another unsigned decimal integer,
|
possibly followed by another unsigned decimal integer,
|
||||||
always followed by `}'.
|
always followed by
|
||||||
The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive,
|
.Ql \&} .
|
||||||
|
The integers must lie between 0 and
|
||||||
|
.Dv RE_DUP_MAX
|
||||||
|
(255\(dd) inclusive,
|
||||||
and if there are two of them, the first may not exceed the second.
|
and if there are two of them, the first may not exceed the second.
|
||||||
An atom followed by a bound containing one integer \fIi\fR
|
An atom followed by a bound containing one integer
|
||||||
|
.Em i
|
||||||
and no comma matches
|
and no comma matches
|
||||||
a sequence of exactly \fIi\fR matches of the atom.
|
a sequence of exactly
|
||||||
|
.Em i
|
||||||
|
matches of the atom.
|
||||||
An atom followed by a bound
|
An atom followed by a bound
|
||||||
containing one integer \fIi\fR and a comma matches
|
containing one integer
|
||||||
a sequence of \fIi\fR or more matches of the atom.
|
.Em i
|
||||||
|
and a comma matches
|
||||||
|
a sequence of
|
||||||
|
.Em i
|
||||||
|
or more matches of the atom.
|
||||||
An atom followed by a bound
|
An atom followed by a bound
|
||||||
containing two integers \fIi\fR and \fIj\fR matches
|
containing two integers
|
||||||
a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom.
|
.Em i
|
||||||
.PP
|
and
|
||||||
An atom is a regular expression enclosed in `()' (matching a match for the
|
.Em j
|
||||||
|
matches
|
||||||
|
a sequence of
|
||||||
|
.Em i
|
||||||
|
through
|
||||||
|
.Em j
|
||||||
|
(inclusive) matches of the atom.
|
||||||
|
.Pp
|
||||||
|
An atom is a regular expression enclosed in
|
||||||
|
.Ql ()
|
||||||
|
(matching a match for the
|
||||||
regular expression),
|
regular expression),
|
||||||
an empty set of `()' (matching the null string)\(dg,
|
an empty set of
|
||||||
a \fIbracket expression\fR (see below), `.'
|
.Ql ()
|
||||||
(matching any single character), `^' (matching the null string at the
|
(matching the null string)\(dd,
|
||||||
beginning of a line), `$' (matching the null string at the
|
a
|
||||||
end of a line), a `\e' followed by one of the characters
|
.Em bracket expression
|
||||||
`^.[$()|*+?{\e'
|
(see below),
|
||||||
|
.Ql .\&
|
||||||
|
(matching any single character),
|
||||||
|
.Ql \&^
|
||||||
|
(matching the null string at the beginning of a line),
|
||||||
|
.Ql \&$
|
||||||
|
(matching the null string at the end of a line), a
|
||||||
|
.Ql \e
|
||||||
|
followed by one of the characters
|
||||||
|
.Ql ^.[$()|*+?{\e
|
||||||
(matching that character taken as an ordinary character),
|
(matching that character taken as an ordinary character),
|
||||||
a `\e' followed by any other character\(dg
|
a
|
||||||
|
.Ql \e
|
||||||
|
followed by any other character\(dd
|
||||||
(matching that character taken as an ordinary character,
|
(matching that character taken as an ordinary character,
|
||||||
as if the `\e' had not been present\(dg),
|
as if the
|
||||||
|
.Ql \e
|
||||||
|
had not been present\(dd),
|
||||||
or a single character with no other significance (matching that character).
|
or a single character with no other significance (matching that character).
|
||||||
A `{' followed by a character other than a digit is an ordinary
|
A
|
||||||
character, not the beginning of a bound\(dg.
|
.Ql \&{
|
||||||
It is illegal to end an RE with `\e'.
|
followed by a character other than a digit is an ordinary
|
||||||
.PP
|
character, not the beginning of a bound\(dd.
|
||||||
A \fIbracket expression\fR is a list of characters enclosed in `[]'.
|
It is illegal to end an RE with
|
||||||
|
.Ql \e .
|
||||||
|
.Pp
|
||||||
|
A
|
||||||
|
.Em bracket expression
|
||||||
|
is a list of characters enclosed in
|
||||||
|
.Ql [] .
|
||||||
It normally matches any single character from the list (but see below).
|
It normally matches any single character from the list (but see below).
|
||||||
If the list begins with `^',
|
If the list begins with
|
||||||
|
.Ql \&^ ,
|
||||||
it matches any single character
|
it matches any single character
|
||||||
(but see below) \fInot\fR from the rest of the list.
|
(but see below)
|
||||||
If two characters in the list are separated by `\-', this is shorthand
|
.Em not
|
||||||
for the full \fIrange\fR of characters between those two (inclusive) in the
|
from the rest of the list.
|
||||||
|
If two characters in the list are separated by
|
||||||
|
.Ql \&- ,
|
||||||
|
this is shorthand
|
||||||
|
for the full
|
||||||
|
.Em range
|
||||||
|
of characters between those two (inclusive) in the
|
||||||
collating sequence,
|
collating sequence,
|
||||||
e.g. `[0\-9]' in ASCII matches any decimal digit.
|
.No e.g. Ql [0-9]
|
||||||
It is illegal\(dg for two ranges to share an
|
in ASCII matches any decimal digit.
|
||||||
endpoint, e.g. `a\-c\-e'.
|
It is illegal\(dd for two ranges to share an
|
||||||
|
endpoint,
|
||||||
|
.No e.g. Ql a-c-e .
|
||||||
Ranges are very collating-sequence-dependent,
|
Ranges are very collating-sequence-dependent,
|
||||||
and portable programs should avoid relying on them.
|
and portable programs should avoid relying on them.
|
||||||
.PP
|
.Pp
|
||||||
To include a literal `]' in the list, make it the first character
|
To include a literal
|
||||||
(following a possible `^').
|
.Ql \&]
|
||||||
To include a literal `\-', make it the first or last character,
|
in the list, make it the first character
|
||||||
|
(following a possible
|
||||||
|
.Ql \&^ ) .
|
||||||
|
To include a literal
|
||||||
|
.Ql \&- ,
|
||||||
|
make it the first or last character,
|
||||||
or the second endpoint of a range.
|
or the second endpoint of a range.
|
||||||
To use a literal `\-' as the first endpoint of a range,
|
To use a literal
|
||||||
enclose it in `[.' and `.]' to make it a collating element (see below).
|
.Ql \&-
|
||||||
With the exception of these and some combinations using `[' (see next
|
as the first endpoint of a range,
|
||||||
paragraphs), all other special characters, including `\e', lose their
|
enclose it in
|
||||||
special significance within a bracket expression.
|
.Ql [.\&
|
||||||
.PP
|
and
|
||||||
|
.Ql .]\&
|
||||||
|
to make it a collating element (see below).
|
||||||
|
With the exception of these and some combinations using
|
||||||
|
.Ql \&[
|
||||||
|
(see next paragraphs), all other special characters, including
|
||||||
|
.Ql \e ,
|
||||||
|
lose their special significance within a bracket expression.
|
||||||
|
.Pp
|
||||||
Within a bracket expression, a collating element (a character,
|
Within a bracket expression, a collating element (a character,
|
||||||
a multi-character sequence that collates as if it were a single character,
|
a multi-character sequence that collates as if it were a single character,
|
||||||
or a collating-sequence name for either)
|
or a collating-sequence name for either)
|
||||||
enclosed in `[.' and `.]' stands for the
|
enclosed in
|
||||||
|
.Ql [.\&
|
||||||
|
and
|
||||||
|
.Ql .]\&
|
||||||
|
stands for the
|
||||||
sequence of characters of that collating element.
|
sequence of characters of that collating element.
|
||||||
The sequence is a single element of the bracket expression's list.
|
The sequence is a single element of the bracket expression's list.
|
||||||
A bracket expression containing a multi-character collating element
|
A bracket expression containing a multi-character collating element
|
||||||
can thus match more than one character,
|
can thus match more than one character,
|
||||||
e.g. if the collating sequence includes a `ch' collating element,
|
e.g.\& if the collating sequence includes a
|
||||||
then the RE `[[.ch.]]*c' matches the first five characters
|
.Ql ch
|
||||||
of `chchcc'.
|
collating element,
|
||||||
.PP
|
then the RE
|
||||||
Within a bracket expression, a collating element enclosed in `[=' and
|
.Ql [[.ch.]]*c
|
||||||
`=]' is an equivalence class, standing for the sequences of characters
|
matches the first five characters
|
||||||
|
of
|
||||||
|
.Ql chchcc .
|
||||||
|
.Pp
|
||||||
|
Within a bracket expression, a collating element enclosed in
|
||||||
|
.Ql [=
|
||||||
|
and
|
||||||
|
.Ql =]
|
||||||
|
is an equivalence class, standing for the sequences of characters
|
||||||
of all collating elements equivalent to that one, including itself.
|
of all collating elements equivalent to that one, including itself.
|
||||||
(If there are no other equivalent collating elements,
|
(If there are no other equivalent collating elements,
|
||||||
the treatment is as if the enclosing delimiters were `[.' and `.]'.)
|
the treatment is as if the enclosing delimiters were
|
||||||
For example, if o and \o'o^' are the members of an equivalence class,
|
.Ql [.\&
|
||||||
then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous.
|
and
|
||||||
An equivalence class may not\(dg be an endpoint
|
.Ql .] . )
|
||||||
|
For example, if
|
||||||
|
.Ql x
|
||||||
|
and
|
||||||
|
.Ql y
|
||||||
|
are the members of an equivalence class,
|
||||||
|
then
|
||||||
|
.Ql [[=x=]] ,
|
||||||
|
.Ql [[=y=]] ,
|
||||||
|
and
|
||||||
|
.Ql [xy]
|
||||||
|
are all synonymous.
|
||||||
|
An equivalence class may not\(dd be an endpoint
|
||||||
of a range.
|
of a range.
|
||||||
.PP
|
.Pp
|
||||||
Within a bracket expression, the name of a \fIcharacter class\fR enclosed
|
Within a bracket expression, the name of a
|
||||||
in `[:' and `:]' stands for the list of all characters belonging to that
|
.Em character class
|
||||||
|
enclosed in
|
||||||
|
.Ql [:
|
||||||
|
and
|
||||||
|
.Ql :]
|
||||||
|
stands for the list of all characters belonging to that
|
||||||
class.
|
class.
|
||||||
Standard character class names are:
|
Standard character class names are:
|
||||||
.PP
|
.Pp
|
||||||
.RS
|
.Bl -column "alnum" "digit" "xdigit" -offset indent
|
||||||
.nf
|
.It Em "alnum digit punct"
|
||||||
.ta 3c 6c 9c
|
.It Em "alpha graph space"
|
||||||
alnum digit punct
|
.It Em "blank lower upper"
|
||||||
alpha graph space
|
.It Em "cntrl print xdigit"
|
||||||
blank lower upper
|
.El
|
||||||
cntrl print xdigit
|
.Pp
|
||||||
.fi
|
|
||||||
.RE
|
|
||||||
.PP
|
|
||||||
These stand for the character classes defined in
|
These stand for the character classes defined in
|
||||||
.IR ctype (3).
|
.Xr ctype 3 .
|
||||||
A locale may provide others.
|
A locale may provide others.
|
||||||
A character class may not be used as an endpoint of a range.
|
A character class may not be used as an endpoint of a range.
|
||||||
.PP
|
.Pp
|
||||||
There are two special cases\(dg of bracket expressions:
|
A bracketed expression like
|
||||||
the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at
|
.Ql [[:class:]]
|
||||||
the beginning and end of a word respectively.
|
can be used to match a single character that belongs to a character
|
||||||
A word is defined as a sequence of
|
class.
|
||||||
word characters
|
For the reverse, matching any character that does not belong to a specific
|
||||||
|
class, the negation operator of bracket expressions may be used:
|
||||||
|
.Ql [^[:class:]] .
|
||||||
|
.Pp
|
||||||
|
There are two special cases\(dd of bracket expressions:
|
||||||
|
the bracket expressions
|
||||||
|
.Ql [[:<:]]
|
||||||
|
and
|
||||||
|
.Ql [[:>:]]
|
||||||
|
match the null string at the beginning and end of a word respectively.
|
||||||
|
A word is defined as a sequence of word characters
|
||||||
which is neither preceded nor followed by
|
which is neither preceded nor followed by
|
||||||
word characters.
|
word characters.
|
||||||
A word character is an
|
A word character is an
|
||||||
.I alnum
|
.Em alnum
|
||||||
character (as defined by
|
character (as defined by
|
||||||
.IR ctype (3))
|
.Xr ctype 3 )
|
||||||
or an underscore.
|
or an underscore.
|
||||||
This is an extension,
|
This is an extension,
|
||||||
compatible with but not specified by POSIX 1003.2,
|
compatible with but not specified by
|
||||||
|
.St -p1003.2 ,
|
||||||
and should be used with
|
and should be used with
|
||||||
caution in software intended to be portable to other systems.
|
caution in software intended to be portable to other systems.
|
||||||
.PP
|
.Pp
|
||||||
In the event that an RE could match more than one substring of a given
|
In the event that an RE could match more than one substring of a given
|
||||||
string,
|
string,
|
||||||
the RE matches the one starting earliest in the string.
|
the RE matches the one starting earliest in the string.
|
||||||
|
@ -157,79 +323,158 @@ with subexpressions starting earlier in the RE taking priority over
|
||||||
ones starting later.
|
ones starting later.
|
||||||
Note that higher-level subexpressions thus take priority over
|
Note that higher-level subexpressions thus take priority over
|
||||||
their lower-level component subexpressions.
|
their lower-level component subexpressions.
|
||||||
.PP
|
.Pp
|
||||||
Match lengths are measured in characters, not collating elements.
|
Match lengths are measured in characters, not collating elements.
|
||||||
A null string is considered longer than no match at all.
|
A null string is considered longer than no match at all.
|
||||||
For example,
|
For example,
|
||||||
`bb*' matches the three middle characters of `abbbc',
|
.Ql bb*
|
||||||
`(wee|week)(knights|nights)' matches all ten characters of `weeknights',
|
matches the three middle characters of
|
||||||
when `(.*).*' is matched against `abc' the parenthesized subexpression
|
.Ql abbbc ,
|
||||||
|
.Ql (wee|week)(knights|nights)
|
||||||
|
matches all ten characters of
|
||||||
|
.Ql weeknights ,
|
||||||
|
when
|
||||||
|
.Ql (.*).*\&
|
||||||
|
is matched against
|
||||||
|
.Ql abc
|
||||||
|
the parenthesized subexpression
|
||||||
matches all three characters, and
|
matches all three characters, and
|
||||||
when `(a*)*' is matched against `bc' both the whole RE and the parenthesized
|
when
|
||||||
|
.Ql (a*)*
|
||||||
|
is matched against
|
||||||
|
.Ql bc
|
||||||
|
both the whole RE and the parenthesized
|
||||||
subexpression match the null string.
|
subexpression match the null string.
|
||||||
.PP
|
.Pp
|
||||||
If case-independent matching is specified,
|
If case-independent matching is specified,
|
||||||
the effect is much as if all case distinctions had vanished from the
|
the effect is much as if all case distinctions had vanished from the
|
||||||
alphabet.
|
alphabet.
|
||||||
When an alphabetic that exists in multiple cases appears as an
|
When an alphabetic that exists in multiple cases appears as an
|
||||||
ordinary character outside a bracket expression, it is effectively
|
ordinary character outside a bracket expression, it is effectively
|
||||||
transformed into a bracket expression containing both cases,
|
transformed into a bracket expression containing both cases,
|
||||||
e.g. `x' becomes `[xX]'.
|
.No e.g. Ql x
|
||||||
|
becomes
|
||||||
|
.Ql [xX] .
|
||||||
When it appears inside a bracket expression, all case counterparts
|
When it appears inside a bracket expression, all case counterparts
|
||||||
of it are added to the bracket expression, so that (e.g.) `[x]'
|
of it are added to the bracket expression, so that (e.g.)
|
||||||
becomes `[xX]' and `[^x]' becomes `[^xX]'.
|
.Ql [x]
|
||||||
.PP
|
becomes
|
||||||
No particular limit is imposed on the length of REs\(dg.
|
.Ql [xX]
|
||||||
|
and
|
||||||
|
.Ql [^x]
|
||||||
|
becomes
|
||||||
|
.Ql [^xX] .
|
||||||
|
.Pp
|
||||||
|
No particular limit is imposed on the length of REs\(dd.
|
||||||
Programs intended to be portable should not employ REs longer
|
Programs intended to be portable should not employ REs longer
|
||||||
than 256 bytes,
|
than 256 bytes,
|
||||||
as an implementation can refuse to accept such REs and remain
|
as an implementation can refuse to accept such REs and remain
|
||||||
POSIX-compliant.
|
POSIX-compliant.
|
||||||
.PP
|
.Pp
|
||||||
Obsolete (``basic'') regular expressions differ in several respects.
|
Obsolete
|
||||||
`|', `+', and `?' are ordinary characters and there is no equivalent
|
.Pq Dq basic
|
||||||
for their functionality.
|
regular expressions differ in several respects.
|
||||||
The delimiters for bounds are `\e{' and `\e}',
|
.Ql \&|
|
||||||
with `{' and `}' by themselves ordinary characters.
|
is an ordinary character and there is no equivalent
|
||||||
The parentheses for nested subexpressions are `\e(' and `\e)',
|
for its functionality.
|
||||||
with `(' and `)' by themselves ordinary characters.
|
.Ql \&+
|
||||||
`^' is an ordinary character except at the beginning of the
|
and
|
||||||
RE or\(dg the beginning of a parenthesized subexpression,
|
.Ql ?\&
|
||||||
`$' is an ordinary character except at the end of the
|
are ordinary characters, and their functionality
|
||||||
RE or\(dg the end of a parenthesized subexpression,
|
can be expressed using bounds
|
||||||
and `*' is an ordinary character if it appears at the beginning of the
|
.No ( Ql {1,}
|
||||||
|
or
|
||||||
|
.Ql {0,1}
|
||||||
|
respectively).
|
||||||
|
Also note that
|
||||||
|
.Ql x+
|
||||||
|
in modern REs is equivalent to
|
||||||
|
.Ql xx* .
|
||||||
|
The delimiters for bounds are
|
||||||
|
.Ql \e{
|
||||||
|
and
|
||||||
|
.Ql \e} ,
|
||||||
|
with
|
||||||
|
.Ql \&{
|
||||||
|
and
|
||||||
|
.Ql \&}
|
||||||
|
by themselves ordinary characters.
|
||||||
|
The parentheses for nested subexpressions are
|
||||||
|
.Ql \e(
|
||||||
|
and
|
||||||
|
.Ql \e) ,
|
||||||
|
with
|
||||||
|
.Ql \&(
|
||||||
|
and
|
||||||
|
.Ql \&)
|
||||||
|
by themselves ordinary characters.
|
||||||
|
.Ql \&^
|
||||||
|
is an ordinary character except at the beginning of the
|
||||||
|
RE or\(dd the beginning of a parenthesized subexpression,
|
||||||
|
.Ql \&$
|
||||||
|
is an ordinary character except at the end of the
|
||||||
|
RE or\(dd the end of a parenthesized subexpression,
|
||||||
|
and
|
||||||
|
.Ql \&*
|
||||||
|
is an ordinary character if it appears at the beginning of the
|
||||||
RE or the beginning of a parenthesized subexpression
|
RE or the beginning of a parenthesized subexpression
|
||||||
(after a possible leading `^').
|
(after a possible leading
|
||||||
Finally, there is one new type of atom, a \fIback reference\fR:
|
.Ql \&^ ) .
|
||||||
`\e' followed by a non-zero decimal digit \fId\fR
|
Finally, there is one new type of atom, a
|
||||||
|
.Em back reference :
|
||||||
|
.Ql \e
|
||||||
|
followed by a non-zero decimal digit
|
||||||
|
.Em d
|
||||||
matches the same sequence of characters
|
matches the same sequence of characters
|
||||||
matched by the \fId\fRth parenthesized subexpression
|
matched by the
|
||||||
|
.Em d Ns th
|
||||||
|
parenthesized subexpression
|
||||||
(numbering subexpressions by the positions of their opening parentheses,
|
(numbering subexpressions by the positions of their opening parentheses,
|
||||||
left to right),
|
left to right),
|
||||||
so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'.
|
so that (e.g.)
|
||||||
.SH SEE ALSO
|
.Ql \e([bc]\e)\e1
|
||||||
regex(3)
|
matches
|
||||||
.PP
|
.Ql bb
|
||||||
POSIX 1003.2, section 2.8 (Regular Expression Notation).
|
or
|
||||||
.SH HISTORY
|
.Ql cc
|
||||||
Written by Henry Spencer, based on the 1003.2 spec.
|
but not
|
||||||
.SH BUGS
|
.Ql bc .
|
||||||
|
.Sh SEE ALSO
|
||||||
|
.Xr regex 3
|
||||||
|
.Rs
|
||||||
|
.%T Regular Expression Notation
|
||||||
|
.%R IEEE Std
|
||||||
|
.%N 1003.2
|
||||||
|
.%P section 2.8
|
||||||
|
.Re
|
||||||
|
.Sh BUGS
|
||||||
Having two kinds of REs is a botch.
|
Having two kinds of REs is a botch.
|
||||||
.PP
|
.Pp
|
||||||
The current 1003.2 spec says that `)' is an ordinary character in
|
The current
|
||||||
the absence of an unmatched `(';
|
.St -p1003.2
|
||||||
|
spec says that
|
||||||
|
.Ql \&)
|
||||||
|
is an ordinary character in
|
||||||
|
the absence of an unmatched
|
||||||
|
.Ql \&( ;
|
||||||
this was an unintentional result of a wording error,
|
this was an unintentional result of a wording error,
|
||||||
and change is likely.
|
and change is likely.
|
||||||
Avoid relying on it.
|
Avoid relying on it.
|
||||||
.PP
|
.Pp
|
||||||
Back references are a dreadful botch,
|
Back references are a dreadful botch,
|
||||||
posing major problems for efficient implementations.
|
posing major problems for efficient implementations.
|
||||||
They are also somewhat vaguely defined
|
They are also somewhat vaguely defined
|
||||||
(does
|
(does
|
||||||
`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?).
|
.Ql a\e(\e(b\e)*\e2\e)*d
|
||||||
|
match
|
||||||
|
.Ql abbbd ? ) .
|
||||||
Avoid using them.
|
Avoid using them.
|
||||||
.PP
|
.Pp
|
||||||
1003.2's specification of case-independent matching is vague.
|
.St -p1003.2
|
||||||
The ``one case implies all cases'' definition given above
|
specification of case-independent matching is vague.
|
||||||
|
The
|
||||||
|
.Dq one case implies all cases
|
||||||
|
definition given above
|
||||||
is current consensus among implementors as to the right interpretation.
|
is the current consensus among implementors as to the right interpretation.
|
||||||
.PP
|
.Pp
|
||||||
The syntax for word boundaries is incredibly ugly.
|
The syntax for word boundaries is incredibly ugly.
|
||||||
|
|
|
@ -1,76 +0,0 @@
|
||||||
#ifndef _REGEX_H_
|
|
||||||
#define _REGEX_H_ /* never again */
|
|
||||||
#include <sys/types.h>
|
|
||||||
/* ========= begin header generated by ./mkh ========= */
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* === regex2.h === */
|
|
||||||
typedef _off_t regoff_t;
|
|
||||||
typedef struct {
|
|
||||||
int re_magic;
|
|
||||||
size_t re_nsub; /* number of parenthesized subexpressions */
|
|
||||||
const char *re_endp; /* end pointer for REG_PEND */
|
|
||||||
struct re_guts *re_g; /* none of your business :-) */
|
|
||||||
} regex_t;
|
|
||||||
typedef struct {
|
|
||||||
regoff_t rm_so; /* start of match */
|
|
||||||
regoff_t rm_eo; /* end of match */
|
|
||||||
} regmatch_t;
|
|
||||||
|
|
||||||
|
|
||||||
/* === regcomp.c === */
|
|
||||||
extern int regcomp(regex_t *, const char *, int);
|
|
||||||
#define REG_BASIC 0000
|
|
||||||
#define REG_EXTENDED 0001
|
|
||||||
#define REG_ICASE 0002
|
|
||||||
#define REG_NOSUB 0004
|
|
||||||
#define REG_NEWLINE 0010
|
|
||||||
#define REG_NOSPEC 0020
|
|
||||||
#define REG_PEND 0040
|
|
||||||
#define REG_DUMP 0200
|
|
||||||
|
|
||||||
|
|
||||||
/* === regerror.c === */
|
|
||||||
#define REG_OKAY 0
|
|
||||||
#define REG_NOMATCH 1
|
|
||||||
#define REG_BADPAT 2
|
|
||||||
#define REG_ECOLLATE 3
|
|
||||||
#define REG_ECTYPE 4
|
|
||||||
#define REG_EESCAPE 5
|
|
||||||
#define REG_ESUBREG 6
|
|
||||||
#define REG_EBRACK 7
|
|
||||||
#define REG_EPAREN 8
|
|
||||||
#define REG_EBRACE 9
|
|
||||||
#define REG_BADBR 10
|
|
||||||
#define REG_ERANGE 11
|
|
||||||
#define REG_ESPACE 12
|
|
||||||
#define REG_BADRPT 13
|
|
||||||
#define REG_EMPTY 14
|
|
||||||
#define REG_ASSERT 15
|
|
||||||
#define REG_INVARG 16
|
|
||||||
#define REG_ATOI 255 /* convert name to number (!) */
|
|
||||||
#define REG_ITOA 0400 /* convert number to name (!) */
|
|
||||||
extern size_t regerror(int, const regex_t *, char *, size_t);
|
|
||||||
|
|
||||||
|
|
||||||
/* === regexec.c === */
|
|
||||||
extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
|
|
||||||
#define REG_NOTBOL 00001
|
|
||||||
#define REG_NOTEOL 00002
|
|
||||||
#define REG_STARTEND 00004
|
|
||||||
#define REG_TRACE 00400 /* tracing of execution */
|
|
||||||
#define REG_LARGE 01000 /* force large representation */
|
|
||||||
#define REG_BACKR 02000 /* force use of backref code */
|
|
||||||
|
|
||||||
|
|
||||||
/* === regfree.c === */
|
|
||||||
extern void regfree(regex_t *);
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
/* ========= end header generated by ./mkh ========= */
|
|
||||||
#endif
|
|
|
@ -1,6 +1,42 @@
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
|
||||||
|
* $FreeBSD: src/lib/libc/regex/regex2.h,v 1.11 2007/01/09 00:28:04 imp Exp $
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First, the stuff that ends up in the outside-world include file
|
* First, the stuff that ends up in the outside-world include file
|
||||||
= typedef _off_t regoff_t;
|
= typedef off_t regoff_t;
|
||||||
= typedef struct {
|
= typedef struct {
|
||||||
= int re_magic;
|
= int re_magic;
|
||||||
= size_t re_nsub; // number of parenthesized subexpressions
|
= size_t re_nsub; // number of parenthesized subexpressions
|
||||||
|
@ -36,66 +72,91 @@
|
||||||
* In state representations, an operator's bit is on to signify a state
|
* In state representations, an operator's bit is on to signify a state
|
||||||
* immediately *preceding* "execution" of that operator.
|
* immediately *preceding* "execution" of that operator.
|
||||||
*/
|
*/
|
||||||
typedef long sop; /* strip operator */
|
typedef unsigned long sop; /* strip operator */
|
||||||
typedef long sopno;
|
typedef long sopno;
|
||||||
#define OPRMASK 0x7c000000
|
#define OPRMASK 0xf8000000L
|
||||||
#define OPDMASK 0x03ffffff
|
#define OPDMASK 0x07ffffffL
|
||||||
#define OPSHIFT (26)
|
#define OPSHIFT ((unsigned)27)
|
||||||
#define OP(n) ((n)&OPRMASK)
|
#define OP(n) ((n)&OPRMASK)
|
||||||
#define OPND(n) ((n)&OPDMASK)
|
#define OPND(n) ((n)&OPDMASK)
|
||||||
#define SOP(op, opnd) ((op)|(opnd))
|
#define SOP(op, opnd) ((op)|(opnd))
|
||||||
/* operators meaning operand */
|
/* operators meaning operand */
|
||||||
/* (back, fwd are offsets) */
|
/* (back, fwd are offsets) */
|
||||||
#define OEND (1<<OPSHIFT) /* endmarker - */
|
#define OEND (1L<<OPSHIFT) /* endmarker - */
|
||||||
#define OCHAR (2<<OPSHIFT) /* character unsigned char */
|
#define OCHAR (2L<<OPSHIFT) /* character wide character */
|
||||||
#define OBOL (3<<OPSHIFT) /* left anchor - */
|
#define OBOL (3L<<OPSHIFT) /* left anchor - */
|
||||||
#define OEOL (4<<OPSHIFT) /* right anchor - */
|
#define OEOL (4L<<OPSHIFT) /* right anchor - */
|
||||||
#define OANY (5<<OPSHIFT) /* . - */
|
#define OANY (5L<<OPSHIFT) /* . - */
|
||||||
#define OANYOF (6<<OPSHIFT) /* [...] set number */
|
#define OANYOF (6L<<OPSHIFT) /* [...] set number */
|
||||||
#define OBACK_ (7<<OPSHIFT) /* begin \d paren number */
|
#define OBACK_ (7L<<OPSHIFT) /* begin \d paren number */
|
||||||
#define O_BACK (8<<OPSHIFT) /* end \d paren number */
|
#define O_BACK (8L<<OPSHIFT) /* end \d paren number */
|
||||||
#define OPLUS_ (9<<OPSHIFT) /* + prefix fwd to suffix */
|
#define OPLUS_ (9L<<OPSHIFT) /* + prefix fwd to suffix */
|
||||||
#define O_PLUS (10<<OPSHIFT) /* + suffix back to prefix */
|
#define O_PLUS (10L<<OPSHIFT) /* + suffix back to prefix */
|
||||||
#define OQUEST_ (11<<OPSHIFT) /* ? prefix fwd to suffix */
|
#define OQUEST_ (11L<<OPSHIFT) /* ? prefix fwd to suffix */
|
||||||
#define O_QUEST (12<<OPSHIFT) /* ? suffix back to prefix */
|
#define O_QUEST (12L<<OPSHIFT) /* ? suffix back to prefix */
|
||||||
#define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */
|
#define OLPAREN (13L<<OPSHIFT) /* ( fwd to ) */
|
||||||
#define ORPAREN (14<<OPSHIFT) /* ) back to ( */
|
#define ORPAREN (14L<<OPSHIFT) /* ) back to ( */
|
||||||
#define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */
|
#define OCH_ (15L<<OPSHIFT) /* begin choice fwd to OOR2 */
|
||||||
#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
|
#define OOR1 (16L<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
|
||||||
#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
|
#define OOR2 (17L<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
|
||||||
#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */
|
#define O_CH (18L<<OPSHIFT) /* end choice back to OOR1 */
|
||||||
#define OBOW (19<<OPSHIFT) /* begin word - */
|
#define OBOW (19L<<OPSHIFT) /* begin word - */
|
||||||
#define OEOW (20<<OPSHIFT) /* end word - */
|
#define OEOW (20L<<OPSHIFT) /* end word - */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Structure for [] character-set representation. Character sets are
|
* Structures for [] character-set representation.
|
||||||
* done as bit vectors, grouped 8 to a byte vector for compactness.
|
|
||||||
* The individual set therefore has both a pointer to the byte vector
|
|
||||||
* and a mask to pick out the relevant bit of each byte. A hash code
|
|
||||||
* simplifies testing whether two sets could be identical.
|
|
||||||
*
|
|
||||||
* This will get trickier for multicharacter collating elements. As
|
|
||||||
* preliminary hooks for dealing with such things, we also carry along
|
|
||||||
* a string of multi-character elements, and decide the size of the
|
|
||||||
* vectors at run time.
|
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uch *ptr; /* -> uch [csetsize] */
|
wint_t min;
|
||||||
uch mask; /* bit within array */
|
wint_t max;
|
||||||
uch hash; /* hash code */
|
} crange;
|
||||||
size_t smultis;
|
typedef struct {
|
||||||
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
|
unsigned char bmp[NC / 8];
|
||||||
|
wctype_t *types;
|
||||||
|
int ntypes;
|
||||||
|
wint_t *wides;
|
||||||
|
int nwides;
|
||||||
|
crange *ranges;
|
||||||
|
int nranges;
|
||||||
|
int invert;
|
||||||
|
int icase;
|
||||||
} cset;
|
} cset;
|
||||||
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
|
|
||||||
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
|
|
||||||
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
|
|
||||||
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
|
|
||||||
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
|
|
||||||
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
|
|
||||||
#define MCin(p, cs, cp) mcin(p, cs, cp)
|
|
||||||
|
|
||||||
/* stuff for character categories */
|
/*
 * CHIN1 - test whether wide character ch belongs to character set cs.
 *
 * For ch < NC only the bitmap cs->bmp is consulted.  For larger values the
 * lists cs->wides (exact characters), cs->ranges (inclusive min..max) and
 * cs->types (wctype classes, via iswctype()) are searched in that order.
 * The result is flipped when cs->invert is set.
 *
 * Returns nonzero if ch is accepted by the set, zero otherwise.
 * No case folding is done here.
 */
static int
|
||||||
typedef unsigned char cat_t; /* NOTE(review): appears to be a removed old-side row of the diff, not part of CHIN1 */
|
CHIN1(cset *cs, wint_t ch)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
assert(ch >= 0);
|
||||||
|
if (ch < NC) /* small code points: one bit per character in bmp */
|
||||||
|
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
|
||||||
|
cs->invert);
|
||||||
|
for (i = 0; i < cs->nwides; i++) /* individually listed wide characters */
|
||||||
|
if (ch == cs->wides[i])
|
||||||
|
return (!cs->invert);
|
||||||
|
for (i = 0; i < cs->nranges; i++) /* ranges, both endpoints inclusive */
|
||||||
|
if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max)
|
||||||
|
return (!cs->invert);
|
||||||
|
for (i = 0; i < cs->ntypes; i++) /* character classes */
|
||||||
|
if (iswctype(ch, cs->types[i]))
|
||||||
|
return (!cs->invert);
|
||||||
|
return (cs->invert); /* not found in any list */
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * CHIN - set-membership test used by callers; wraps CHIN1().
 *
 * For ch < NC the bitmap cs->bmp is tested directly (XORed with
 * cs->invert) and cs->icase is not looked at in this function.
 * For ch >= NC the test is delegated to CHIN1(); when cs->icase is set,
 * ch, towlower(ch) and towupper(ch) are each tried and the results ORed.
 *
 * Returns nonzero if ch is accepted by the set, zero otherwise.
 */
static __inline int
|
||||||
|
CHIN(cset *cs, wint_t ch)
|
||||||
|
{
|
||||||
|
|
||||||
|
assert(ch >= 0);
|
||||||
|
if (ch < NC) /* fast path: bitmap lookup only */
|
||||||
|
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
|
||||||
|
cs->invert);
|
||||||
|
else if (cs->icase) /* NOTE(review): with cs->invert set, ORing three already-inverted results accepts ch if any case variant is outside the set - confirm this is intended */
|
||||||
|
return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
|
||||||
|
CHIN1(cs, towupper(ch)));
|
||||||
|
else
|
||||||
|
return (CHIN1(cs, ch));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* main compiled-expression structure
|
* main compiled-expression structure
|
||||||
|
@ -104,10 +165,8 @@ struct re_guts {
|
||||||
int magic;
|
int magic;
|
||||||
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
||||||
sop *strip; /* malloced area for strip */
|
sop *strip; /* malloced area for strip */
|
||||||
int csetsize; /* number of bits in a cset vector */
|
|
||||||
int ncsets; /* number of csets in use */
|
int ncsets; /* number of csets in use */
|
||||||
cset *sets; /* -> cset [ncsets] */
|
cset *sets; /* -> cset [ncsets] */
|
||||||
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
|
|
||||||
int cflags; /* copy of regcomp() cflags argument */
|
int cflags; /* copy of regcomp() cflags argument */
|
||||||
sopno nstates; /* = number of sops */
|
sopno nstates; /* = number of sops */
|
||||||
sopno firststate; /* the initial OEND (normally 0) */
|
sopno firststate; /* the initial OEND (normally 0) */
|
||||||
|
@ -118,17 +177,16 @@ struct re_guts {
|
||||||
# define BAD 04 /* something wrong */
|
# define BAD 04 /* something wrong */
|
||||||
int nbol; /* number of ^ used */
|
int nbol; /* number of ^ used */
|
||||||
int neol; /* number of $ used */
|
int neol; /* number of $ used */
|
||||||
int ncategories; /* how many character categories */
|
|
||||||
cat_t *categories; /* ->catspace[-CHAR_MIN] */
|
|
||||||
char *must; /* match must contain this string */
|
char *must; /* match must contain this string */
|
||||||
|
int moffset; /* latest point at which must may be located */
|
||||||
|
int *charjump; /* Boyer-Moore char jump table */
|
||||||
|
int *matchjump; /* Boyer-Moore match jump table */
|
||||||
int mlen; /* length of must */
|
int mlen; /* length of must */
|
||||||
size_t nsub; /* copy of re_nsub */
|
size_t nsub; /* copy of re_nsub */
|
||||||
int backrefs; /* does it use back references? */
|
int backrefs; /* does it use back references? */
|
||||||
sopno nplus; /* how deep does it nest +s? */
|
sopno nplus; /* how deep does it nest +s? */
|
||||||
/* catspace must be last */
|
|
||||||
cat_t catspace[1]; /* actually [NC] */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* misc utilities */
|
/* misc utilities */
|
||||||
#define OUT (CHAR_MAX+1) /* a non-character value */
|
#define OUT (CHAR_MIN - 1) /* a non-character value */
|
||||||
#define ISWORD(c) (isalnum((unsigned char)c) || (c) == '_')
|
#define ISWORD(c) (iswalnum((uch)(c)) || (c) == '_')
|
||||||
|
|
|
@ -1,48 +1,131 @@
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regexec.c 8.3 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(LIBC_SCCS) && !defined(lint)
|
||||||
|
static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94";
|
||||||
|
#endif /* LIBC_SCCS and not lint */
|
||||||
|
#include <sys/cdefs.h>
|
||||||
|
__FBSDID("$FreeBSD: src/lib/libc/regex/regexec.c,v 1.8 2007/06/11 03:05:54 delphij Exp $");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the outer shell of regexec()
|
* the outer shell of regexec()
|
||||||
*
|
*
|
||||||
* This file includes engine.c *twice*, after muchos fiddling with the
|
* This file includes engine.c three times, after muchos fiddling with the
|
||||||
* macros that code uses. This lets the same code operate on two different
|
* macros that code uses. This lets the same code operate on two different
|
||||||
* representations for state sets.
|
* representations for state sets and characters.
|
||||||
*/
|
*/
|
||||||
|
#ifdef __CYGWIN__
|
||||||
#include "winsup.h"
|
#include "winsup.h"
|
||||||
|
#endif
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include "regex.h"
|
#include <regex.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
#include <wctype.h>
|
||||||
|
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "regex2.h"
|
#include "regex2.h"
|
||||||
|
|
||||||
#ifdef lint
|
#ifdef __CYGWIN__
|
||||||
static int nope = 0; /* for use in asserts; shuts lint up */
|
#define __unused __attribute__ ((unused))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static int nope __unused = 0; /* for use in asserts; shuts lint up */
|
||||||
|
|
||||||
|
/*
 * xmbrtowc - mbrtowc() wrapper that never returns 0 or an error value.
 *
 * Converts the next multibyte character of s (at most n bytes, using
 * conversion state mbs) and, when wi is non-NULL, stores it in *wi.
 *
 * Returns the byte count reported by mbrtowc(), with two exceptions:
 *  - a count of 0 is returned as 1;
 *  - on (size_t)-1 or (size_t)-2 the state *mbs is zeroed, `dummy' is
 *    stored in *wi (if non-NULL) and 1 is returned.
 */
static __inline size_t
|
||||||
|
xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
|
||||||
|
{
|
||||||
|
size_t nr;
|
||||||
|
wchar_t wc;
|
||||||
|
|
||||||
|
nr = mbrtowc(&wc, s, n, mbs);
|
||||||
|
if (wi != NULL)
|
||||||
|
*wi = wc; /* NOTE(review): wc may be unset if mbrtowc() failed; *wi is overwritten with dummy in the error branch below */
|
||||||
|
if (nr == 0) /* mbrtowc() reports 0 for the null character; count it as one byte */
|
||||||
|
return (1);
|
||||||
|
else if (nr == (size_t)-1 || nr == (size_t)-2) { /* invalid or incomplete sequence */
|
||||||
|
memset(mbs, 0, sizeof(*mbs)); /* reset the conversion state */
|
||||||
|
if (wi != NULL)
|
||||||
|
*wi = dummy;
|
||||||
|
return (1); /* report a single byte */
|
||||||
|
} else
|
||||||
|
return (nr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * xmbrtowc_dummy - single-byte stand-in with the same signature as
 * xmbrtowc(); this file maps XMBRTOWC to it for the engine variants
 * marked "no multibyte support".
 *
 * Stores the byte at *s, converted through unsigned char, in *wi when wi
 * is non-NULL.  The n, mbs and dummy arguments are ignored.
 * Always returns 1.
 */
static __inline size_t
|
||||||
|
xmbrtowc_dummy(wint_t *wi,
|
||||||
|
const char *s,
|
||||||
|
size_t n __unused,
|
||||||
|
mbstate_t *mbs __unused,
|
||||||
|
wint_t dummy __unused)
|
||||||
|
{
|
||||||
|
|
||||||
|
if (wi != NULL)
|
||||||
|
*wi = (unsigned char)*s; /* cast keeps bytes >= 0x80 from becoming negative */
|
||||||
|
return (1);
|
||||||
|
}
|
||||||
|
|
||||||
/* macros for manipulating states, small version */
|
/* macros for manipulating states, small version */
|
||||||
#define states unsigned
|
#define states long
|
||||||
#define states1 unsigned /* for later use in regexec() decision */
|
#define states1 states /* for later use in regexec() decision */
|
||||||
#define CLEAR(v) ((v) = 0)
|
#define CLEAR(v) ((v) = 0)
|
||||||
#define SET0(v, n) ((v) &= ~((unsigned)1 << (n)))
|
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
|
||||||
#define SET1(v, n) ((v) |= (unsigned)1 << (n))
|
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
|
||||||
#define ISSET(v, n) ((v) & ((unsigned)1 << (n)))
|
#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
|
||||||
#define ASSIGN(d, s) ((d) = (s))
|
#define ASSIGN(d, s) ((d) = (s))
|
||||||
#define EQ(a, b) ((a) == (b))
|
#define EQ(a, b) ((a) == (b))
|
||||||
#define STATEVARS int dummy /* dummy version */
|
#define STATEVARS long dummy /* dummy version */
|
||||||
#define STATESETUP(m, n) /* nothing */
|
#define STATESETUP(m, n) /* nothing */
|
||||||
#define STATETEARDOWN(m) /* nothing */
|
#define STATETEARDOWN(m) /* nothing */
|
||||||
#define SETUP(v) ((v) = 0)
|
#define SETUP(v) ((v) = 0)
|
||||||
#define onestate unsigned
|
#define onestate long
|
||||||
#define INIT(o, n) ((o) = (unsigned)1 << (n))
|
#define INIT(o, n) ((o) = (unsigned long)1 << (n))
|
||||||
#define INC(o) ((o) <<= 1)
|
#define INC(o) ((o) <<= 1)
|
||||||
#define ISSTATEIN(v, o) ((v) & (o))
|
#define ISSTATEIN(v, o) (((v) & (o)) != 0)
|
||||||
/* some abbreviations; note that some of these know variable names! */
|
/* some abbreviations; note that some of these know variable names! */
|
||||||
/* do "if I'm here, I can also be there" etc without branches */
|
/* do "if I'm here, I can also be there" etc without branches */
|
||||||
#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n))
|
#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
|
||||||
#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n))
|
#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
|
||||||
#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n)))
|
#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
|
||||||
|
/* no multibyte support */
|
||||||
|
#define XMBRTOWC xmbrtowc_dummy
|
||||||
|
#define ZAPSTATE(mbs) ((void)(mbs))
|
||||||
/* function names */
|
/* function names */
|
||||||
#define SNAMES /* engine.c looks after details */
|
#define SNAMES /* engine.c looks after details */
|
||||||
|
|
||||||
|
@ -68,6 +151,8 @@ static int nope = 0; /* for use in asserts; shuts lint up */
|
||||||
#undef BACK
|
#undef BACK
|
||||||
#undef ISSETBACK
|
#undef ISSETBACK
|
||||||
#undef SNAMES
|
#undef SNAMES
|
||||||
|
#undef XMBRTOWC
|
||||||
|
#undef ZAPSTATE
|
||||||
|
|
||||||
/* macros for manipulating states, large version */
|
/* macros for manipulating states, large version */
|
||||||
#define states char *
|
#define states char *
|
||||||
|
@ -77,13 +162,13 @@ static int nope = 0; /* for use in asserts; shuts lint up */
|
||||||
#define ISSET(v, n) ((v)[n])
|
#define ISSET(v, n) ((v)[n])
|
||||||
#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
|
#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
|
||||||
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
|
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
|
||||||
#define STATEVARS int vn; char *space
|
#define STATEVARS long vn; char *space
|
||||||
#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
|
#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
|
||||||
if ((m)->space == NULL) return(REG_ESPACE); \
|
if ((m)->space == NULL) return(REG_ESPACE); \
|
||||||
(m)->vn = 0; }
|
(m)->vn = 0; }
|
||||||
#define STATETEARDOWN(m) { free((m)->space); }
|
#define STATETEARDOWN(m) { free((m)->space); }
|
||||||
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
|
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
|
||||||
#define onestate int
|
#define onestate long
|
||||||
#define INIT(o, n) ((o) = (n))
|
#define INIT(o, n) ((o) = (n))
|
||||||
#define INC(o) ((o)++)
|
#define INC(o) ((o)++)
|
||||||
#define ISSTATEIN(v, o) ((v)[o])
|
#define ISSTATEIN(v, o) ((v)[o])
|
||||||
|
@ -92,11 +177,24 @@ static int nope = 0; /* for use in asserts; shuts lint up */
|
||||||
#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
|
#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
|
||||||
#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
|
#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
|
||||||
#define ISSETBACK(v, n) ((v)[here - (n)])
|
#define ISSETBACK(v, n) ((v)[here - (n)])
|
||||||
|
/* no multibyte support */
|
||||||
|
#define XMBRTOWC xmbrtowc_dummy
|
||||||
|
#define ZAPSTATE(mbs) ((void)(mbs))
|
||||||
/* function names */
|
/* function names */
|
||||||
#define LNAMES /* flag */
|
#define LNAMES /* flag */
|
||||||
|
|
||||||
#include "engine.c"
|
#include "engine.c"
|
||||||
|
|
||||||
|
/* multibyte character & large states version */
|
||||||
|
#undef LNAMES
|
||||||
|
#undef XMBRTOWC
|
||||||
|
#undef ZAPSTATE
|
||||||
|
#define XMBRTOWC xmbrtowc
|
||||||
|
#define ZAPSTATE(mbs) memset((mbs), 0, sizeof(*(mbs)))
|
||||||
|
#define MNAMES
|
||||||
|
|
||||||
|
#include "engine.c"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
- regexec - interface for matching
|
- regexec - interface for matching
|
||||||
= extern int regexec(const regex_t *, const char *, size_t, \
|
= extern int regexec(const regex_t *, const char *, size_t, \
|
||||||
|
@ -113,14 +211,13 @@ static int nope = 0; /* for use in asserts; shuts lint up */
|
||||||
* have been prototyped.
|
* have been prototyped.
|
||||||
*/
|
*/
|
||||||
int /* 0 success, REG_NOMATCH failure */
|
int /* 0 success, REG_NOMATCH failure */
|
||||||
regexec(preg, string, nmatch, pmatch, eflags)
|
regexec(const regex_t * __restrict preg,
|
||||||
const regex_t *preg;
|
const char * __restrict string,
|
||||||
const char *string;
|
size_t nmatch,
|
||||||
size_t nmatch;
|
regmatch_t pmatch[__restrict],
|
||||||
regmatch_t pmatch[];
|
int eflags)
|
||||||
int eflags;
|
|
||||||
{
|
{
|
||||||
register struct re_guts *g = preg->re_g;
|
struct re_guts *g = preg->re_g;
|
||||||
#ifdef REDEBUG
|
#ifdef REDEBUG
|
||||||
# define GOODFLAGS(f) (f)
|
# define GOODFLAGS(f) (f)
|
||||||
#else
|
#else
|
||||||
|
@ -134,7 +231,9 @@ int eflags;
|
||||||
return(REG_BADPAT);
|
return(REG_BADPAT);
|
||||||
eflags = GOODFLAGS(eflags);
|
eflags = GOODFLAGS(eflags);
|
||||||
|
|
||||||
if ((unsigned) g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
|
if (MB_CUR_MAX > 1)
|
||||||
|
return(mmatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||||
|
else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
|
||||||
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
|
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||||
else
|
else
|
||||||
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
|
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||||
|
|
|
@ -1,8 +1,51 @@
|
||||||
#include "winsup.h"
|
/*-
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)regfree.c 8.3 (Berkeley) 3/20/94
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(LIBC_SCCS) && !defined(lint)
|
||||||
|
static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94";
|
||||||
|
#endif /* LIBC_SCCS and not lint */
|
||||||
|
#include <sys/cdefs.h>
|
||||||
|
__FBSDID("$FreeBSD: src/lib/libc/regex/regfree.c,v 1.8 2007/06/11 03:05:54 delphij Exp $");
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "regex.h"
|
#include <limits.h>
|
||||||
|
#include <regex.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
#include <wctype.h>
|
||||||
|
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "regex2.h"
|
#include "regex2.h"
|
||||||
|
@ -12,10 +55,10 @@
|
||||||
= extern void regfree(regex_t *);
|
= extern void regfree(regex_t *);
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
regfree(preg)
|
regfree(regex_t *preg)
|
||||||
regex_t *preg;
|
|
||||||
{
|
{
|
||||||
register struct re_guts *g;
|
struct re_guts *g;
|
||||||
|
int i;
|
||||||
|
|
||||||
if (preg->re_magic != MAGIC1) /* oops */
|
if (preg->re_magic != MAGIC1) /* oops */
|
||||||
return; /* nice to complain, but hard */
|
return; /* nice to complain, but hard */
|
||||||
|
@ -28,11 +71,19 @@ regex_t *preg;
|
||||||
|
|
||||||
if (g->strip != NULL)
|
if (g->strip != NULL)
|
||||||
free((char *)g->strip);
|
free((char *)g->strip);
|
||||||
if (g->sets != NULL)
|
if (g->sets != NULL) {
|
||||||
|
for (i = 0; i < g->ncsets; i++) {
|
||||||
|
free(g->sets[i].ranges);
|
||||||
|
free(g->sets[i].wides);
|
||||||
|
free(g->sets[i].types);
|
||||||
|
}
|
||||||
free((char *)g->sets);
|
free((char *)g->sets);
|
||||||
if (g->setbits != NULL)
|
}
|
||||||
free((char *)g->setbits);
|
|
||||||
if (g->must != NULL)
|
if (g->must != NULL)
|
||||||
free(g->must);
|
free(g->must);
|
||||||
|
if (g->charjump != NULL)
|
||||||
|
free(&g->charjump[CHAR_MIN]);
|
||||||
|
if (g->matchjump != NULL)
|
||||||
|
free(g->matchjump);
|
||||||
free((char *)g);
|
free((char *)g);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,477 +0,0 @@
|
||||||
# regular expression test set
|
|
||||||
# Lines are at least three fields, separated by one or more tabs. "" stands
|
|
||||||
# for an empty field. First field is an RE. Second field is flags. If
|
|
||||||
# C flag given, regcomp() is expected to fail, and the third field is the
|
|
||||||
# error name (minus the leading REG_).
|
|
||||||
#
|
|
||||||
# Otherwise it is expected to succeed, and the third field is the string to
|
|
||||||
# try matching it against. If there is no fourth field, the match is
|
|
||||||
# expected to fail. If there is a fourth field, it is the substring that
|
|
||||||
# the RE is expected to match. If there is a fifth field, it is a comma-
|
|
||||||
# separated list of what the subexpressions should match, with - indicating
|
|
||||||
# no match for that one. In both the fourth and fifth fields, a (sub)field
|
|
||||||
# starting with @ indicates that the (sub)expression is expected to match
|
|
||||||
# a null string followed by the stuff after the @; this provides a way to
|
|
||||||
# test where null strings match. The character `N' in REs and strings
|
|
||||||
# is newline, `S' is space, `T' is tab, `Z' is NUL.
|
|
||||||
#
|
|
||||||
# The full list of flags:
|
|
||||||
# - placeholder, does nothing
|
|
||||||
# b RE is a BRE, not an ERE
|
|
||||||
# & try it as both an ERE and a BRE
|
|
||||||
# C regcomp() error expected, third field is error name
|
|
||||||
# i REG_ICASE
|
|
||||||
# m ("mundane") REG_NOSPEC
|
|
||||||
# s REG_NOSUB (not really testable)
|
|
||||||
# n REG_NEWLINE
|
|
||||||
# ^ REG_NOTBOL
|
|
||||||
# $ REG_NOTEOL
|
|
||||||
# # REG_STARTEND (see below)
|
|
||||||
# p REG_PEND
|
|
||||||
#
|
|
||||||
# For REG_STARTEND, the start/end offsets are those of the substring
|
|
||||||
# enclosed in ().
|
|
||||||
|
|
||||||
# basics
|
|
||||||
a & a a
|
|
||||||
abc & abc abc
|
|
||||||
abc|de - abc abc
|
|
||||||
a|b|c - abc a
|
|
||||||
|
|
||||||
# parentheses and perversions thereof
|
|
||||||
a(b)c - abc abc
|
|
||||||
a\(b\)c b abc abc
|
|
||||||
a( C EPAREN
|
|
||||||
a( b a( a(
|
|
||||||
a\( - a( a(
|
|
||||||
a\( bC EPAREN
|
|
||||||
a\(b bC EPAREN
|
|
||||||
a(b C EPAREN
|
|
||||||
a(b b a(b a(b
|
|
||||||
# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
|
|
||||||
a) - a) a)
|
|
||||||
) - ) )
|
|
||||||
# end gagging (in a just world, those *should* give EPAREN)
|
|
||||||
a) b a) a)
|
|
||||||
a\) bC EPAREN
|
|
||||||
\) bC EPAREN
|
|
||||||
a()b - ab ab
|
|
||||||
a\(\)b b ab ab
|
|
||||||
|
|
||||||
# anchoring and REG_NEWLINE
|
|
||||||
^abc$ & abc abc
|
|
||||||
a^b - a^b
|
|
||||||
a^b b a^b a^b
|
|
||||||
a$b - a$b
|
|
||||||
a$b b a$b a$b
|
|
||||||
^ & abc @abc
|
|
||||||
$ & abc @
|
|
||||||
^$ & "" @
|
|
||||||
$^ - "" @
|
|
||||||
\($\)\(^\) b "" @
|
|
||||||
# stop retching, those are legitimate (although disgusting)
|
|
||||||
^^ - "" @
|
|
||||||
$$ - "" @
|
|
||||||
b$ & abNc
|
|
||||||
b$ &n abNc b
|
|
||||||
^b$ & aNbNc
|
|
||||||
^b$ &n aNbNc b
|
|
||||||
^$ &n aNNb @Nb
|
|
||||||
^$ n abc
|
|
||||||
^$ n abcN @
|
|
||||||
$^ n aNNb @Nb
|
|
||||||
\($\)\(^\) bn aNNb @Nb
|
|
||||||
^^ n^ aNNb @Nb
|
|
||||||
$$ n aNNb @NN
|
|
||||||
^a ^ a
|
|
||||||
a$ $ a
|
|
||||||
^a ^n aNb
|
|
||||||
^b ^n aNb b
|
|
||||||
a$ $n bNa
|
|
||||||
b$ $n bNa b
|
|
||||||
a*(^b$)c* - b b
|
|
||||||
a*\(^b$\)c* b b b
|
|
||||||
|
|
||||||
# certain syntax errors and non-errors
|
|
||||||
| C EMPTY
|
|
||||||
| b | |
|
|
||||||
* C BADRPT
|
|
||||||
* b * *
|
|
||||||
+ C BADRPT
|
|
||||||
? C BADRPT
|
|
||||||
"" &C EMPTY
|
|
||||||
() - abc @abc
|
|
||||||
\(\) b abc @abc
|
|
||||||
a||b C EMPTY
|
|
||||||
|ab C EMPTY
|
|
||||||
ab| C EMPTY
|
|
||||||
(|a)b C EMPTY
|
|
||||||
(a|)b C EMPTY
|
|
||||||
(*a) C BADRPT
|
|
||||||
(+a) C BADRPT
|
|
||||||
(?a) C BADRPT
|
|
||||||
({1}a) C BADRPT
|
|
||||||
\(\{1\}a\) bC BADRPT
|
|
||||||
(a|*b) C BADRPT
|
|
||||||
(a|+b) C BADRPT
|
|
||||||
(a|?b) C BADRPT
|
|
||||||
(a|{1}b) C BADRPT
|
|
||||||
^* C BADRPT
|
|
||||||
^* b * *
|
|
||||||
^+ C BADRPT
|
|
||||||
^? C BADRPT
|
|
||||||
^{1} C BADRPT
|
|
||||||
^\{1\} bC BADRPT
|
|
||||||
|
|
||||||
# metacharacters, backslashes
|
|
||||||
a.c & abc abc
|
|
||||||
a[bc]d & abd abd
|
|
||||||
a\*c & a*c a*c
|
|
||||||
a\\b & a\b a\b
|
|
||||||
a\\\*b & a\*b a\*b
|
|
||||||
a\bc & abc abc
|
|
||||||
a\ &C EESCAPE
|
|
||||||
a\\bc & a\bc a\bc
|
|
||||||
\{ bC BADRPT
|
|
||||||
a\[b & a[b a[b
|
|
||||||
a[b &C EBRACK
|
|
||||||
# trailing $ is a peculiar special case for the BRE code
|
|
||||||
a$ & a a
|
|
||||||
a$ & a$
|
|
||||||
a\$ & a
|
|
||||||
a\$ & a$ a$
|
|
||||||
a\\$ & a
|
|
||||||
a\\$ & a$
|
|
||||||
a\\$ & a\$
|
|
||||||
a\\$ & a\ a\
|
|
||||||
|
|
||||||
# back references, ugh
|
|
||||||
a\(b\)\2c bC ESUBREG
|
|
||||||
a\(b\1\)c bC ESUBREG
|
|
||||||
a\(b*\)c\1d b abbcbbd abbcbbd bb
|
|
||||||
a\(b*\)c\1d b abbcbd
|
|
||||||
a\(b*\)c\1d b abbcbbbd
|
|
||||||
^\(.\)\1 b abc
|
|
||||||
a\([bc]\)\1d b abcdabbd abbd b
|
|
||||||
a\(\([bc]\)\2\)*d b abbccd abbccd
|
|
||||||
a\(\([bc]\)\2\)*d b abbcbd
|
|
||||||
# actually, this next one probably ought to fail, but the spec is unclear
|
|
||||||
a\(\(b\)*\2\)*d b abbbd abbbd
|
|
||||||
# here is a case that no NFA implementation does right
|
|
||||||
\(ab*\)[ab]*\1 b ababaaa ababaaa a
|
|
||||||
# check out normal matching in the presence of back refs
|
|
||||||
\(a\)\1bcd b aabcd aabcd
|
|
||||||
\(a\)\1bc*d b aabcd aabcd
|
|
||||||
\(a\)\1bc*d b aabd aabd
|
|
||||||
\(a\)\1bc*d b aabcccd aabcccd
|
|
||||||
\(a\)\1bc*[ce]d b aabcccd aabcccd
|
|
||||||
^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd
|
|
||||||
|
|
||||||
# ordinary repetitions
|
|
||||||
ab*c & abc abc
|
|
||||||
ab+c - abc abc
|
|
||||||
ab?c - abc abc
|
|
||||||
a\(*\)b b a*b a*b
|
|
||||||
a\(**\)b b ab ab
|
|
||||||
a\(***\)b bC BADRPT
|
|
||||||
*a b *a *a
|
|
||||||
**a b a a
|
|
||||||
***a bC BADRPT
|
|
||||||
|
|
||||||
# the dreaded bounded repetitions
|
|
||||||
{ & { {
|
|
||||||
{abc & {abc {abc
|
|
||||||
{1 C BADRPT
|
|
||||||
{1} C BADRPT
|
|
||||||
a{b & a{b a{b
|
|
||||||
a{1}b - ab ab
|
|
||||||
a\{1\}b b ab ab
|
|
||||||
a{1,}b - ab ab
|
|
||||||
a\{1,\}b b ab ab
|
|
||||||
a{1,2}b - aab aab
|
|
||||||
a\{1,2\}b b aab aab
|
|
||||||
a{1 C EBRACE
|
|
||||||
a\{1 bC EBRACE
|
|
||||||
a{1a C EBRACE
|
|
||||||
a\{1a bC EBRACE
|
|
||||||
a{1a} C BADBR
|
|
||||||
a\{1a\} bC BADBR
|
|
||||||
a{,2} - a{,2} a{,2}
|
|
||||||
a\{,2\} bC BADBR
|
|
||||||
a{,} - a{,} a{,}
|
|
||||||
a\{,\} bC BADBR
|
|
||||||
a{1,x} C BADBR
|
|
||||||
a\{1,x\} bC BADBR
|
|
||||||
a{1,x C EBRACE
|
|
||||||
a\{1,x bC EBRACE
|
|
||||||
a{300} C BADBR
|
|
||||||
a\{300\} bC BADBR
|
|
||||||
a{1,0} C BADBR
|
|
||||||
a\{1,0\} bC BADBR
|
|
||||||
ab{0,0}c - abcac ac
|
|
||||||
ab\{0,0\}c b abcac ac
|
|
||||||
ab{0,1}c - abcac abc
|
|
||||||
ab\{0,1\}c b abcac abc
|
|
||||||
ab{0,3}c - abbcac abbc
|
|
||||||
ab\{0,3\}c b abbcac abbc
|
|
||||||
ab{1,1}c - acabc abc
|
|
||||||
ab\{1,1\}c b acabc abc
|
|
||||||
ab{1,3}c - acabc abc
|
|
||||||
ab\{1,3\}c b acabc abc
|
|
||||||
ab{2,2}c - abcabbc abbc
|
|
||||||
ab\{2,2\}c b abcabbc abbc
|
|
||||||
ab{2,4}c - abcabbc abbc
|
|
||||||
ab\{2,4\}c b abcabbc abbc
|
|
||||||
((a{1,10}){1,10}){1,10} - a a a,a
|
|
||||||
|
|
||||||
# multiple repetitions
|
|
||||||
a** &C BADRPT
|
|
||||||
a++ C BADRPT
|
|
||||||
a?? C BADRPT
|
|
||||||
a*+ C BADRPT
|
|
||||||
a*? C BADRPT
|
|
||||||
a+* C BADRPT
|
|
||||||
a+? C BADRPT
|
|
||||||
a?* C BADRPT
|
|
||||||
a?+ C BADRPT
|
|
||||||
a{1}{1} C BADRPT
|
|
||||||
a*{1} C BADRPT
|
|
||||||
a+{1} C BADRPT
|
|
||||||
a?{1} C BADRPT
|
|
||||||
a{1}* C BADRPT
|
|
||||||
a{1}+ C BADRPT
|
|
||||||
a{1}? C BADRPT
|
|
||||||
a*{b} - a{b} a{b}
|
|
||||||
a\{1\}\{1\} bC BADRPT
|
|
||||||
a*\{1\} bC BADRPT
|
|
||||||
a\{1\}* bC BADRPT
|
|
||||||
|
|
||||||
# brackets, and numerous perversions thereof
|
|
||||||
a[b]c & abc abc
|
|
||||||
a[ab]c & abc abc
|
|
||||||
a[^ab]c & adc adc
|
|
||||||
a[]b]c & a]c a]c
|
|
||||||
a[[b]c & a[c a[c
|
|
||||||
a[-b]c & a-c a-c
|
|
||||||
a[^]b]c & adc adc
|
|
||||||
a[^-b]c & adc adc
|
|
||||||
a[b-]c & a-c a-c
|
|
||||||
a[b &C EBRACK
|
|
||||||
a[] &C EBRACK
|
|
||||||
a[1-3]c & a2c a2c
|
|
||||||
a[3-1]c &C ERANGE
|
|
||||||
a[1-3-5]c &C ERANGE
|
|
||||||
a[[.-.]--]c & a-c a-c
|
|
||||||
a[1- &C ERANGE
|
|
||||||
a[[. &C EBRACK
|
|
||||||
a[[.x &C EBRACK
|
|
||||||
a[[.x. &C EBRACK
|
|
||||||
a[[.x.] &C EBRACK
|
|
||||||
a[[.x.]] & ax ax
|
|
||||||
a[[.x,.]] &C ECOLLATE
|
|
||||||
a[[.one.]]b & a1b a1b
|
|
||||||
a[[.notdef.]]b &C ECOLLATE
|
|
||||||
a[[.].]]b & a]b a]b
|
|
||||||
a[[:alpha:]]c & abc abc
|
|
||||||
a[[:notdef:]]c &C ECTYPE
|
|
||||||
a[[: &C EBRACK
|
|
||||||
a[[:alpha &C EBRACK
|
|
||||||
a[[:alpha:] &C EBRACK
|
|
||||||
a[[:alpha,:] &C ECTYPE
|
|
||||||
a[[:]:]]b &C ECTYPE
|
|
||||||
a[[:-:]]b &C ECTYPE
|
|
||||||
a[[:alph:]] &C ECTYPE
|
|
||||||
a[[:alphabet:]] &C ECTYPE
|
|
||||||
[[:alnum:]]+ - -%@a0X- a0X
|
|
||||||
[[:alpha:]]+ - -%@aX0- aX
|
|
||||||
[[:blank:]]+ - aSSTb SST
|
|
||||||
[[:cntrl:]]+ - aNTb NT
|
|
||||||
[[:digit:]]+ - a019b 019
|
|
||||||
[[:graph:]]+ - Sa%bS a%b
|
|
||||||
[[:lower:]]+ - AabC ab
|
|
||||||
[[:print:]]+ - NaSbN aSb
|
|
||||||
[[:punct:]]+ - S%-&T %-&
|
|
||||||
[[:space:]]+ - aSNTb SNT
|
|
||||||
[[:upper:]]+ - aBCd BC
|
|
||||||
[[:xdigit:]]+ - p0f3Cq 0f3C
|
|
||||||
a[[=b=]]c & abc abc
|
|
||||||
a[[= &C EBRACK
|
|
||||||
a[[=b &C EBRACK
|
|
||||||
a[[=b= &C EBRACK
|
|
||||||
a[[=b=] &C EBRACK
|
|
||||||
a[[=b,=]] &C ECOLLATE
|
|
||||||
a[[=one=]]b & a1b a1b
|
|
||||||
|
|
||||||
# complexities
|
|
||||||
a(((b)))c - abc abc
|
|
||||||
a(b|(c))d - abd abd
|
|
||||||
a(b*|c)d - abbd abbd
|
|
||||||
# just gotta have one DFA-buster, of course
|
|
||||||
a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
|
|
||||||
# and an inline expansion in case somebody gets tricky
|
|
||||||
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
|
|
||||||
# and in case somebody just slips in an NFA...
|
|
||||||
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
|
|
||||||
# fish for anomalies as the number of states passes 32
|
|
||||||
12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789
|
|
||||||
123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890
|
|
||||||
1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901
|
|
||||||
12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012
|
|
||||||
123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
|
|
||||||
# and one really big one, beyond any plausible word width
|
|
||||||
1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
|
|
||||||
# fish for problems as brackets go past 8
|
|
||||||
[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
|
|
||||||
[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
|
|
||||||
[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
|
|
||||||
[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
|
|
||||||
|
|
||||||
# subtleties of matching
|
|
||||||
abc & xabcy abc
|
|
||||||
a\(b\)?c\1d b acd
|
|
||||||
aBc i Abc Abc
|
|
||||||
a[Bc]*d i abBCcd abBCcd
|
|
||||||
0[[:upper:]]1 &i 0a1 0a1
|
|
||||||
0[[:lower:]]1 &i 0A1 0A1
|
|
||||||
a[^b]c &i abc
|
|
||||||
a[^b]c &i aBc
|
|
||||||
a[^b]c &i adc adc
|
|
||||||
[a]b[c] - abc abc
|
|
||||||
[a]b[a] - aba aba
|
|
||||||
[abc]b[abc] - abc abc
|
|
||||||
[abc]b[abd] - abd abd
|
|
||||||
a(b?c)+d - accd accd
|
|
||||||
(wee|week)(knights|night) - weeknights weeknights
|
|
||||||
(we|wee|week|frob)(knights|night|day) - weeknights weeknights
|
|
||||||
a[bc]d - xyzaaabcaababdacd abd
|
|
||||||
a[ab]c - aaabc abc
|
|
||||||
abc s abc abc
|
|
||||||
a* & b @b
|
|
||||||
|
|
||||||
# Let's have some fun -- try to match a C comment.
|
|
||||||
# first the obvious, which looks okay at first glance...
|
|
||||||
/\*.*\*/ - /*x*/ /*x*/
|
|
||||||
# but...
|
|
||||||
/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/
|
|
||||||
# okay, we must not match */ inside; try to do that...
|
|
||||||
/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/
|
|
||||||
/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/
|
|
||||||
# but...
|
|
||||||
/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/
|
|
||||||
# and a still fancier version, which does it right (I think)...
|
|
||||||
/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/
|
|
||||||
/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/
|
|
||||||
/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/
|
|
||||||
/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/
|
|
||||||
/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/
|
|
||||||
/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/
|
|
||||||
|
|
||||||
# subexpressions
|
|
||||||
.* - abc abc -
|
|
||||||
a(b)(c)d - abcd abcd b,c
|
|
||||||
a(((b)))c - abc abc b,b,b
|
|
||||||
a(b|(c))d - abd abd b,-
|
|
||||||
a(b*|c|e)d - abbd abbd bb
|
|
||||||
a(b*|c|e)d - acd acd c
|
|
||||||
a(b*|c|e)d - ad ad @d
|
|
||||||
a(b?)c - abc abc b
|
|
||||||
a(b?)c - ac ac @c
|
|
||||||
a(b+)c - abc abc b
|
|
||||||
a(b+)c - abbbc abbbc bbb
|
|
||||||
a(b*)c - ac ac @c
|
|
||||||
(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de
|
|
||||||
# the regression tester only asks for 9 subexpressions
|
|
||||||
a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j
|
|
||||||
a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k
|
|
||||||
a([bc]?)c - abc abc b
|
|
||||||
a([bc]?)c - ac ac @c
|
|
||||||
a([bc]+)c - abc abc b
|
|
||||||
a([bc]+)c - abcc abcc bc
|
|
||||||
a([bc]+)bc - abcbc abcbc bc
|
|
||||||
a(bb+|b)b - abb abb b
|
|
||||||
a(bbb+|bb+|b)b - abb abb b
|
|
||||||
a(bbb+|bb+|b)b - abbb abbb bb
|
|
||||||
a(bbb+|bb+|b)bb - abbb abbb b
|
|
||||||
(.*).* - abcdef abcdef abcdef
|
|
||||||
(a*)* - bc @b @b
|
|
||||||
|
|
||||||
# do we get the right subexpression when it is used more than once?
|
|
||||||
a(b|c)*d - ad ad -
|
|
||||||
a(b|c)*d - abcd abcd c
|
|
||||||
a(b|c)+d - abd abd b
|
|
||||||
a(b|c)+d - abcd abcd c
|
|
||||||
a(b|c?)+d - ad ad @d
|
|
||||||
a(b|c?)+d - abcd abcd @d
|
|
||||||
a(b|c){0,0}d - ad ad -
|
|
||||||
a(b|c){0,1}d - ad ad -
|
|
||||||
a(b|c){0,1}d - abd abd b
|
|
||||||
a(b|c){0,2}d - ad ad -
|
|
||||||
a(b|c){0,2}d - abcd abcd c
|
|
||||||
a(b|c){0,}d - ad ad -
|
|
||||||
a(b|c){0,}d - abcd abcd c
|
|
||||||
a(b|c){1,1}d - abd abd b
|
|
||||||
a(b|c){1,1}d - acd acd c
|
|
||||||
a(b|c){1,2}d - abd abd b
|
|
||||||
a(b|c){1,2}d - abcd abcd c
|
|
||||||
a(b|c){1,}d - abd abd b
|
|
||||||
a(b|c){1,}d - abcd abcd c
|
|
||||||
a(b|c){2,2}d - acbd acbd b
|
|
||||||
a(b|c){2,2}d - abcd abcd c
|
|
||||||
a(b|c){2,4}d - abcd abcd c
|
|
||||||
a(b|c){2,4}d - abcbd abcbd b
|
|
||||||
a(b|c){2,4}d - abcbcd abcbcd c
|
|
||||||
a(b|c){2,}d - abcd abcd c
|
|
||||||
a(b|c){2,}d - abcbd abcbd b
|
|
||||||
a(b+|((c)*))+d - abd abd @d,@d,-
|
|
||||||
a(b+|((c)*))+d - abcd abcd @d,@d,-
|
|
||||||
|
|
||||||
# check out the STARTEND option
|
|
||||||
[abc] &# a(b)c b
|
|
||||||
[abc] &# a(d)c
|
|
||||||
[abc] &# a(bc)d b
|
|
||||||
[abc] &# a(dc)d c
|
|
||||||
. &# a()c
|
|
||||||
b.*c &# b(bc)c bc
|
|
||||||
b.* &# b(bc)c bc
|
|
||||||
.*c &# b(bc)c bc
|
|
||||||
|
|
||||||
# plain strings, with the NOSPEC flag
|
|
||||||
abc m abc abc
|
|
||||||
abc m xabcy abc
|
|
||||||
abc m xyz
|
|
||||||
a*b m aba*b a*b
|
|
||||||
a*b m ab
|
|
||||||
"" mC EMPTY
|
|
||||||
|
|
||||||
# cases involving NULs
|
|
||||||
aZb & a a
|
|
||||||
aZb &p a
|
|
||||||
aZb &p# (aZb) aZb
|
|
||||||
aZ*b &p# (ab) ab
|
|
||||||
a.b &# (aZb) aZb
|
|
||||||
a.* &# (aZb)c aZb
|
|
||||||
|
|
||||||
# word boundaries (ick)
|
|
||||||
[[:<:]]a & a a
|
|
||||||
[[:<:]]a & ba
|
|
||||||
[[:<:]]a & -a a
|
|
||||||
a[[:>:]] & a a
|
|
||||||
a[[:>:]] & ab
|
|
||||||
a[[:>:]] & a- a
|
|
||||||
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
|
|
||||||
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
|
|
||||||
[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
|
|
||||||
[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc
|
|
||||||
[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_
|
|
||||||
[[:<:]]a_b[[:>:]] & x_a_b
|
|
||||||
|
|
||||||
# past problems, and suspected problems
|
|
||||||
(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
|
|
||||||
abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
|
|
||||||
abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
|
|
||||||
(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
|
|
||||||
CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11
|
|
||||||
Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz
|
|
||||||
a?b - ab ab
|
|
||||||
-\{0,1\}[0-9]*$ b -5 -5
|
|
||||||
a*a*a*a*a*a*a* & aaaaaa aaaaaa
|
|
|
@ -1,9 +1,41 @@
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||||
|
* Copyright (c) 1992, 1993, 1994
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to Berkeley by
|
||||||
|
* Henry Spencer.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)utils.h 8.3 (Berkeley) 3/20/94
|
||||||
|
* $FreeBSD: src/lib/libc/regex/utils.h,v 1.3 2007/01/09 00:28:04 imp Exp $
|
||||||
|
*/
|
||||||
|
|
||||||
/* utility definitions */
|
/* utility definitions */
|
||||||
#ifdef _POSIX2_RE_DUP_MAX
|
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
|
||||||
#define DUPMAX _POSIX2_RE_DUP_MAX
|
|
||||||
#else
|
|
||||||
#define DUPMAX 255
|
|
||||||
#endif
|
|
||||||
#define INFINITY (DUPMAX + 1)
|
#define INFINITY (DUPMAX + 1)
|
||||||
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
||||||
typedef unsigned char uch;
|
typedef unsigned char uch;
|
||||||
|
|
|
@ -49,6 +49,7 @@ extern char *__locale_charset ();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __INSIDE_CYGWIN__
|
#ifdef __INSIDE_CYGWIN__
|
||||||
|
#ifdef __cplusplus
|
||||||
size_t __stdcall sys_cp_wcstombs (wctomb_p, const char *, char *, size_t,
|
size_t __stdcall sys_cp_wcstombs (wctomb_p, const char *, char *, size_t,
|
||||||
const wchar_t *, size_t = (size_t) -1)
|
const wchar_t *, size_t = (size_t) -1)
|
||||||
__attribute__ ((regparm(3)));
|
__attribute__ ((regparm(3)));
|
||||||
|
@ -68,6 +69,7 @@ size_t __stdcall sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src,
|
||||||
size_t __stdcall sys_mbstowcs_alloc (wchar_t **, int, const char *,
|
size_t __stdcall sys_mbstowcs_alloc (wchar_t **, int, const char *,
|
||||||
size_t = (size_t) -1)
|
size_t = (size_t) -1)
|
||||||
__attribute__ ((regparm(3)));
|
__attribute__ ((regparm(3)));
|
||||||
|
#endif /* __cplusplus */
|
||||||
#endif /* __INSIDE_CYGWIN__ */
|
#endif /* __INSIDE_CYGWIN__ */
|
||||||
|
|
||||||
#endif /* _CYGWIN_WCHAR_H */
|
#endif /* _CYGWIN_WCHAR_H */
|
||||||
|
|
Loading…
Reference in New Issue