Cygwin: replace regex with latest verbatim FreeBSD version

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2023-03-16 11:12:08 +01:00
parent 2a4dd6a239
commit 24f34edc2a
12 changed files with 1101 additions and 668 deletions

View File

@ -1,4 +1,6 @@
/*- /*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1992 Henry Spencer. * Copyright (c) 1992 Henry Spencer.
* Copyright (c) 1992, 1993 * Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved. * The Regents of the University of California. All rights reserved.
@ -14,7 +16,7 @@
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
@ -38,32 +40,20 @@
#define _REGEX_H_ #define _REGEX_H_
#include <sys/cdefs.h> #include <sys/cdefs.h>
#include <_ansi.h>
#include <sys/_types.h> #include <sys/_types.h>
/* types */ /* types */
#ifdef __CYGWIN__
typedef _off_t regoff_t;
#define __need_size_t
#include <stddef.h>
#else /* !__CYGWIN__ */
typedef __off_t regoff_t; typedef __off_t regoff_t;
#ifndef _SIZE_T_DECLARED #ifndef _SIZE_T_DECLARED
typedef __size_t size_t; typedef __size_t size_t;
#define _SIZE_T_DECLARED #define _SIZE_T_DECLARED
#endif #endif
#endif /* !__CYGWIN__ */
typedef struct { typedef struct {
int re_magic; int re_magic;
size_t re_nsub; /* number of parenthesized subexpressions */ size_t re_nsub; /* number of parenthesized subexpressions */
#ifdef __CYGWIN__
const char *re_endp; /* end pointer for REG_PEND */ const char *re_endp; /* end pointer for REG_PEND */
#else
__const char *re_endp; /* end pointer for REG_PEND */
#endif
struct re_guts *re_g; /* none of your business :-) */ struct re_guts *re_g; /* none of your business :-) */
} regex_t; } regex_t;
@ -81,12 +71,10 @@ typedef struct {
#define REG_NOSPEC 0020 #define REG_NOSPEC 0020
#define REG_PEND 0040 #define REG_PEND 0040
#define REG_DUMP 0200 #define REG_DUMP 0200
#define REG_POSIX 0400 /* only POSIX-compliant regex (libregex) */
/* regerror() flags */ /* regerror() flags */
#define REG_ENOSYS (-1) #define REG_ENOSYS (-1)
#ifdef __CYGWIN__
#define REG_NOERROR 0 /* GNU extension */
#endif
#define REG_NOMATCH 1 #define REG_NOMATCH 1
#define REG_BADPAT 2 #define REG_BADPAT 2
#define REG_ECOLLATE 3 #define REG_ECOLLATE 3

View File

@ -32,6 +32,10 @@ to the following restrictions:
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors * 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.

View File

@ -1,4 +1,6 @@
/*- /*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994 * Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved. * The Regents of the University of California. All rights reserved.
@ -14,7 +16,7 @@
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
@ -31,7 +33,7 @@
* SUCH DAMAGE. * SUCH DAMAGE.
* *
* @(#)cname.h 8.3 (Berkeley) 3/20/94 * @(#)cname.h 8.3 (Berkeley) 3/20/94
* $FreeBSD: src/lib/libc/regex/cname.h,v 1.4 2007/01/09 00:28:04 imp Exp $ * $FreeBSD$
*/ */
/* character-name table */ /* character-name table */
@ -108,7 +110,7 @@ static struct cname {
{"four", '4'}, {"four", '4'},
{"five", '5'}, {"five", '5'},
{"six", '6'}, {"six", '6'},
{"seven", '7'}, {"seven", '7'},
{"eight", '8'}, {"eight", '8'},
{"nine", '9'}, {"nine", '9'},
{"colon", ':'}, {"colon", ':'},

View File

@ -1,4 +1,6 @@
/*- /*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994 * Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved. * The Regents of the University of California. All rights reserved.
@ -14,7 +16,7 @@
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
@ -34,7 +36,9 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds Exp $"); __FBSDID("$FreeBSD$");
#include <stdbool.h>
/* /*
* The matching engine and friends. This file is #included by regexec.c * The matching engine and friends. This file is #included by regexec.c
@ -44,9 +48,9 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds E
*/ */
#ifdef SNAMES #ifdef SNAMES
#define stepback sstepback
#define matcher smatcher #define matcher smatcher
#define fast sfast #define walk swalk
#define slow sslow
#define dissect sdissect #define dissect sdissect
#define backref sbackref #define backref sbackref
#define step sstep #define step sstep
@ -55,9 +59,9 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds E
#define match smat #define match smat
#endif #endif
#ifdef LNAMES #ifdef LNAMES
#define stepback lstepback
#define matcher lmatcher #define matcher lmatcher
#define fast lfast #define walk lwalk
#define slow lslow
#define dissect ldissect #define dissect ldissect
#define backref lbackref #define backref lbackref
#define step lstep #define step lstep
@ -66,9 +70,9 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds E
#define match lmat #define match lmat
#endif #endif
#ifdef MNAMES #ifdef MNAMES
#define stepback mstepback
#define matcher mmatcher #define matcher mmatcher
#define fast mfast #define walk mwalk
#define slow mslow
#define dissect mdissect #define dissect mdissect
#define backref mbackref #define backref mbackref
#define step mstep #define step mstep
@ -104,9 +108,8 @@ extern "C" {
static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int); static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast);
static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags);
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
#define MAX_RECURSION 100 #define MAX_RECURSION 100
#define BOL (OUT-1) #define BOL (OUT-1)
#define EOL (BOL-1) #define EOL (BOL-1)
@ -115,10 +118,12 @@ static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_
#define BOW (BOL-4) #define BOW (BOL-4)
#define EOW (BOL-5) #define EOW (BOL-5)
#define BADCHAR (BOL-6) #define BADCHAR (BOL-6)
/* When using wint_t, which is defined as unsigned int on BSD, #define NWBND (BOL-7)
as well as on Cygwin or Linux, the NONCHAR test is broken without #define NONCHAR(c) ((c) <= OUT)
the below cast. I'm wondering how this is supposed to work at all... */ /* sflags */
#define NONCHAR(c) ((int)(c) <= OUT) #define SBOS 0x0001
#define SEOS 0x0002
#ifdef REDEBUG #ifdef REDEBUG
static void print(struct match *m, const char *caption, states st, int ch, FILE *d); static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
#endif #endif
@ -144,6 +149,39 @@ static const char *pchar(int ch);
#define NOTE(s) /* nothing */ #define NOTE(s) /* nothing */
#endif #endif
/*
 * Given a multibyte string pointed to by start, step back nchar characters
 * from current position pointed to by cur.
 *
 * Returns the new position, or NULL when stepping back that far would move
 * before start, or when no run of up to MB_CUR_MAX bytes preceding the
 * current position is accepted by mbrtowc().
 *
 * All range checks compare distances (cur - start) rather than forming a
 * pointer in front of start first; such a pointer is undefined behaviour.
 */
static const char *
stepback(const char *start, const char *cur, int nchar)
{
	const char *ret;
	int wc, mbc;
	mbstate_t mbs;
	size_t clen;

	if (MB_CUR_MAX == 1) {
		/*
		 * One byte per character.  Landing exactly on start is a
		 * valid result, just as it is in the multibyte path below.
		 */
		if (nchar > cur - start)
			return (NULL);
		return (cur - nchar);
	}

	ret = cur;
	for (wc = nchar; wc > 0; wc--) {
		/* Try ever longer byte runs ending at ret. */
		for (mbc = 1; (size_t)mbc <= (size_t)MB_CUR_MAX; mbc++) {
			/* Not enough bytes left in front of ret. */
			if (mbc > ret - start)
				return (NULL);
			/* Each attempt starts from the initial shift state. */
			memset(&mbs, 0, sizeof(mbs));
			clen = mbrtowc(NULL, ret - mbc, mbc, &mbs);
			if (clen != (size_t)-1 && clen != (size_t)-2)
				break;
		}
		/* No run of up to MB_CUR_MAX bytes converted. */
		if ((size_t)mbc > (size_t)MB_CUR_MAX)
			return (NULL);
		ret -= mbc;
	}

	return (ret);
}
/* /*
- matcher - the actual matching engine - matcher - the actual matching engine
== static int matcher(struct re_guts *g, const char *string, \ == static int matcher(struct re_guts *g, const char *string, \
@ -157,7 +195,7 @@ matcher(struct re_guts *g,
int eflags) int eflags)
{ {
const char *endp; const char *endp;
int i; size_t i;
struct match mv; struct match mv;
struct match *m = &mv; struct match *m = &mv;
const char *dp = NULL; const char *dp = NULL;
@ -247,17 +285,19 @@ matcher(struct re_guts *g,
ZAPSTATE(&m->mbs); ZAPSTATE(&m->mbs);
/* Adjust start according to moffset, to speed things up */ /* Adjust start according to moffset, to speed things up */
#ifndef MNAMES if (dp != NULL && g->moffset > -1) {
/* The code evaluating moffset doesn't seem to work right const char *nstart;
in the multibyte case. */
if (g->moffset > -1) nstart = stepback(start, dp, g->moffset);
start = ((dp - g->moffset) < start) ? start : dp - g->moffset; if (nstart != NULL)
#endif start = nstart;
}
SP("mloop", m->st, *start); SP("mloop", m->st, *start);
/* this loop does only one repetition except for backrefs */ /* this loop does only one repetition except for backrefs */
for (;;) { for (;;) {
endp = fast(m, start, stop, gf, gl); endp = walk(m, start, stop, gf, gl, true);
if (endp == NULL) { /* a miss */ if (endp == NULL) { /* a miss */
if (m->pmatch != NULL) if (m->pmatch != NULL)
free((char *)m->pmatch); free((char *)m->pmatch);
@ -273,7 +313,7 @@ matcher(struct re_guts *g,
assert(m->coldp != NULL); assert(m->coldp != NULL);
for (;;) { for (;;) {
NOTE("finding start"); NOTE("finding start");
endp = slow(m, m->coldp, stop, gf, gl); endp = walk(m, m->coldp, stop, gf, gl, false);
if (endp != NULL) if (endp != NULL)
break; break;
assert(m->coldp < m->endp); assert(m->coldp < m->endp);
@ -318,7 +358,7 @@ matcher(struct re_guts *g,
if (dp != NULL || endp <= m->coldp) if (dp != NULL || endp <= m->coldp)
break; /* defeat */ break; /* defeat */
NOTE("backoff"); NOTE("backoff");
endp = slow(m, m->coldp, endp-1, gf, gl); endp = walk(m, m->coldp, endp-1, gf, gl, false);
if (endp == NULL) if (endp == NULL)
break; /* defeat */ break; /* defeat */
/* try it on a shorter possibility */ /* try it on a shorter possibility */
@ -391,7 +431,7 @@ dissect(struct match *m,
const char *ssp; /* start of string matched by subsubRE */ const char *ssp; /* start of string matched by subsubRE */
const char *sep; /* end of string matched by subsubRE */ const char *sep; /* end of string matched by subsubRE */
const char *oldssp; /* previous ssp */ const char *oldssp; /* previous ssp */
const char *dp __attribute__ ((unused)); const char *dp __unused;
AT("diss", start, stop, startst, stopst); AT("diss", start, stop, startst, stopst);
sp = start; sp = start;
@ -404,7 +444,7 @@ dissect(struct match *m,
es += OPND(m->g->strip[es]); es += OPND(m->g->strip[es]);
break; break;
case OCH_: case OCH_:
while (OP(m->g->strip[es]) != O_CH) while (OP(m->g->strip[es]) != (sop)O_CH)
es += OPND(m->g->strip[es]); es += OPND(m->g->strip[es]);
break; break;
} }
@ -422,6 +462,10 @@ dissect(struct match *m,
case OEOL: case OEOL:
case OBOW: case OBOW:
case OEOW: case OEOW:
case OBOS:
case OEOS:
case OWBND:
case ONWBND:
break; break;
case OANY: case OANY:
case OANYOF: case OANYOF:
@ -436,10 +480,10 @@ dissect(struct match *m,
stp = stop; stp = stop;
for (;;) { for (;;) {
/* how long could this one be? */ /* how long could this one be? */
rest = slow(m, sp, stp, ss, es); rest = walk(m, sp, stp, ss, es, false);
assert(rest != NULL); /* it did match */ assert(rest != NULL); /* it did match */
/* could the rest match the rest? */ /* could the rest match the rest? */
tail = slow(m, rest, stop, es, stopst); tail = walk(m, rest, stop, es, stopst, false);
if (tail == stop) if (tail == stop)
break; /* yes! */ break; /* yes! */
/* no -- try a shorter match for this one */ /* no -- try a shorter match for this one */
@ -449,7 +493,7 @@ dissect(struct match *m,
ssub = ss + 1; ssub = ss + 1;
esub = es - 1; esub = es - 1;
/* did innards match? */ /* did innards match? */
if (slow(m, sp, rest, ssub, esub) != NULL) { if (walk(m, sp, rest, ssub, esub, false) != NULL) {
dp = dissect(m, sp, rest, ssub, esub); dp = dissect(m, sp, rest, ssub, esub);
assert(dp == rest); assert(dp == rest);
} else /* no */ } else /* no */
@ -460,10 +504,10 @@ dissect(struct match *m,
stp = stop; stp = stop;
for (;;) { for (;;) {
/* how long could this one be? */ /* how long could this one be? */
rest = slow(m, sp, stp, ss, es); rest = walk(m, sp, stp, ss, es, false);
assert(rest != NULL); /* it did match */ assert(rest != NULL); /* it did match */
/* could the rest match the rest? */ /* could the rest match the rest? */
tail = slow(m, rest, stop, es, stopst); tail = walk(m, rest, stop, es, stopst, false);
if (tail == stop) if (tail == stop)
break; /* yes! */ break; /* yes! */
/* no -- try a shorter match for this one */ /* no -- try a shorter match for this one */
@ -475,7 +519,7 @@ dissect(struct match *m,
ssp = sp; ssp = sp;
oldssp = ssp; oldssp = ssp;
for (;;) { /* find last match of innards */ for (;;) { /* find last match of innards */
sep = slow(m, ssp, rest, ssub, esub); sep = walk(m, ssp, rest, ssub, esub, false);
if (sep == NULL || sep == ssp) if (sep == NULL || sep == ssp)
break; /* failed or matched null */ break; /* failed or matched null */
oldssp = ssp; /* on to next try */ oldssp = ssp; /* on to next try */
@ -487,7 +531,7 @@ dissect(struct match *m,
ssp = oldssp; ssp = oldssp;
} }
assert(sep == rest); /* must exhaust substring */ assert(sep == rest); /* must exhaust substring */
assert(slow(m, ssp, sep, ssub, esub) == rest); assert(walk(m, ssp, sep, ssub, esub, false) == rest);
dp = dissect(m, ssp, sep, ssub, esub); dp = dissect(m, ssp, sep, ssub, esub);
assert(dp == sep); assert(dp == sep);
sp = rest; sp = rest;
@ -496,10 +540,10 @@ dissect(struct match *m,
stp = stop; stp = stop;
for (;;) { for (;;) {
/* how long could this one be? */ /* how long could this one be? */
rest = slow(m, sp, stp, ss, es); rest = walk(m, sp, stp, ss, es, false);
assert(rest != NULL); /* it did match */ assert(rest != NULL); /* it did match */
/* could the rest match the rest? */ /* could the rest match the rest? */
tail = slow(m, rest, stop, es, stopst); tail = walk(m, rest, stop, es, stopst, false);
if (tail == stop) if (tail == stop)
break; /* yes! */ break; /* yes! */
/* no -- try a shorter match for this one */ /* no -- try a shorter match for this one */
@ -510,7 +554,7 @@ dissect(struct match *m,
esub = ss + OPND(m->g->strip[ss]) - 1; esub = ss + OPND(m->g->strip[ss]) - 1;
assert(OP(m->g->strip[esub]) == OOR1); assert(OP(m->g->strip[esub]) == OOR1);
for (;;) { /* find first matching branch */ for (;;) { /* find first matching branch */
if (slow(m, sp, rest, ssub, esub) == rest) if (walk(m, sp, rest, ssub, esub, false) == rest)
break; /* it matched all of it */ break; /* it matched all of it */
/* that one missed, try next one */ /* that one missed, try next one */
assert(OP(m->g->strip[esub]) == OOR1); assert(OP(m->g->strip[esub]) == OOR1);
@ -518,7 +562,7 @@ dissect(struct match *m,
assert(OP(m->g->strip[esub]) == OOR2); assert(OP(m->g->strip[esub]) == OOR2);
ssub = esub + 1; ssub = esub + 1;
esub += OPND(m->g->strip[esub]); esub += OPND(m->g->strip[esub]);
if (OP(m->g->strip[esub]) == OOR2) if (OP(m->g->strip[esub]) == (sop)OOR2)
esub--; esub--;
else else
assert(OP(m->g->strip[esub]) == O_CH); assert(OP(m->g->strip[esub]) == O_CH);
@ -554,6 +598,17 @@ dissect(struct match *m,
return(sp); return(sp);
} }
/*
 * ISBOW - is sp at the beginning of a word?
 * True when sp is before m->endp, the character at sp is a word character,
 * and either sp is m->beginp with REG_NOTBOL unset, or sp is past m->offp
 * and the preceding character is not a word character.
 * Both arguments are evaluated more than once; pass side-effect-free
 * expressions only.
 */
#define	ISBOW(m, sp)							\
	((sp) < (m)->endp && ISWORD(*(sp)) &&				\
	(((sp) == (m)->beginp && !((m)->eflags&REG_NOTBOL)) ||		\
	((sp) > (m)->offp && !ISWORD(*((sp)-1)))))

/*
 * ISEOW - is sp at the end of a word?
 * True when sp is past m->beginp, the preceding character is a word
 * character, and one of: sp is m->endp with REG_NOTEOL unset; the character
 * at sp is a newline and REG_NEWLINE is set; the character at sp is not a
 * word character.
 * Both arguments are evaluated more than once; pass side-effect-free
 * expressions only.
 */
#define	ISEOW(m, sp)							\
	((((sp) == (m)->endp && !((m)->eflags&REG_NOTEOL)) ||		\
	((sp) < (m)->endp && *(sp) == '\n' &&				\
	((m)->g->cflags&REG_NEWLINE)) ||				\
	((sp) < (m)->endp && !ISWORD(*(sp)))) &&			\
	((sp) > (m)->beginp && ISWORD(*((sp)-1))))
/* /*
- backref - figure out what matched what, figuring in back references - backref - figure out what matched what, figuring in back references
== static const char *backref(struct match *m, const char *start, \ == static const char *backref(struct match *m, const char *start, \
@ -611,10 +666,22 @@ backref(struct match *m,
if (wc == BADCHAR || !CHIN(cs, wc)) if (wc == BADCHAR || !CHIN(cs, wc))
return(NULL); return(NULL);
break; break;
case OBOS:
if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0)
{ /* yes */ }
else
return(NULL);
break;
case OEOS:
if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0)
{ /* yes */ }
else
return(NULL);
break;
case OBOL: case OBOL:
if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
(sp < m->endp && *(sp-1) == '\n' && (sp > m->offp && sp < m->endp &&
(m->g->cflags&REG_NEWLINE)) ) *(sp-1) == '\n' && (m->g->cflags&REG_NEWLINE)))
{ /* yes */ } { /* yes */ }
else else
return(NULL); return(NULL);
@ -627,23 +694,29 @@ backref(struct match *m,
else else
return(NULL); return(NULL);
break; break;
case OWBND:
if (ISBOW(m, sp) || ISEOW(m, sp))
{ /* yes */ }
else
return(NULL);
break;
case ONWBND:
if (((sp == m->beginp) && !ISWORD(*sp)) ||
(sp == m->endp && !ISWORD(*(sp - 1))))
{ /* yes, beginning/end of subject */ }
else if (ISWORD(*(sp - 1)) == ISWORD(*sp))
{ /* yes, beginning/end of subject */ }
else
return(NULL);
break;
case OBOW: case OBOW:
if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || if (ISBOW(m, sp))
(sp < m->endp && *(sp-1) == '\n' &&
(m->g->cflags&REG_NEWLINE)) ||
(sp > m->beginp &&
!ISWORD(*(sp-1))) ) &&
(sp < m->endp && ISWORD(*sp)) )
{ /* yes */ } { /* yes */ }
else else
return(NULL); return(NULL);
break; break;
case OEOW: case OEOW:
if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || if (ISEOW(m, sp))
(sp < m->endp && *sp == '\n' &&
(m->g->cflags&REG_NEWLINE)) ||
(sp < m->endp && !ISWORD(*sp)) ) &&
(sp > m->beginp && ISWORD(*(sp-1))) )
{ /* yes */ } { /* yes */ }
else else
return(NULL); return(NULL);
@ -656,7 +729,7 @@ backref(struct match *m,
do { do {
assert(OP(s) == OOR2); assert(OP(s) == OOR2);
ss += OPND(s); ss += OPND(s);
} while (OP(s = m->g->strip[ss]) != O_CH); } while (OP(s = m->g->strip[ss]) != (sop)O_CH);
/* note that the ss++ gets us past the O_CH */ /* note that the ss++ gets us past the O_CH */
break; break;
default: /* have to make a choice */ default: /* have to make a choice */
@ -689,22 +762,19 @@ backref(struct match *m,
ssp = m->offp + m->pmatch[i].rm_so; ssp = m->offp + m->pmatch[i].rm_so;
if (memcmp(sp, ssp, len) != 0) if (memcmp(sp, ssp, len) != 0)
return(NULL); return(NULL);
while (m->g->strip[ss] != SOP(O_BACK, i)) while (m->g->strip[ss] != (sop)SOP(O_BACK, i))
ss++; ss++;
return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
break;
case OQUEST_: /* to null or not */ case OQUEST_: /* to null or not */
dp = backref(m, sp, stop, ss+1, stopst, lev, rec); dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
if (dp != NULL) if (dp != NULL)
return(dp); /* not */ return(dp); /* not */
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
break;
case OPLUS_: case OPLUS_:
assert(m->lastpos != NULL); assert(m->lastpos != NULL);
assert(lev+1 <= m->g->nplus); assert(lev+1 <= m->g->nplus);
m->lastpos[lev+1] = sp; m->lastpos[lev+1] = sp;
return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
break;
case O_PLUS: case O_PLUS:
if (sp == m->lastpos[lev]) /* last pass matched null */ if (sp == m->lastpos[lev]) /* last pass matched null */
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
@ -715,7 +785,6 @@ backref(struct match *m,
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
else else
return(dp); return(dp);
break;
case OCH_: /* find the right one, if any */ case OCH_: /* find the right one, if any */
ssub = ss + 1; ssub = ss + 1;
esub = ss + OPND(s) - 1; esub = ss + OPND(s) - 1;
@ -725,17 +794,18 @@ backref(struct match *m,
if (dp != NULL) if (dp != NULL)
return(dp); return(dp);
/* that one missed, try next one */ /* that one missed, try next one */
if (OP(m->g->strip[esub]) == O_CH) if (OP(m->g->strip[esub]) == (sop)O_CH)
return(NULL); /* there is none */ return(NULL); /* there is none */
esub++; esub++;
assert(OP(m->g->strip[esub]) == OOR2); assert(OP(m->g->strip[esub]) == (sop)OOR2);
ssub = esub + 1; ssub = esub + 1;
esub += OPND(m->g->strip[esub]); esub += OPND(m->g->strip[esub]);
if (OP(m->g->strip[esub]) == OOR2) if (OP(m->g->strip[esub]) == (sop)OOR2)
esub--; esub--;
else else
assert(OP(m->g->strip[esub]) == O_CH); assert(OP(m->g->strip[esub]) == O_CH);
} }
/* NOTREACHED */
break; break;
case OLPAREN: /* must undo assignment if rest fails */ case OLPAREN: /* must undo assignment if rest fails */
i = OPND(s); i = OPND(s);
@ -747,7 +817,6 @@ backref(struct match *m,
return(dp); return(dp);
m->pmatch[i].rm_so = offsave; m->pmatch[i].rm_so = offsave;
return(NULL); return(NULL);
break;
case ORPAREN: /* must undo assignment if rest fails */ case ORPAREN: /* must undo assignment if rest fails */
i = OPND(s); i = OPND(s);
assert(0 < i && i <= m->g->nsub); assert(0 < i && i <= m->g->nsub);
@ -758,7 +827,6 @@ backref(struct match *m,
return(dp); return(dp);
m->pmatch[i].rm_eo = offsave; m->pmatch[i].rm_eo = offsave;
return(NULL); return(NULL);
break;
default: /* uh oh */ default: /* uh oh */
assert(nope); assert(nope);
break; break;
@ -771,141 +839,36 @@ backref(struct match *m,
} }
/* /*
- fast - step through the string at top speed - walk - step through the string either quickly or slowly
== static const char *fast(struct match *m, const char *start, \ == static const char *walk(struct match *m, const char *start, \
== const char *stop, sopno startst, sopno stopst); == const char *stop, sopno startst, sopno stopst, bool fast);
*/ */
static const char * /* where tentative match ended, or NULL */ static const char * /* where it ended, or NULL */
fast( struct match *m, walk(struct match *m, const char *start, const char *stop, sopno startst,
const char *start, sopno stopst, bool fast)
const char *stop,
sopno startst,
sopno stopst)
{ {
states st = m->st; states st = m->st;
states fresh = m->fresh; states fresh = m->fresh;
states tmp = m->tmp;
const char *p = start;
wint_t c;
wint_t lastc; /* previous c */
wint_t flagch;
int i;
const char *coldp; /* last p after which no match was underway */
size_t clen;
CLEAR(st);
SET1(st, startst);
SP("fast", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st);
ASSIGN(fresh, st);
SP("start", st, *p);
coldp = NULL;
if (start == m->beginp)
c = OUT;
else {
/*
* XXX Wrong if the previous character was multi-byte.
* Newline never is (in encodings supported by FreeBSD),
* so this only breaks the ISWORD tests below.
*/
c = (uch)*(start - 1);
}
for (;;) {
/* next character */
lastc = c;
if (p == m->endp) {
clen = 0;
c = OUT;
} else
clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
if (EQ(st, fresh))
coldp = p;
/* is there an EOL and/or BOL between lastc and c? */
flagch = '\0';
i = 0;
if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
flagch = BOL;
i = m->g->nbol;
}
if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
flagch = (flagch == BOL) ? BOLEOL : EOL;
i += m->g->neol;
}
if (i != 0) {
for (; i > 0; i--)
st = step(m->g, startst, stopst, st, flagch, st);
SP("boleol", st, c);
}
/* how about a word boundary? */
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
(c != OUT && ISWORD(c)) ) {
flagch = BOW;
}
if ( (lastc != OUT && ISWORD(lastc)) &&
(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
flagch = EOW;
}
if (flagch == BOW || flagch == EOW) {
st = step(m->g, startst, stopst, st, flagch, st);
SP("boweow", st, c);
}
/* are we done? */
if (ISSET(st, stopst) || p == stop || clen > stop - p)
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
ASSIGN(tmp, st);
ASSIGN(st, fresh);
assert(c != OUT);
st = step(m->g, startst, stopst, tmp, c, st);
SP("aft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
p += clen;
}
assert(coldp != NULL);
m->coldp = coldp;
if (ISSET(st, stopst))
return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
else
return(NULL);
}
/*
- slow - step through the string more deliberately
== static const char *slow(struct match *m, const char *start, \
== const char *stop, sopno startst, sopno stopst);
*/
static const char * /* where it ended */
slow( struct match *m,
const char *start,
const char *stop,
sopno startst,
sopno stopst)
{
states st = m->st;
states empty = m->empty; states empty = m->empty;
states tmp = m->tmp; states tmp = m->tmp;
const char *p = start; const char *p = start;
wint_t c; wint_t c;
wint_t lastc; /* previous c */ wint_t lastc; /* previous c */
wint_t flagch; wint_t flagch;
int i; int i, sflags;
const char *matchp; /* last p at which a match ended */ const char *matchp; /* last p at which a match ended */
size_t clen; size_t clen;
sflags = 0;
AT("slow", start, stop, startst, stopst); AT("slow", start, stop, startst, stopst);
CLEAR(st); CLEAR(st);
SET1(st, startst); SET1(st, startst);
SP("sstart", st, *p); SP("sstart", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st); st = step(m->g, startst, stopst, st, NOTHING, st, sflags);
if (fast)
ASSIGN(fresh, st);
matchp = NULL; matchp = NULL;
if (start == m->beginp) if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
c = OUT; c = OUT;
else { else {
/* /*
@ -918,12 +881,16 @@ slow( struct match *m,
for (;;) { for (;;) {
/* next character */ /* next character */
lastc = c; lastc = c;
sflags = 0;
if (p == m->endp) { if (p == m->endp) {
c = OUT; c = OUT;
clen = 0; clen = 0;
} else } else
clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
if (fast && EQ(st, fresh))
matchp = p;
/* is there an EOL and/or BOL between lastc and c? */ /* is there an EOL and/or BOL between lastc and c? */
flagch = '\0'; flagch = '\0';
i = 0; i = 0;
@ -937,9 +904,20 @@ slow( struct match *m,
flagch = (flagch == BOL) ? BOLEOL : EOL; flagch = (flagch == BOL) ? BOLEOL : EOL;
i += m->g->neol; i += m->g->neol;
} }
if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) {
sflags |= SBOS;
/* Step one more for BOS. */
i++;
}
if (c == OUT && (m->eflags & REG_NOTEOL) == 0) {
sflags |= SEOS;
/* Step one more for EOS. */
i++;
}
if (i != 0) { if (i != 0) {
for (; i > 0; i--) for (; i > 0; i--)
st = step(m->g, startst, stopst, st, flagch, st); st = step(m->g, startst, stopst, st, flagch, st,
sflags);
SP("sboleol", st, c); SP("sboleol", st, c);
} }
@ -953,30 +931,56 @@ slow( struct match *m,
flagch = EOW; flagch = EOW;
} }
if (flagch == BOW || flagch == EOW) { if (flagch == BOW || flagch == EOW) {
st = step(m->g, startst, stopst, st, flagch, st); st = step(m->g, startst, stopst, st, flagch, st, sflags);
SP("sboweow", st, c); SP("sboweow", st, c);
} }
if (lastc != OUT && c != OUT &&
ISWORD(lastc) == ISWORD(c)) {
flagch = NWBND;
} else if ((lastc == OUT && !ISWORD(c)) ||
(c == OUT && !ISWORD(lastc))) {
flagch = NWBND;
}
if (flagch == NWBND) {
st = step(m->g, startst, stopst, st, flagch, st, sflags);
SP("snwbnd", st, c);
}
/* are we done? */ /* are we done? */
if (ISSET(st, stopst)) if (ISSET(st, stopst)) {
matchp = p; if (fast)
if (EQ(st, empty) || p == stop || clen > stop - p) break;
else
matchp = p;
}
if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
break; /* NOTE BREAK OUT */ break; /* NOTE BREAK OUT */
/* no, we must deal with this character */ /* no, we must deal with this character */
ASSIGN(tmp, st); ASSIGN(tmp, st);
ASSIGN(st, empty); if (fast)
ASSIGN(st, fresh);
else
ASSIGN(st, empty);
assert(c != OUT); assert(c != OUT);
st = step(m->g, startst, stopst, tmp, c, st); st = step(m->g, startst, stopst, tmp, c, st, sflags);
SP("saft", st, c); SP("saft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags),
st));
p += clen; p += clen;
} }
return(matchp); if (fast) {
assert(matchp != NULL);
m->coldp = matchp;
if (ISSET(st, stopst))
return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
else
return (NULL);
} else
return (matchp);
} }
/* /*
- step - map set of states reachable before char to set reachable after - step - map set of states reachable before char to set reachable after
== static states step(struct re_guts *g, sopno start, sopno stop, \ == static states step(struct re_guts *g, sopno start, sopno stop, \
@ -996,7 +1000,8 @@ step(struct re_guts *g,
sopno stop, /* state after stop state within strip */ sopno stop, /* state after stop state within strip */
states bef, /* states reachable before */ states bef, /* states reachable before */
wint_t ch, /* character or NONCHAR code */ wint_t ch, /* character or NONCHAR code */
states aft) /* states already known reachable after */ states aft, /* states already known reachable after */
int sflags) /* state flags */
{ {
cset *cs; cset *cs;
sop s; sop s;
@ -1017,6 +1022,14 @@ step(struct re_guts *g,
if (ch == OPND(s)) if (ch == OPND(s))
FWD(aft, bef, 1); FWD(aft, bef, 1);
break; break;
case OBOS:
if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0)
FWD(aft, bef, 1);
break;
case OEOS:
if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0)
FWD(aft, bef, 1);
break;
case OBOL: case OBOL:
if (ch == BOL || ch == BOLEOL) if (ch == BOL || ch == BOLEOL)
FWD(aft, bef, 1); FWD(aft, bef, 1);
@ -1033,6 +1046,14 @@ step(struct re_guts *g,
if (ch == EOW) if (ch == EOW)
FWD(aft, bef, 1); FWD(aft, bef, 1);
break; break;
case OWBND:
if (ch == BOW || ch == EOW)
FWD(aft, bef, 1);
break;
case ONWBND:
if (ch == NWBND)
FWD(aft, aft, 1);
break;
case OANY: case OANY:
if (!NONCHAR(ch)) if (!NONCHAR(ch))
FWD(aft, bef, 1); FWD(aft, bef, 1);
@ -1072,22 +1093,22 @@ step(struct re_guts *g,
break; break;
case OCH_: /* mark the first two branches */ case OCH_: /* mark the first two branches */
FWD(aft, aft, 1); FWD(aft, aft, 1);
assert(OP(g->strip[pc+OPND(s)]) == OOR2); assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
FWD(aft, aft, OPND(s)); FWD(aft, aft, OPND(s));
break; break;
case OOR1: /* done a branch, find the O_CH */ case OOR1: /* done a branch, find the O_CH */
if (ISSTATEIN(aft, here)) { if (ISSTATEIN(aft, here)) {
for (look = 1; for (look = 1;
OP(s = g->strip[pc+look]) != O_CH; OP(s = g->strip[pc+look]) != (sop)O_CH;
look += OPND(s)) look += OPND(s))
assert(OP(s) == OOR2); assert(OP(s) == (sop)OOR2);
FWD(aft, aft, look + 1); FWD(aft, aft, look + 1);
} }
break; break;
case OOR2: /* propagate OCH_'s marking */ case OOR2: /* propagate OCH_'s marking */
FWD(aft, aft, 1); FWD(aft, aft, 1);
if (OP(g->strip[pc+OPND(s)]) != O_CH) { if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) {
assert(OP(g->strip[pc+OPND(s)]) == OOR2); assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
FWD(aft, aft, OPND(s)); FWD(aft, aft, OPND(s));
} }
break; break;
@ -1119,7 +1140,7 @@ print(struct match *m,
FILE *d) FILE *d)
{ {
struct re_guts *g = m->g; struct re_guts *g = m->g;
int i; sopno i;
int first = 1; int first = 1;
if (!(m->eflags&REG_TRACE)) if (!(m->eflags&REG_TRACE))
@ -1130,7 +1151,7 @@ print(struct match *m,
fprintf(d, " %s", pchar(ch)); fprintf(d, " %s", pchar(ch));
for (i = 0; i < g->nstates; i++) for (i = 0; i < g->nstates; i++)
if (ISSET(st, i)) { if (ISSET(st, i)) {
fprintf(d, "%s%d", (first) ? "\t" : ", ", i); fprintf(d, "%s%lu", (first) ? "\t" : ", ", i);
first = 0; first = 0;
} }
fprintf(d, "\n"); fprintf(d, "\n");
@ -1186,9 +1207,9 @@ pchar(int ch)
#endif #endif
#endif #endif
#undef stepback
#undef matcher #undef matcher
#undef fast #undef walk
#undef slow
#undef dissect #undef dissect
#undef backref #undef backref
#undef step #undef step

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,6 @@
/*- /*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994 * Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved. * The Regents of the University of California. All rights reserved.
@ -14,7 +16,7 @@
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
@ -37,7 +39,7 @@
static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94"; static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94";
#endif /* LIBC_SCCS and not lint */ #endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/regex/regerror.c,v 1.11 2007/06/11 03:05:54 delphij Exp $"); __FBSDID("$FreeBSD$");
#include <sys/types.h> #include <sys/types.h>
#include <stdio.h> #include <stdio.h>
@ -54,7 +56,7 @@ extern "C" {
#endif #endif
/* === regerror.c === */ /* === regerror.c === */
static char *regatoi(const regex_t *preg, char *localbuf); static const char *regatoi(const regex_t *preg, char *localbuf);
#ifdef __cplusplus #ifdef __cplusplus
} }
@ -83,13 +85,8 @@ static char *regatoi(const regex_t *preg, char *localbuf);
*/ */
static struct rerr { static struct rerr {
int code; int code;
#ifdef __CYGWIN__ /* Avoid whining compiler */
const char *name; const char *name;
const char *explain; const char *explain;
#else
char *name;
char *explain;
#endif
} rerrs[] = { } rerrs[] = {
{REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match"}, {REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match"},
{REG_BADPAT, "REG_BADPAT", "invalid regular expression"}, {REG_BADPAT, "REG_BADPAT", "invalid regular expression"},
@ -125,11 +122,7 @@ regerror(int errcode,
struct rerr *r; struct rerr *r;
size_t len; size_t len;
int target = errcode &~ REG_ITOA; int target = errcode &~ REG_ITOA;
#ifdef __CYGWIN__ /* Avoid whining compiler */
const char *s; const char *s;
#else
char *s;
#endif
char convbuf[50]; char convbuf[50];
if (errcode == REG_ATOI) if (errcode == REG_ATOI)
@ -167,7 +160,7 @@ regerror(int errcode,
- regatoi - internal routine to implement REG_ATOI - regatoi - internal routine to implement REG_ATOI
== static char *regatoi(const regex_t *preg, char *localbuf); == static char *regatoi(const regex_t *preg, char *localbuf);
*/ */
static char * static const char *
regatoi(const regex_t *preg, char *localbuf) regatoi(const regex_t *preg, char *localbuf)
{ {
struct rerr *r; struct rerr *r;
@ -176,14 +169,7 @@ regatoi(const regex_t *preg, char *localbuf)
if (strcmp(r->name, preg->re_endp) == 0) if (strcmp(r->name, preg->re_endp) == 0)
break; break;
if (r->code == 0) if (r->code == 0)
#ifdef __CYGWIN__ /* Avoid whining compiler */
{
static char null[] = "0";
return null;
}
#else
return("0"); return("0");
#endif
sprintf(localbuf, "%d", r->code); sprintf(localbuf, "%d", r->code);
return(localbuf); return(localbuf);

View File

@ -13,7 +13,7 @@
.\" 2. Redistributions in binary form must reproduce the above copyright .\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the .\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution. .\" documentation and/or other materials provided with the distribution.
.\" 4. Neither the name of the University nor the names of its contributors .\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software .\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission. .\" without specific prior written permission.
.\" .\"
@ -30,9 +30,9 @@
.\" SUCH DAMAGE. .\" SUCH DAMAGE.
.\" .\"
.\" @(#)regex.3 8.4 (Berkeley) 3/20/94 .\" @(#)regex.3 8.4 (Berkeley) 3/20/94
.\" $FreeBSD: src/lib/libc/regex/regex.3,v 1.21 2007/01/09 00:28:04 imp Exp $ .\" $FreeBSD$
.\" .\"
.Dd August 17, 2005 .Dd April 15, 2017
.Dt REGEX 3 .Dt REGEX 3
.Os .Os
.Sh NAME .Sh NAME
@ -183,6 +183,17 @@ compatible with but not specified by
.St -p1003.2 , .St -p1003.2 ,
and should be used with and should be used with
caution in software intended to be portable to other systems. caution in software intended to be portable to other systems.
.It Dv REG_POSIX
Compile only
.St -p1003.2
compliant expressions.
This flag has no effect unless linking against
.Nm libregex .
This is an extension,
compatible with but not specified by
.St -p1003.2 ,
and should be used with
caution in software intended to be portable to other systems.
.El .El
.Pp .Pp
When successful, When successful,
@ -235,11 +246,16 @@ The
argument is the bitwise OR of zero or more of the following flags: argument is the bitwise OR of zero or more of the following flags:
.Bl -tag -width REG_STARTEND .Bl -tag -width REG_STARTEND
.It Dv REG_NOTBOL .It Dv REG_NOTBOL
The first character of The first character of the string is treated as the continuation
the string of a line.
is not the beginning of a line, so the This means that the anchors
.Ql ^\& .Ql ^\& ,
anchor should not match before it. .Ql [[:<:]] ,
and
.Ql \e<
do not match before it; but see
.Dv REG_STARTEND
below.
This does not affect the behavior of newlines under This does not affect the behavior of newlines under
.Dv REG_NEWLINE . .Dv REG_NEWLINE .
.It Dv REG_NOTEOL .It Dv REG_NOTEOL
@ -247,19 +263,16 @@ The NUL terminating
the string the string
does not end a line, so the does not end a line, so the
.Ql $\& .Ql $\&
anchor should not match before it. anchor does not match before it.
This does not affect the behavior of newlines under This does not affect the behavior of newlines under
.Dv REG_NEWLINE . .Dv REG_NEWLINE .
.It Dv REG_STARTEND .It Dv REG_STARTEND
The string is considered to start at The string is considered to start at
.Fa string .Fa string No +
+ .Fa pmatch Ns [0]. Ns Fa rm_so
.Fa pmatch Ns [0]. Ns Va rm_so and to end before the byte located at
and to have a terminating NUL located at .Fa string No +
.Fa string .Fa pmatch Ns [0]. Ns Fa rm_eo ,
+
.Fa pmatch Ns [0]. Ns Va rm_eo
(there need not actually be a NUL at that location),
regardless of the value of regardless of the value of
.Fa nmatch . .Fa nmatch .
See below for the definition of See below for the definition of
@ -271,13 +284,37 @@ compatible with but not specified by
.St -p1003.2 , .St -p1003.2 ,
and should be used with and should be used with
caution in software intended to be portable to other systems. caution in software intended to be portable to other systems.
Note that a non-zero .Pp
.Va rm_so Without
does not imply .Dv REG_NOTBOL ,
.Dv REG_NOTBOL ; the position
.Dv REG_STARTEND .Fa rm_so
affects only the location of the string, is considered the beginning of a line, such that
not how it is matched. .Ql ^
matches before it, and the beginning of a word if there is a word
character at this position, such that
.Ql [[:<:]]
and
.Ql \e<
match before it.
.Pp
With
.Dv REG_NOTBOL ,
the character at position
.Fa rm_so
is treated as the continuation of a line, and if
.Fa rm_so
is greater than 0, the preceding character is taken into consideration.
If the preceding character is a newline and the regular expression was compiled
with
.Dv REG_NEWLINE ,
.Ql ^
matches before the string; if the preceding character is not a word character
but the string starts with a word character,
.Ql [[:<:]]
and
.Ql \e<
match before the string.
.El .El
.Pp .Pp
See See
@ -420,10 +457,12 @@ it should have been the result from the most recent
using that using that
.Ft regex_t . .Ft regex_t .
The The
.Fn ( regerror .Po
.Fn regerror
may be able to supply a more detailed message using information may be able to supply a more detailed message using information
from the from the
.Ft regex_t . ) .Ft regex_t .
.Pc
The The
.Fn regerror .Fn regerror
function function

View File

@ -13,6 +13,10 @@
.\" 2. Redistributions in binary form must reproduce the above copyright .\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the .\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution. .\" documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\" must display the following acknowledgement:
.\" This product includes software developed by the University of
.\" California, Berkeley and its contributors.
.\" 4. Neither the name of the University nor the names of its contributors .\" 4. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software .\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission. .\" without specific prior written permission.
@ -30,9 +34,9 @@
.\" SUCH DAMAGE. .\" SUCH DAMAGE.
.\" .\"
.\" @(#)re_format.7 8.3 (Berkeley) 3/20/94 .\" @(#)re_format.7 8.3 (Berkeley) 3/20/94
.\" $FreeBSD: src/lib/libc/regex/re_format.7,v 1.12 2008/09/05 17:41:20 keramida Exp $ .\" $FreeBSD$
.\" .\"
.Dd March 20, 1994 .Dd June 30, 2014
.Dt RE_FORMAT 7 .Dt RE_FORMAT 7
.Os .Os
.Sh NAME .Sh NAME
@ -271,7 +275,6 @@ and
stands for the list of all characters belonging to that stands for the list of all characters belonging to that
class. class.
Standard character class names are: Standard character class names are:
.Pp
.Bl -column "alnum" "digit" "xdigit" -offset indent .Bl -column "alnum" "digit" "xdigit" -offset indent
.It Em "alnum digit punct" .It Em "alnum digit punct"
.It Em "alpha graph space" .It Em "alpha graph space"
@ -311,6 +314,13 @@ compatible with but not specified by
.St -p1003.2 , .St -p1003.2 ,
and should be used with and should be used with
caution in software intended to be portable to other systems. caution in software intended to be portable to other systems.
The additional word delimiters
.Ql \e<
and
.Ql \e>
are provided to ease compatibility with traditional
SVR4
systems but are not portable and should be avoided.
.Pp .Pp
In the event that an RE could match more than one substring of a given In the event that an RE could match more than one substring of a given
string, string,
@ -382,10 +392,12 @@ and
.Ql ?\& .Ql ?\&
are ordinary characters, and their functionality are ordinary characters, and their functionality
can be expressed using bounds can be expressed using bounds
.No ( Ql {1,} .Po
.Ql {1,}
or or
.Ql {0,1} .Ql {0,1}
respectively). respectively
.Pc .
Also note that Also note that
.Ql x+ .Ql x+
in modern REs is equivalent to in modern REs is equivalent to

View File

@ -1,4 +1,6 @@
/*- /*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994 * Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved. * The Regents of the University of California. All rights reserved.
@ -14,7 +16,7 @@
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
@ -31,21 +33,21 @@
* SUCH DAMAGE. * SUCH DAMAGE.
* *
* @(#)regex2.h 8.4 (Berkeley) 3/20/94 * @(#)regex2.h 8.4 (Berkeley) 3/20/94
* $FreeBSD: src/lib/libc/regex/regex2.h,v 1.11 2007/01/09 00:28:04 imp Exp $ * $FreeBSD$
*/ */
/* /*
* First, the stuff that ends up in the outside-world include file * First, the stuff that ends up in the outside-world include file
= typedef off_t regoff_t; = typedef off_t regoff_t;
= typedef struct { = typedef struct {
= int re_magic; = int re_magic;
= size_t re_nsub; // number of parenthesized subexpressions = size_t re_nsub; // number of parenthesized subexpressions
= const char *re_endp; // end pointer for REG_PEND = const char *re_endp; // end pointer for REG_PEND
= struct re_guts *re_g; // none of your business :-) = struct re_guts *re_g; // none of your business :-)
= } regex_t; = } regex_t;
= typedef struct { = typedef struct {
= regoff_t rm_so; // start of match = regoff_t rm_so; // start of match
= regoff_t rm_eo; // end of match = regoff_t rm_eo; // end of match
= } regmatch_t; = } regmatch_t;
*/ */
/* /*
@ -73,7 +75,7 @@
* immediately *preceding* "execution" of that operator. * immediately *preceding* "execution" of that operator.
*/ */
typedef unsigned long sop; /* strip operator */ typedef unsigned long sop; /* strip operator */
typedef long sopno; typedef unsigned long sopno;
#define OPRMASK 0xf8000000L #define OPRMASK 0xf8000000L
#define OPDMASK 0x07ffffffL #define OPDMASK 0x07ffffffL
#define OPSHIFT ((unsigned)27) #define OPSHIFT ((unsigned)27)
@ -102,6 +104,10 @@ typedef long sopno;
#define O_CH (18L<<OPSHIFT) /* end choice back to OOR1 */ #define O_CH (18L<<OPSHIFT) /* end choice back to OOR1 */
#define OBOW (19L<<OPSHIFT) /* begin word - */ #define OBOW (19L<<OPSHIFT) /* begin word - */
#define OEOW (20L<<OPSHIFT) /* end word - */ #define OEOW (20L<<OPSHIFT) /* end word - */
#define OBOS (21L<<OPSHIFT) /* begin subj. - */
#define OEOS (22L<<OPSHIFT) /* end subj. - */
#define OWBND (23L<<OPSHIFT) /* word bound - */
#define ONWBND (24L<<OPSHIFT) /* not bound - */
/* /*
* Structures for [] character-set representation. * Structures for [] character-set representation.
@ -111,13 +117,13 @@ typedef struct {
wint_t max; wint_t max;
} crange; } crange;
typedef struct { typedef struct {
unsigned char bmp[NC / 8]; unsigned char bmp[NC_MAX / 8];
wctype_t *types; wctype_t *types;
int ntypes; unsigned int ntypes;
wint_t *wides; wint_t *wides;
int nwides; unsigned int nwides;
crange *ranges; crange *ranges;
int nranges; unsigned int nranges;
int invert; int invert;
int icase; int icase;
} cset; } cset;
@ -125,15 +131,20 @@ typedef struct {
static int static int
CHIN1(cset *cs, wint_t ch) CHIN1(cset *cs, wint_t ch)
{ {
int i; unsigned int i;
assert(ch >= 0); assert(ch >= 0);
if (ch < NC) if (ch < NC)
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^ return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
cs->invert); cs->invert);
for (i = 0; i < cs->nwides; i++) for (i = 0; i < cs->nwides; i++) {
if (ch == cs->wides[i]) if (cs->icase) {
if (ch == towlower(cs->wides[i]) ||
ch == towupper(cs->wides[i]))
return (!cs->invert);
} else if (ch == cs->wides[i])
return (!cs->invert); return (!cs->invert);
}
for (i = 0; i < cs->nranges; i++) for (i = 0; i < cs->nranges; i++)
if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max) if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max)
return (!cs->invert); return (!cs->invert);
@ -151,14 +162,10 @@ CHIN(cset *cs, wint_t ch)
if (ch < NC) if (ch < NC)
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^ return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
cs->invert); cs->invert);
else if (cs->icase) { else if (cs->icase)
if (cs->invert) return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
return (CHIN1(cs, ch) && CHIN1(cs, towlower(ch)) && CHIN1(cs, towupper(ch)));
CHIN1(cs, towupper(ch))); else
else
return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
CHIN1(cs, towupper(ch)));
} else
return (CHIN1(cs, ch)); return (CHIN1(cs, ch));
} }
@ -169,7 +176,7 @@ struct re_guts {
int magic; int magic;
# define MAGIC2 ((('R'^0200)<<8)|'E') # define MAGIC2 ((('R'^0200)<<8)|'E')
sop *strip; /* malloced area for strip */ sop *strip; /* malloced area for strip */
int ncsets; /* number of csets in use */ unsigned int ncsets; /* number of csets in use */
cset *sets; /* -> cset [ncsets] */ cset *sets; /* -> cset [ncsets] */
int cflags; /* copy of regcomp() cflags argument */ int cflags; /* copy of regcomp() cflags argument */
sopno nstates; /* = number of sops */ sopno nstates; /* = number of sops */
@ -193,4 +200,5 @@ struct re_guts {
/* misc utilities */ /* misc utilities */
#define OUT (CHAR_MIN - 1) /* a non-character value */ #define OUT (CHAR_MIN - 1) /* a non-character value */
#define ISWORD(c) (iswalnum((wint_t)(c)) || (c) == '_') #define IGN (CHAR_MIN - 2)
#define ISWORD(c) (iswalnum((uch)(c)) || (c) == '_')

View File

@ -1,4 +1,6 @@
/*- /*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994 * Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved. * The Regents of the University of California. All rights reserved.
@ -14,7 +16,7 @@
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
@ -37,7 +39,7 @@
static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94"; static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94";
#endif /* LIBC_SCCS and not lint */ #endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/regex/regexec.c,v 1.8 2007/06/11 03:05:54 delphij Exp $"); __FBSDID("$FreeBSD$");
/* /*
* the outer shell of regexec() * the outer shell of regexec()
@ -46,9 +48,6 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/regexec.c,v 1.8 2007/06/11 03:05:54 delph
* macros that code uses. This lets the same code operate on two different * macros that code uses. This lets the same code operate on two different
* representations for state sets and characters. * representations for state sets and characters.
*/ */
#ifdef __CYGWIN__
#include "winsup.h"
#endif
#include <sys/types.h> #include <sys/types.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -68,9 +67,9 @@ static __inline size_t
xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy) xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
{ {
size_t nr; size_t nr;
wint_t wc; wchar_t wc;
nr = mbrtowi(&wc, s, n, mbs); nr = mbrtowc(&wc, s, n, mbs);
if (wi != NULL) if (wi != NULL)
*wi = wc; *wi = wc;
if (nr == 0) if (nr == 0)
@ -98,8 +97,8 @@ xmbrtowc_dummy(wint_t *wi,
} }
/* macros for manipulating states, small version */ /* macros for manipulating states, small version */
#define states long #define states1 long /* for later use in regexec() decision */
#define states1 states /* for later use in regexec() decision */ #define states states1
#define CLEAR(v) ((v) = 0) #define CLEAR(v) ((v) = 0)
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) #define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
#define SET1(v, n) ((v) |= (unsigned long)1 << (n)) #define SET1(v, n) ((v) |= (unsigned long)1 << (n))
@ -228,9 +227,9 @@ regexec(const regex_t * __restrict preg,
eflags = GOODFLAGS(eflags); eflags = GOODFLAGS(eflags);
if (MB_CUR_MAX > 1) if (MB_CUR_MAX > 1)
return(mmatcher(g, (char *)string, nmatch, pmatch, eflags)); return(mmatcher(g, string, nmatch, pmatch, eflags));
else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); return(smatcher(g, string, nmatch, pmatch, eflags));
else else
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); return(lmatcher(g, string, nmatch, pmatch, eflags));
} }

View File

@ -1,4 +1,6 @@
/*- /*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994 * Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved. * The Regents of the University of California. All rights reserved.
@ -14,7 +16,7 @@
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
@ -37,7 +39,7 @@
static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94"; static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94";
#endif /* LIBC_SCCS and not lint */ #endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/regex/regfree.c,v 1.8 2007/06/11 03:05:54 delphij Exp $"); __FBSDID("$FreeBSD$");
#include <sys/types.h> #include <sys/types.h>
#include <stdio.h> #include <stdio.h>
@ -58,7 +60,7 @@ void
regfree(regex_t *preg) regfree(regex_t *preg)
{ {
struct re_guts *g; struct re_guts *g;
int i; unsigned int i;
if (preg->re_magic != MAGIC1) /* oops */ if (preg->re_magic != MAGIC1) /* oops */
return; /* nice to complain, but hard */ return; /* nice to complain, but hard */

View File

@ -1,4 +1,6 @@
/*- /*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994 * Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved. * The Regents of the University of California. All rights reserved.
@ -14,7 +16,7 @@
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution. * documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software * may be used to endorse or promote products derived from this software
* without specific prior written permission. * without specific prior written permission.
* *
@ -31,13 +33,15 @@
* SUCH DAMAGE. * SUCH DAMAGE.
* *
* @(#)utils.h 8.3 (Berkeley) 3/20/94 * @(#)utils.h 8.3 (Berkeley) 3/20/94
* $FreeBSD: src/lib/libc/regex/utils.h,v 1.3 2007/01/09 00:28:04 imp Exp $ * $FreeBSD$
*/ */
/* utility definitions */ /* utility definitions */
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */ #define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
#define INFINITY (DUPMAX + 1) #define INFINITY (DUPMAX + 1)
#define NC (CHAR_MAX - CHAR_MIN + 1)
#define NC_MAX (CHAR_MAX - CHAR_MIN + 1)
#define NC ((MB_CUR_MAX) == 1 ? (NC_MAX) : (128))
typedef unsigned char uch; typedef unsigned char uch;
/* switch off assertions (if not already off) if no REDEBUG */ /* switch off assertions (if not already off) if no REDEBUG */