Cygwin: replace regex with latest verbatim FreeBSD version
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
parent
2a4dd6a239
commit
24f34edc2a
|
@ -1,4 +1,6 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright (c) 1992 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
|
@ -14,7 +16,7 @@
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
|
@ -38,32 +40,20 @@
|
|||
#define _REGEX_H_
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
#include <_ansi.h>
|
||||
#include <sys/_types.h>
|
||||
|
||||
/* types */
|
||||
#ifdef __CYGWIN__
|
||||
typedef _off_t regoff_t;
|
||||
|
||||
#define __need_size_t
|
||||
#include <stddef.h>
|
||||
#else /* !__CYGWIN__ */
|
||||
typedef __off_t regoff_t;
|
||||
|
||||
#ifndef _SIZE_T_DECLARED
|
||||
typedef __size_t size_t;
|
||||
#define _SIZE_T_DECLARED
|
||||
#endif
|
||||
#endif /* !__CYGWIN__ */
|
||||
|
||||
typedef struct {
|
||||
int re_magic;
|
||||
size_t re_nsub; /* number of parenthesized subexpressions */
|
||||
#ifdef __CYGWIN__
|
||||
const char *re_endp; /* end pointer for REG_PEND */
|
||||
#else
|
||||
__const char *re_endp; /* end pointer for REG_PEND */
|
||||
#endif
|
||||
struct re_guts *re_g; /* none of your business :-) */
|
||||
} regex_t;
|
||||
|
||||
|
@ -81,12 +71,10 @@ typedef struct {
|
|||
#define REG_NOSPEC 0020
|
||||
#define REG_PEND 0040
|
||||
#define REG_DUMP 0200
|
||||
#define REG_POSIX 0400 /* only POSIX-compliant regex (libregex) */
|
||||
|
||||
/* regerror() flags */
|
||||
#define REG_ENOSYS (-1)
|
||||
#ifdef __CYGWIN__
|
||||
#define REG_NOERROR 0 /* GNU extension */
|
||||
#endif
|
||||
#define REG_NOMATCH 1
|
||||
#define REG_BADPAT 2
|
||||
#define REG_ECOLLATE 3
|
||||
|
|
|
@ -32,6 +32,10 @@ to the following restrictions:
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
|
@ -14,7 +16,7 @@
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
|
@ -31,7 +33,7 @@
|
|||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)cname.h 8.3 (Berkeley) 3/20/94
|
||||
* $FreeBSD: src/lib/libc/regex/cname.h,v 1.4 2007/01/09 00:28:04 imp Exp $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/* character-name table */
|
||||
|
@ -108,7 +110,7 @@ static struct cname {
|
|||
{"four", '4'},
|
||||
{"five", '5'},
|
||||
{"six", '6'},
|
||||
{"seven", '7'},
|
||||
{"seven", '7'},
|
||||
{"eight", '8'},
|
||||
{"nine", '9'},
|
||||
{"colon", ':'},
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
|
@ -14,7 +16,7 @@
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
|
@ -34,7 +36,9 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds Exp $");
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
|
||||
* The matching engine and friends. This file is #included by regexec.c
|
||||
|
@ -44,9 +48,9 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds E
|
|||
*/
|
||||
|
||||
#ifdef SNAMES
|
||||
#define stepback sstepback
|
||||
#define matcher smatcher
|
||||
#define fast sfast
|
||||
#define slow sslow
|
||||
#define walk swalk
|
||||
#define dissect sdissect
|
||||
#define backref sbackref
|
||||
#define step sstep
|
||||
|
@ -55,9 +59,9 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds E
|
|||
#define match smat
|
||||
#endif
|
||||
#ifdef LNAMES
|
||||
#define stepback lstepback
|
||||
#define matcher lmatcher
|
||||
#define fast lfast
|
||||
#define slow lslow
|
||||
#define walk lwalk
|
||||
#define dissect ldissect
|
||||
#define backref lbackref
|
||||
#define step lstep
|
||||
|
@ -66,9 +70,9 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds E
|
|||
#define match lmat
|
||||
#endif
|
||||
#ifdef MNAMES
|
||||
#define stepback mstepback
|
||||
#define matcher mmatcher
|
||||
#define fast mfast
|
||||
#define slow mslow
|
||||
#define walk mwalk
|
||||
#define dissect mdissect
|
||||
#define backref mbackref
|
||||
#define step mstep
|
||||
|
@ -104,9 +108,8 @@ extern "C" {
|
|||
static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
|
||||
static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||
static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
|
||||
static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast);
|
||||
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags);
|
||||
#define MAX_RECURSION 100
|
||||
#define BOL (OUT-1)
|
||||
#define EOL (BOL-1)
|
||||
|
@ -115,10 +118,12 @@ static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_
|
|||
#define BOW (BOL-4)
|
||||
#define EOW (BOL-5)
|
||||
#define BADCHAR (BOL-6)
|
||||
/* When using wint_t, which is defined as unsigned int on BSD,
|
||||
as well as on Cygwin or Linux, the NONCHAR test is broken without
|
||||
the below cast. I'm wondering how this is supposed to work at all... */
|
||||
#define NONCHAR(c) ((int)(c) <= OUT)
|
||||
#define NWBND (BOL-7)
|
||||
#define NONCHAR(c) ((c) <= OUT)
|
||||
/* sflags */
|
||||
#define SBOS 0x0001
|
||||
#define SEOS 0x0002
|
||||
|
||||
#ifdef REDEBUG
|
||||
static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
|
||||
#endif
|
||||
|
@ -144,6 +149,39 @@ static const char *pchar(int ch);
|
|||
#define NOTE(s) /* nothing */
|
||||
#endif
|
||||
|
||||
/*
 * stepback - step backwards over whole characters in a multibyte string.
 *
 *	start	first byte of the string; nothing before it is ever examined
 *	cur	current position inside the string (start <= cur)
 *	nchar	number of characters to step back over (expected >= 0)
 *
 * Returns the position nchar characters before cur, or NULL if that
 * position cannot be used:
 *  - single-byte locale (MB_CUR_MAX == 1): NULL unless the result lies
 *    strictly after start;
 *  - multibyte locale: NULL if stepping would have to look before start,
 *    or if no sequence of 1..MB_CUR_MAX bytes ending at the current
 *    position is accepted by mbrtowc().
 */
static const char *
stepback(const char *start, const char *cur, int nchar)
{
	const char *ret;
	int wc, mbc;
	mbstate_t mbs;
	size_t clen;

	/*
	 * Compare distances instead of forming (cur - nchar) first: merely
	 * computing a pointer below the start of the buffer is undefined
	 * behaviour, even if it is only used in a comparison.
	 */
	if (MB_CUR_MAX == 1)
		return ((cur - start) > nchar ? cur - nchar : NULL);

	ret = cur;
	for (wc = nchar; wc > 0; wc--) {
		/* Try ever longer byte sequences ending at ret. */
		for (mbc = 1; mbc <= (int)MB_CUR_MAX; mbc++) {
			/* Fewer than mbc bytes available before ret. */
			if ((ret - start) < mbc)
				return (NULL);
			/* Each probe starts from the initial shift state. */
			memset(&mbs, 0, sizeof(mbs));
			clen = mbrtowc(NULL, ret - mbc, mbc, &mbs);
			/* (size_t)-1: invalid, (size_t)-2: incomplete. */
			if (clen != (size_t)-1 && clen != (size_t)-2)
				break;
		}
		/* No candidate length produced a decodable character. */
		if (mbc > (int)MB_CUR_MAX)
			return (NULL);
		ret -= mbc;
	}

	return (ret);
}
|
||||
|
||||
/*
|
||||
- matcher - the actual matching engine
|
||||
== static int matcher(struct re_guts *g, const char *string, \
|
||||
|
@ -157,7 +195,7 @@ matcher(struct re_guts *g,
|
|||
int eflags)
|
||||
{
|
||||
const char *endp;
|
||||
int i;
|
||||
size_t i;
|
||||
struct match mv;
|
||||
struct match *m = &mv;
|
||||
const char *dp = NULL;
|
||||
|
@ -247,17 +285,19 @@ matcher(struct re_guts *g,
|
|||
ZAPSTATE(&m->mbs);
|
||||
|
||||
/* Adjust start according to moffset, to speed things up */
|
||||
#ifndef MNAMES
|
||||
/* The code evaluating moffset doesn't seem to work right
|
||||
in the multibyte case. */
|
||||
if (g->moffset > -1)
|
||||
start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
|
||||
#endif
|
||||
if (dp != NULL && g->moffset > -1) {
|
||||
const char *nstart;
|
||||
|
||||
nstart = stepback(start, dp, g->moffset);
|
||||
if (nstart != NULL)
|
||||
start = nstart;
|
||||
}
|
||||
|
||||
SP("mloop", m->st, *start);
|
||||
|
||||
/* this loop does only one repetition except for backrefs */
|
||||
for (;;) {
|
||||
endp = fast(m, start, stop, gf, gl);
|
||||
endp = walk(m, start, stop, gf, gl, true);
|
||||
if (endp == NULL) { /* a miss */
|
||||
if (m->pmatch != NULL)
|
||||
free((char *)m->pmatch);
|
||||
|
@ -273,7 +313,7 @@ matcher(struct re_guts *g,
|
|||
assert(m->coldp != NULL);
|
||||
for (;;) {
|
||||
NOTE("finding start");
|
||||
endp = slow(m, m->coldp, stop, gf, gl);
|
||||
endp = walk(m, m->coldp, stop, gf, gl, false);
|
||||
if (endp != NULL)
|
||||
break;
|
||||
assert(m->coldp < m->endp);
|
||||
|
@ -318,7 +358,7 @@ matcher(struct re_guts *g,
|
|||
if (dp != NULL || endp <= m->coldp)
|
||||
break; /* defeat */
|
||||
NOTE("backoff");
|
||||
endp = slow(m, m->coldp, endp-1, gf, gl);
|
||||
endp = walk(m, m->coldp, endp-1, gf, gl, false);
|
||||
if (endp == NULL)
|
||||
break; /* defeat */
|
||||
/* try it on a shorter possibility */
|
||||
|
@ -391,7 +431,7 @@ dissect(struct match *m,
|
|||
const char *ssp; /* start of string matched by subsubRE */
|
||||
const char *sep; /* end of string matched by subsubRE */
|
||||
const char *oldssp; /* previous ssp */
|
||||
const char *dp __attribute__ ((unused));
|
||||
const char *dp __unused;
|
||||
|
||||
AT("diss", start, stop, startst, stopst);
|
||||
sp = start;
|
||||
|
@ -404,7 +444,7 @@ dissect(struct match *m,
|
|||
es += OPND(m->g->strip[es]);
|
||||
break;
|
||||
case OCH_:
|
||||
while (OP(m->g->strip[es]) != O_CH)
|
||||
while (OP(m->g->strip[es]) != (sop)O_CH)
|
||||
es += OPND(m->g->strip[es]);
|
||||
break;
|
||||
}
|
||||
|
@ -422,6 +462,10 @@ dissect(struct match *m,
|
|||
case OEOL:
|
||||
case OBOW:
|
||||
case OEOW:
|
||||
case OBOS:
|
||||
case OEOS:
|
||||
case OWBND:
|
||||
case ONWBND:
|
||||
break;
|
||||
case OANY:
|
||||
case OANYOF:
|
||||
|
@ -436,10 +480,10 @@ dissect(struct match *m,
|
|||
stp = stop;
|
||||
for (;;) {
|
||||
/* how long could this one be? */
|
||||
rest = slow(m, sp, stp, ss, es);
|
||||
rest = walk(m, sp, stp, ss, es, false);
|
||||
assert(rest != NULL); /* it did match */
|
||||
/* could the rest match the rest? */
|
||||
tail = slow(m, rest, stop, es, stopst);
|
||||
tail = walk(m, rest, stop, es, stopst, false);
|
||||
if (tail == stop)
|
||||
break; /* yes! */
|
||||
/* no -- try a shorter match for this one */
|
||||
|
@ -449,7 +493,7 @@ dissect(struct match *m,
|
|||
ssub = ss + 1;
|
||||
esub = es - 1;
|
||||
/* did innards match? */
|
||||
if (slow(m, sp, rest, ssub, esub) != NULL) {
|
||||
if (walk(m, sp, rest, ssub, esub, false) != NULL) {
|
||||
dp = dissect(m, sp, rest, ssub, esub);
|
||||
assert(dp == rest);
|
||||
} else /* no */
|
||||
|
@ -460,10 +504,10 @@ dissect(struct match *m,
|
|||
stp = stop;
|
||||
for (;;) {
|
||||
/* how long could this one be? */
|
||||
rest = slow(m, sp, stp, ss, es);
|
||||
rest = walk(m, sp, stp, ss, es, false);
|
||||
assert(rest != NULL); /* it did match */
|
||||
/* could the rest match the rest? */
|
||||
tail = slow(m, rest, stop, es, stopst);
|
||||
tail = walk(m, rest, stop, es, stopst, false);
|
||||
if (tail == stop)
|
||||
break; /* yes! */
|
||||
/* no -- try a shorter match for this one */
|
||||
|
@ -475,7 +519,7 @@ dissect(struct match *m,
|
|||
ssp = sp;
|
||||
oldssp = ssp;
|
||||
for (;;) { /* find last match of innards */
|
||||
sep = slow(m, ssp, rest, ssub, esub);
|
||||
sep = walk(m, ssp, rest, ssub, esub, false);
|
||||
if (sep == NULL || sep == ssp)
|
||||
break; /* failed or matched null */
|
||||
oldssp = ssp; /* on to next try */
|
||||
|
@ -487,7 +531,7 @@ dissect(struct match *m,
|
|||
ssp = oldssp;
|
||||
}
|
||||
assert(sep == rest); /* must exhaust substring */
|
||||
assert(slow(m, ssp, sep, ssub, esub) == rest);
|
||||
assert(walk(m, ssp, sep, ssub, esub, false) == rest);
|
||||
dp = dissect(m, ssp, sep, ssub, esub);
|
||||
assert(dp == sep);
|
||||
sp = rest;
|
||||
|
@ -496,10 +540,10 @@ dissect(struct match *m,
|
|||
stp = stop;
|
||||
for (;;) {
|
||||
/* how long could this one be? */
|
||||
rest = slow(m, sp, stp, ss, es);
|
||||
rest = walk(m, sp, stp, ss, es, false);
|
||||
assert(rest != NULL); /* it did match */
|
||||
/* could the rest match the rest? */
|
||||
tail = slow(m, rest, stop, es, stopst);
|
||||
tail = walk(m, rest, stop, es, stopst, false);
|
||||
if (tail == stop)
|
||||
break; /* yes! */
|
||||
/* no -- try a shorter match for this one */
|
||||
|
@ -510,7 +554,7 @@ dissect(struct match *m,
|
|||
esub = ss + OPND(m->g->strip[ss]) - 1;
|
||||
assert(OP(m->g->strip[esub]) == OOR1);
|
||||
for (;;) { /* find first matching branch */
|
||||
if (slow(m, sp, rest, ssub, esub) == rest)
|
||||
if (walk(m, sp, rest, ssub, esub, false) == rest)
|
||||
break; /* it matched all of it */
|
||||
/* that one missed, try next one */
|
||||
assert(OP(m->g->strip[esub]) == OOR1);
|
||||
|
@ -518,7 +562,7 @@ dissect(struct match *m,
|
|||
assert(OP(m->g->strip[esub]) == OOR2);
|
||||
ssub = esub + 1;
|
||||
esub += OPND(m->g->strip[esub]);
|
||||
if (OP(m->g->strip[esub]) == OOR2)
|
||||
if (OP(m->g->strip[esub]) == (sop)OOR2)
|
||||
esub--;
|
||||
else
|
||||
assert(OP(m->g->strip[esub]) == O_CH);
|
||||
|
@ -554,6 +598,17 @@ dissect(struct match *m,
|
|||
return(sp);
|
||||
}
|
||||
|
||||
/*
 * ISBOW - true when sp is at the beginning of a word in match m:
 * sp points at a word character inside the subject, and either sp is
 * the start of the subject (and REG_NOTBOL is not set) or the preceding
 * character, which must lie after m->offp, is not a word character.
 * Arguments are evaluated more than once.
 */
#define	ISBOW(m, sp)					\
    ((sp) < (m)->endp && ISWORD(*(sp)) &&		\
    (((sp) == (m)->beginp && !((m)->eflags&REG_NOTBOL)) ||	\
    ((sp) > (m)->offp && !ISWORD(*((sp)-1)))))
|
||||
/*
 * ISEOW - true when sp is at the end of a word in match m:
 * the character before sp (sp must be after m->beginp) is a word
 * character, and sp is either the end of the subject (with REG_NOTEOL
 * not set), a newline while REG_NEWLINE is in the compile flags, or any
 * non-word character inside the subject.
 * Arguments are evaluated more than once.
 */
#define	ISEOW(m, sp)					\
    ((((sp) == (m)->endp && !((m)->eflags&REG_NOTEOL)) ||	\
    ((sp) < (m)->endp && *(sp) == '\n' &&		\
    ((m)->g->cflags&REG_NEWLINE)) ||			\
    ((sp) < (m)->endp && !ISWORD(*(sp)))) &&		\
    ((sp) > (m)->beginp && ISWORD(*((sp)-1))))
|
||||
|
||||
/*
|
||||
- backref - figure out what matched what, figuring in back references
|
||||
== static const char *backref(struct match *m, const char *start, \
|
||||
|
@ -611,10 +666,22 @@ backref(struct match *m,
|
|||
if (wc == BADCHAR || !CHIN(cs, wc))
|
||||
return(NULL);
|
||||
break;
|
||||
case OBOS:
|
||||
if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0)
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
break;
|
||||
case OEOS:
|
||||
if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0)
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
break;
|
||||
case OBOL:
|
||||
if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
|
||||
(sp < m->endp && *(sp-1) == '\n' &&
|
||||
(m->g->cflags®_NEWLINE)) )
|
||||
if ((sp == m->beginp && !(m->eflags®_NOTBOL)) ||
|
||||
(sp > m->offp && sp < m->endp &&
|
||||
*(sp-1) == '\n' && (m->g->cflags®_NEWLINE)))
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
|
@ -627,23 +694,29 @@ backref(struct match *m,
|
|||
else
|
||||
return(NULL);
|
||||
break;
|
||||
case OWBND:
|
||||
if (ISBOW(m, sp) || ISEOW(m, sp))
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
break;
|
||||
case ONWBND:
|
||||
if (((sp == m->beginp) && !ISWORD(*sp)) ||
|
||||
(sp == m->endp && !ISWORD(*(sp - 1))))
|
||||
{ /* yes, beginning/end of subject */ }
|
||||
else if (ISWORD(*(sp - 1)) == ISWORD(*sp))
|
||||
{ /* yes, beginning/end of subject */ }
|
||||
else
|
||||
return(NULL);
|
||||
break;
|
||||
case OBOW:
|
||||
if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
|
||||
(sp < m->endp && *(sp-1) == '\n' &&
|
||||
(m->g->cflags®_NEWLINE)) ||
|
||||
(sp > m->beginp &&
|
||||
!ISWORD(*(sp-1))) ) &&
|
||||
(sp < m->endp && ISWORD(*sp)) )
|
||||
if (ISBOW(m, sp))
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
break;
|
||||
case OEOW:
|
||||
if (( (sp == m->endp && !(m->eflags®_NOTEOL)) ||
|
||||
(sp < m->endp && *sp == '\n' &&
|
||||
(m->g->cflags®_NEWLINE)) ||
|
||||
(sp < m->endp && !ISWORD(*sp)) ) &&
|
||||
(sp > m->beginp && ISWORD(*(sp-1))) )
|
||||
if (ISEOW(m, sp))
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
|
@ -656,7 +729,7 @@ backref(struct match *m,
|
|||
do {
|
||||
assert(OP(s) == OOR2);
|
||||
ss += OPND(s);
|
||||
} while (OP(s = m->g->strip[ss]) != O_CH);
|
||||
} while (OP(s = m->g->strip[ss]) != (sop)O_CH);
|
||||
/* note that the ss++ gets us past the O_CH */
|
||||
break;
|
||||
default: /* have to make a choice */
|
||||
|
@ -689,22 +762,19 @@ backref(struct match *m,
|
|||
ssp = m->offp + m->pmatch[i].rm_so;
|
||||
if (memcmp(sp, ssp, len) != 0)
|
||||
return(NULL);
|
||||
while (m->g->strip[ss] != SOP(O_BACK, i))
|
||||
while (m->g->strip[ss] != (sop)SOP(O_BACK, i))
|
||||
ss++;
|
||||
return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
|
||||
break;
|
||||
case OQUEST_: /* to null or not */
|
||||
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
|
||||
if (dp != NULL)
|
||||
return(dp); /* not */
|
||||
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
|
||||
break;
|
||||
case OPLUS_:
|
||||
assert(m->lastpos != NULL);
|
||||
assert(lev+1 <= m->g->nplus);
|
||||
m->lastpos[lev+1] = sp;
|
||||
return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
|
||||
break;
|
||||
case O_PLUS:
|
||||
if (sp == m->lastpos[lev]) /* last pass matched null */
|
||||
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
|
||||
|
@ -715,7 +785,6 @@ backref(struct match *m,
|
|||
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
|
||||
else
|
||||
return(dp);
|
||||
break;
|
||||
case OCH_: /* find the right one, if any */
|
||||
ssub = ss + 1;
|
||||
esub = ss + OPND(s) - 1;
|
||||
|
@ -725,17 +794,18 @@ backref(struct match *m,
|
|||
if (dp != NULL)
|
||||
return(dp);
|
||||
/* that one missed, try next one */
|
||||
if (OP(m->g->strip[esub]) == O_CH)
|
||||
if (OP(m->g->strip[esub]) == (sop)O_CH)
|
||||
return(NULL); /* there is none */
|
||||
esub++;
|
||||
assert(OP(m->g->strip[esub]) == OOR2);
|
||||
assert(OP(m->g->strip[esub]) == (sop)OOR2);
|
||||
ssub = esub + 1;
|
||||
esub += OPND(m->g->strip[esub]);
|
||||
if (OP(m->g->strip[esub]) == OOR2)
|
||||
if (OP(m->g->strip[esub]) == (sop)OOR2)
|
||||
esub--;
|
||||
else
|
||||
assert(OP(m->g->strip[esub]) == O_CH);
|
||||
}
|
||||
/* NOTREACHED */
|
||||
break;
|
||||
case OLPAREN: /* must undo assignment if rest fails */
|
||||
i = OPND(s);
|
||||
|
@ -747,7 +817,6 @@ backref(struct match *m,
|
|||
return(dp);
|
||||
m->pmatch[i].rm_so = offsave;
|
||||
return(NULL);
|
||||
break;
|
||||
case ORPAREN: /* must undo assignment if rest fails */
|
||||
i = OPND(s);
|
||||
assert(0 < i && i <= m->g->nsub);
|
||||
|
@ -758,7 +827,6 @@ backref(struct match *m,
|
|||
return(dp);
|
||||
m->pmatch[i].rm_eo = offsave;
|
||||
return(NULL);
|
||||
break;
|
||||
default: /* uh oh */
|
||||
assert(nope);
|
||||
break;
|
||||
|
@ -771,141 +839,36 @@ backref(struct match *m,
|
|||
}
|
||||
|
||||
/*
|
||||
- fast - step through the string at top speed
|
||||
== static const char *fast(struct match *m, const char *start, \
|
||||
== const char *stop, sopno startst, sopno stopst);
|
||||
- walk - step through the string either quickly or slowly
|
||||
== static const char *walk(struct match *m, const char *start, \
|
||||
== const char *stop, sopno startst, sopno stopst, bool fast);
|
||||
*/
|
||||
static const char * /* where tentative match ended, or NULL */
|
||||
fast( struct match *m,
|
||||
const char *start,
|
||||
const char *stop,
|
||||
sopno startst,
|
||||
sopno stopst)
|
||||
static const char * /* where it ended, or NULL */
|
||||
walk(struct match *m, const char *start, const char *stop, sopno startst,
|
||||
sopno stopst, bool fast)
|
||||
{
|
||||
states st = m->st;
|
||||
states fresh = m->fresh;
|
||||
states tmp = m->tmp;
|
||||
const char *p = start;
|
||||
wint_t c;
|
||||
wint_t lastc; /* previous c */
|
||||
wint_t flagch;
|
||||
int i;
|
||||
const char *coldp; /* last p after which no match was underway */
|
||||
size_t clen;
|
||||
|
||||
CLEAR(st);
|
||||
SET1(st, startst);
|
||||
SP("fast", st, *p);
|
||||
st = step(m->g, startst, stopst, st, NOTHING, st);
|
||||
ASSIGN(fresh, st);
|
||||
SP("start", st, *p);
|
||||
coldp = NULL;
|
||||
if (start == m->beginp)
|
||||
c = OUT;
|
||||
else {
|
||||
/*
|
||||
* XXX Wrong if the previous character was multi-byte.
|
||||
* Newline never is (in encodings supported by FreeBSD),
|
||||
* so this only breaks the ISWORD tests below.
|
||||
*/
|
||||
c = (uch)*(start - 1);
|
||||
}
|
||||
for (;;) {
|
||||
/* next character */
|
||||
lastc = c;
|
||||
if (p == m->endp) {
|
||||
clen = 0;
|
||||
c = OUT;
|
||||
} else
|
||||
clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
|
||||
if (EQ(st, fresh))
|
||||
coldp = p;
|
||||
|
||||
/* is there an EOL and/or BOL between lastc and c? */
|
||||
flagch = '\0';
|
||||
i = 0;
|
||||
if ( (lastc == '\n' && m->g->cflags®_NEWLINE) ||
|
||||
(lastc == OUT && !(m->eflags®_NOTBOL)) ) {
|
||||
flagch = BOL;
|
||||
i = m->g->nbol;
|
||||
}
|
||||
if ( (c == '\n' && m->g->cflags®_NEWLINE) ||
|
||||
(c == OUT && !(m->eflags®_NOTEOL)) ) {
|
||||
flagch = (flagch == BOL) ? BOLEOL : EOL;
|
||||
i += m->g->neol;
|
||||
}
|
||||
if (i != 0) {
|
||||
for (; i > 0; i--)
|
||||
st = step(m->g, startst, stopst, st, flagch, st);
|
||||
SP("boleol", st, c);
|
||||
}
|
||||
|
||||
/* how about a word boundary? */
|
||||
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
|
||||
(c != OUT && ISWORD(c)) ) {
|
||||
flagch = BOW;
|
||||
}
|
||||
if ( (lastc != OUT && ISWORD(lastc)) &&
|
||||
(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
|
||||
flagch = EOW;
|
||||
}
|
||||
if (flagch == BOW || flagch == EOW) {
|
||||
st = step(m->g, startst, stopst, st, flagch, st);
|
||||
SP("boweow", st, c);
|
||||
}
|
||||
|
||||
/* are we done? */
|
||||
if (ISSET(st, stopst) || p == stop || clen > stop - p)
|
||||
break; /* NOTE BREAK OUT */
|
||||
|
||||
/* no, we must deal with this character */
|
||||
ASSIGN(tmp, st);
|
||||
ASSIGN(st, fresh);
|
||||
assert(c != OUT);
|
||||
st = step(m->g, startst, stopst, tmp, c, st);
|
||||
SP("aft", st, c);
|
||||
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
|
||||
p += clen;
|
||||
}
|
||||
|
||||
assert(coldp != NULL);
|
||||
m->coldp = coldp;
|
||||
if (ISSET(st, stopst))
|
||||
return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
|
||||
else
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
- slow - step through the string more deliberately
|
||||
== static const char *slow(struct match *m, const char *start, \
|
||||
== const char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static const char * /* where it ended */
|
||||
slow( struct match *m,
|
||||
const char *start,
|
||||
const char *stop,
|
||||
sopno startst,
|
||||
sopno stopst)
|
||||
{
|
||||
states st = m->st;
|
||||
states empty = m->empty;
|
||||
states tmp = m->tmp;
|
||||
const char *p = start;
|
||||
wint_t c;
|
||||
wint_t lastc; /* previous c */
|
||||
wint_t flagch;
|
||||
int i;
|
||||
int i, sflags;
|
||||
const char *matchp; /* last p at which a match ended */
|
||||
size_t clen;
|
||||
|
||||
sflags = 0;
|
||||
AT("slow", start, stop, startst, stopst);
|
||||
CLEAR(st);
|
||||
SET1(st, startst);
|
||||
SP("sstart", st, *p);
|
||||
st = step(m->g, startst, stopst, st, NOTHING, st);
|
||||
st = step(m->g, startst, stopst, st, NOTHING, st, sflags);
|
||||
if (fast)
|
||||
ASSIGN(fresh, st);
|
||||
matchp = NULL;
|
||||
if (start == m->beginp)
|
||||
if (start == m->offp || (start == m->beginp && !(m->eflags®_NOTBOL)))
|
||||
c = OUT;
|
||||
else {
|
||||
/*
|
||||
|
@ -918,12 +881,16 @@ slow( struct match *m,
|
|||
for (;;) {
|
||||
/* next character */
|
||||
lastc = c;
|
||||
sflags = 0;
|
||||
if (p == m->endp) {
|
||||
c = OUT;
|
||||
clen = 0;
|
||||
} else
|
||||
clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
|
||||
|
||||
if (fast && EQ(st, fresh))
|
||||
matchp = p;
|
||||
|
||||
/* is there an EOL and/or BOL between lastc and c? */
|
||||
flagch = '\0';
|
||||
i = 0;
|
||||
|
@ -937,9 +904,20 @@ slow( struct match *m,
|
|||
flagch = (flagch == BOL) ? BOLEOL : EOL;
|
||||
i += m->g->neol;
|
||||
}
|
||||
if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) {
|
||||
sflags |= SBOS;
|
||||
/* Step one more for BOS. */
|
||||
i++;
|
||||
}
|
||||
if (c == OUT && (m->eflags & REG_NOTEOL) == 0) {
|
||||
sflags |= SEOS;
|
||||
/* Step one more for EOS. */
|
||||
i++;
|
||||
}
|
||||
if (i != 0) {
|
||||
for (; i > 0; i--)
|
||||
st = step(m->g, startst, stopst, st, flagch, st);
|
||||
st = step(m->g, startst, stopst, st, flagch, st,
|
||||
sflags);
|
||||
SP("sboleol", st, c);
|
||||
}
|
||||
|
||||
|
@ -953,30 +931,56 @@ slow( struct match *m,
|
|||
flagch = EOW;
|
||||
}
|
||||
if (flagch == BOW || flagch == EOW) {
|
||||
st = step(m->g, startst, stopst, st, flagch, st);
|
||||
st = step(m->g, startst, stopst, st, flagch, st, sflags);
|
||||
SP("sboweow", st, c);
|
||||
}
|
||||
if (lastc != OUT && c != OUT &&
|
||||
ISWORD(lastc) == ISWORD(c)) {
|
||||
flagch = NWBND;
|
||||
} else if ((lastc == OUT && !ISWORD(c)) ||
|
||||
(c == OUT && !ISWORD(lastc))) {
|
||||
flagch = NWBND;
|
||||
}
|
||||
if (flagch == NWBND) {
|
||||
st = step(m->g, startst, stopst, st, flagch, st, sflags);
|
||||
SP("snwbnd", st, c);
|
||||
}
|
||||
|
||||
/* are we done? */
|
||||
if (ISSET(st, stopst))
|
||||
matchp = p;
|
||||
if (EQ(st, empty) || p == stop || clen > stop - p)
|
||||
if (ISSET(st, stopst)) {
|
||||
if (fast)
|
||||
break;
|
||||
else
|
||||
matchp = p;
|
||||
}
|
||||
if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
|
||||
break; /* NOTE BREAK OUT */
|
||||
|
||||
/* no, we must deal with this character */
|
||||
ASSIGN(tmp, st);
|
||||
ASSIGN(st, empty);
|
||||
if (fast)
|
||||
ASSIGN(st, fresh);
|
||||
else
|
||||
ASSIGN(st, empty);
|
||||
assert(c != OUT);
|
||||
st = step(m->g, startst, stopst, tmp, c, st);
|
||||
st = step(m->g, startst, stopst, tmp, c, st, sflags);
|
||||
SP("saft", st, c);
|
||||
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
|
||||
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags),
|
||||
st));
|
||||
p += clen;
|
||||
}
|
||||
|
||||
return(matchp);
|
||||
if (fast) {
|
||||
assert(matchp != NULL);
|
||||
m->coldp = matchp;
|
||||
if (ISSET(st, stopst))
|
||||
return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
|
||||
else
|
||||
return (NULL);
|
||||
} else
|
||||
return (matchp);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
- step - map set of states reachable before char to set reachable after
|
||||
== static states step(struct re_guts *g, sopno start, sopno stop, \
|
||||
|
@ -996,7 +1000,8 @@ step(struct re_guts *g,
|
|||
sopno stop, /* state after stop state within strip */
|
||||
states bef, /* states reachable before */
|
||||
wint_t ch, /* character or NONCHAR code */
|
||||
states aft) /* states already known reachable after */
|
||||
states aft, /* states already known reachable after */
|
||||
int sflags) /* state flags */
|
||||
{
|
||||
cset *cs;
|
||||
sop s;
|
||||
|
@ -1017,6 +1022,14 @@ step(struct re_guts *g,
|
|||
if (ch == OPND(s))
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case OBOS:
|
||||
if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0)
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case OEOS:
|
||||
if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0)
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case OBOL:
|
||||
if (ch == BOL || ch == BOLEOL)
|
||||
FWD(aft, bef, 1);
|
||||
|
@ -1033,6 +1046,14 @@ step(struct re_guts *g,
|
|||
if (ch == EOW)
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case OWBND:
|
||||
if (ch == BOW || ch == EOW)
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case ONWBND:
|
||||
if (ch == NWBND)
|
||||
FWD(aft, aft, 1);
|
||||
break;
|
||||
case OANY:
|
||||
if (!NONCHAR(ch))
|
||||
FWD(aft, bef, 1);
|
||||
|
@ -1072,22 +1093,22 @@ step(struct re_guts *g,
|
|||
break;
|
||||
case OCH_: /* mark the first two branches */
|
||||
FWD(aft, aft, 1);
|
||||
assert(OP(g->strip[pc+OPND(s)]) == OOR2);
|
||||
assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
|
||||
FWD(aft, aft, OPND(s));
|
||||
break;
|
||||
case OOR1: /* done a branch, find the O_CH */
|
||||
if (ISSTATEIN(aft, here)) {
|
||||
for (look = 1;
|
||||
OP(s = g->strip[pc+look]) != O_CH;
|
||||
look += OPND(s))
|
||||
assert(OP(s) == OOR2);
|
||||
OP(s = g->strip[pc+look]) != (sop)O_CH;
|
||||
look += OPND(s))
|
||||
assert(OP(s) == (sop)OOR2);
|
||||
FWD(aft, aft, look + 1);
|
||||
}
|
||||
break;
|
||||
case OOR2: /* propagate OCH_'s marking */
|
||||
FWD(aft, aft, 1);
|
||||
if (OP(g->strip[pc+OPND(s)]) != O_CH) {
|
||||
assert(OP(g->strip[pc+OPND(s)]) == OOR2);
|
||||
if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) {
|
||||
assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
|
||||
FWD(aft, aft, OPND(s));
|
||||
}
|
||||
break;
|
||||
|
@ -1119,7 +1140,7 @@ print(struct match *m,
|
|||
FILE *d)
|
||||
{
|
||||
struct re_guts *g = m->g;
|
||||
int i;
|
||||
sopno i;
|
||||
int first = 1;
|
||||
|
||||
if (!(m->eflags®_TRACE))
|
||||
|
@ -1130,7 +1151,7 @@ print(struct match *m,
|
|||
fprintf(d, " %s", pchar(ch));
|
||||
for (i = 0; i < g->nstates; i++)
|
||||
if (ISSET(st, i)) {
|
||||
fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
|
||||
fprintf(d, "%s%lu", (first) ? "\t" : ", ", i);
|
||||
first = 0;
|
||||
}
|
||||
fprintf(d, "\n");
|
||||
|
@ -1186,9 +1207,9 @@ pchar(int ch)
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#undef stepback
|
||||
#undef matcher
|
||||
#undef fast
|
||||
#undef slow
|
||||
#undef walk
|
||||
#undef dissect
|
||||
#undef backref
|
||||
#undef step
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,6 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
|
@ -14,7 +16,7 @@
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
|
@ -37,7 +39,7 @@
|
|||
static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD: src/lib/libc/regex/regerror.c,v 1.11 2007/06/11 03:05:54 delphij Exp $");
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
|
@ -54,7 +56,7 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
/* === regerror.c === */
|
||||
static char *regatoi(const regex_t *preg, char *localbuf);
|
||||
static const char *regatoi(const regex_t *preg, char *localbuf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -83,13 +85,8 @@ static char *regatoi(const regex_t *preg, char *localbuf);
|
|||
*/
|
||||
static struct rerr {
|
||||
int code;
|
||||
#ifdef __CYGWIN__ /* Avoid whining compiler */
|
||||
const char *name;
|
||||
const char *explain;
|
||||
#else
|
||||
char *name;
|
||||
char *explain;
|
||||
#endif
|
||||
} rerrs[] = {
|
||||
{REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match"},
|
||||
{REG_BADPAT, "REG_BADPAT", "invalid regular expression"},
|
||||
|
@ -125,11 +122,7 @@ regerror(int errcode,
|
|||
struct rerr *r;
|
||||
size_t len;
|
||||
int target = errcode &~ REG_ITOA;
|
||||
#ifdef __CYGWIN__ /* Avoid whining compiler */
|
||||
const char *s;
|
||||
#else
|
||||
char *s;
|
||||
#endif
|
||||
char convbuf[50];
|
||||
|
||||
if (errcode == REG_ATOI)
|
||||
|
@ -167,7 +160,7 @@ regerror(int errcode,
|
|||
- regatoi - internal routine to implement REG_ATOI
|
||||
== static char *regatoi(const regex_t *preg, char *localbuf);
|
||||
*/
|
||||
static char *
|
||||
static const char *
|
||||
regatoi(const regex_t *preg, char *localbuf)
|
||||
{
|
||||
struct rerr *r;
|
||||
|
@ -176,14 +169,7 @@ regatoi(const regex_t *preg, char *localbuf)
|
|||
if (strcmp(r->name, preg->re_endp) == 0)
|
||||
break;
|
||||
if (r->code == 0)
|
||||
#ifdef __CYGWIN__ /* Avoid whining compiler */
|
||||
{
|
||||
static char null[] = "0";
|
||||
return null;
|
||||
}
|
||||
#else
|
||||
return("0");
|
||||
#endif
|
||||
|
||||
sprintf(localbuf, "%d", r->code);
|
||||
return(localbuf);
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\" 4. Neither the name of the University nor the names of its contributors
|
||||
.\" 3. Neither the name of the University nor the names of its contributors
|
||||
.\" may be used to endorse or promote products derived from this software
|
||||
.\" without specific prior written permission.
|
||||
.\"
|
||||
|
@ -30,9 +30,9 @@
|
|||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)regex.3 8.4 (Berkeley) 3/20/94
|
||||
.\" $FreeBSD: src/lib/libc/regex/regex.3,v 1.21 2007/01/09 00:28:04 imp Exp $
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd August 17, 2005
|
||||
.Dd April 15, 2017
|
||||
.Dt REGEX 3
|
||||
.Os
|
||||
.Sh NAME
|
||||
|
@ -183,6 +183,17 @@ compatible with but not specified by
|
|||
.St -p1003.2 ,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
.It Dv REG_POSIX
|
||||
Compile only
|
||||
.St -p1003.2
|
||||
compliant expressions.
|
||||
This flag has no effect unless linking against
|
||||
.Nm libregex .
|
||||
This is an extension,
|
||||
compatible with but not specified by
|
||||
.St -p1003.2 ,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
.El
|
||||
.Pp
|
||||
When successful,
|
||||
|
@ -235,11 +246,16 @@ The
|
|||
argument is the bitwise OR of zero or more of the following flags:
|
||||
.Bl -tag -width REG_STARTEND
|
||||
.It Dv REG_NOTBOL
|
||||
The first character of
|
||||
the string
|
||||
is not the beginning of a line, so the
|
||||
.Ql ^\&
|
||||
anchor should not match before it.
|
||||
The first character of the string is treated as the continuation
|
||||
of a line.
|
||||
This means that the anchors
|
||||
.Ql ^\& ,
|
||||
.Ql [[:<:]] ,
|
||||
and
|
||||
.Ql \e<
|
||||
do not match before it; but see
|
||||
.Dv REG_STARTEND
|
||||
below.
|
||||
This does not affect the behavior of newlines under
|
||||
.Dv REG_NEWLINE .
|
||||
.It Dv REG_NOTEOL
|
||||
|
@ -247,19 +263,16 @@ The NUL terminating
|
|||
the string
|
||||
does not end a line, so the
|
||||
.Ql $\&
|
||||
anchor should not match before it.
|
||||
anchor does not match before it.
|
||||
This does not affect the behavior of newlines under
|
||||
.Dv REG_NEWLINE .
|
||||
.It Dv REG_STARTEND
|
||||
The string is considered to start at
|
||||
.Fa string
|
||||
+
|
||||
.Fa pmatch Ns [0]. Ns Va rm_so
|
||||
and to have a terminating NUL located at
|
||||
.Fa string
|
||||
+
|
||||
.Fa pmatch Ns [0]. Ns Va rm_eo
|
||||
(there need not actually be a NUL at that location),
|
||||
.Fa string No +
|
||||
.Fa pmatch Ns [0]. Ns Fa rm_so
|
||||
and to end before the byte located at
|
||||
.Fa string No +
|
||||
.Fa pmatch Ns [0]. Ns Fa rm_eo ,
|
||||
regardless of the value of
|
||||
.Fa nmatch .
|
||||
See below for the definition of
|
||||
|
@ -271,13 +284,37 @@ compatible with but not specified by
|
|||
.St -p1003.2 ,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
Note that a non-zero
|
||||
.Va rm_so
|
||||
does not imply
|
||||
.Dv REG_NOTBOL ;
|
||||
.Dv REG_STARTEND
|
||||
affects only the location of the string,
|
||||
not how it is matched.
|
||||
.Pp
|
||||
Without
|
||||
.Dv REG_NOTBOL ,
|
||||
the position
|
||||
.Fa rm_so
|
||||
is considered the beginning of a line, such that
|
||||
.Ql ^
|
||||
matches before it, and the beginning of a word if there is a word
|
||||
character at this position, such that
|
||||
.Ql [[:<:]]
|
||||
and
|
||||
.Ql \e<
|
||||
match before it.
|
||||
.Pp
|
||||
With
|
||||
.Dv REG_NOTBOL ,
|
||||
the character at position
|
||||
.Fa rm_so
|
||||
is treated as the continuation of a line, and if
|
||||
.Fa rm_so
|
||||
is greater than 0, the preceding character is taken into consideration.
|
||||
If the preceding character is a newline and the regular expression was compiled
|
||||
with
|
||||
.Dv REG_NEWLINE ,
|
||||
.Ql ^
|
||||
matches before the string; if the preceding character is not a word character
|
||||
but the string starts with a word character,
|
||||
.Ql [[:<:]]
|
||||
and
|
||||
.Ql \e<
|
||||
match before the string.
|
||||
.El
|
||||
.Pp
|
||||
See
|
||||
|
@ -420,10 +457,12 @@ it should have been the result from the most recent
|
|||
using that
|
||||
.Ft regex_t .
|
||||
The
|
||||
.Fn ( regerror
|
||||
.Po
|
||||
.Fn regerror
|
||||
may be able to supply a more detailed message using information
|
||||
from the
|
||||
.Ft regex_t . )
|
||||
.Ft regex_t .
|
||||
.Pc
|
||||
The
|
||||
.Fn regerror
|
||||
function
|
||||
|
|
|
@ -13,6 +13,10 @@
|
|||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\" 3. All advertising materials mentioning features or use of this software
|
||||
.\" must display the following acknowledgement:
|
||||
.\" This product includes software developed by the University of
|
||||
.\" California, Berkeley and its contributors.
|
||||
.\" 4. Neither the name of the University nor the names of its contributors
|
||||
.\" may be used to endorse or promote products derived from this software
|
||||
.\" without specific prior written permission.
|
||||
|
@ -30,9 +34,9 @@
|
|||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)re_format.7 8.3 (Berkeley) 3/20/94
|
||||
.\" $FreeBSD: src/lib/libc/regex/re_format.7,v 1.12 2008/09/05 17:41:20 keramida Exp $
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd March 20, 1994
|
||||
.Dd June 30, 2014
|
||||
.Dt RE_FORMAT 7
|
||||
.Os
|
||||
.Sh NAME
|
||||
|
@ -271,7 +275,6 @@ and
|
|||
stands for the list of all characters belonging to that
|
||||
class.
|
||||
Standard character class names are:
|
||||
.Pp
|
||||
.Bl -column "alnum" "digit" "xdigit" -offset indent
|
||||
.It Em "alnum digit punct"
|
||||
.It Em "alpha graph space"
|
||||
|
@ -311,6 +314,13 @@ compatible with but not specified by
|
|||
.St -p1003.2 ,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
The additional word delimiters
|
||||
.Ql \e<
|
||||
and
|
||||
.Ql \e>
|
||||
are provided to ease compatibility with traditional
|
||||
SVR4
|
||||
systems but are not portable and should be avoided.
|
||||
.Pp
|
||||
In the event that an RE could match more than one substring of a given
|
||||
string,
|
||||
|
@ -382,10 +392,12 @@ and
|
|||
.Ql ?\&
|
||||
are ordinary characters, and their functionality
|
||||
can be expressed using bounds
|
||||
.No ( Ql {1,}
|
||||
.Po
|
||||
.Ql {1,}
|
||||
or
|
||||
.Ql {0,1}
|
||||
respectively).
|
||||
respectively
|
||||
.Pc .
|
||||
Also note that
|
||||
.Ql x+
|
||||
in modern REs is equivalent to
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
|
@ -14,7 +16,7 @@
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
|
@ -31,21 +33,21 @@
|
|||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
|
||||
* $FreeBSD: src/lib/libc/regex/regex2.h,v 1.11 2007/01/09 00:28:04 imp Exp $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* First, the stuff that ends up in the outside-world include file
|
||||
= typedef off_t regoff_t;
|
||||
= typedef struct {
|
||||
= int re_magic;
|
||||
= size_t re_nsub; // number of parenthesized subexpressions
|
||||
= const char *re_endp; // end pointer for REG_PEND
|
||||
= struct re_guts *re_g; // none of your business :-)
|
||||
= int re_magic;
|
||||
= size_t re_nsub; // number of parenthesized subexpressions
|
||||
= const char *re_endp; // end pointer for REG_PEND
|
||||
= struct re_guts *re_g; // none of your business :-)
|
||||
= } regex_t;
|
||||
= typedef struct {
|
||||
= regoff_t rm_so; // start of match
|
||||
= regoff_t rm_eo; // end of match
|
||||
= regoff_t rm_so; // start of match
|
||||
= regoff_t rm_eo; // end of match
|
||||
= } regmatch_t;
|
||||
*/
|
||||
/*
|
||||
|
@ -73,7 +75,7 @@
|
|||
* immediately *preceding* "execution" of that operator.
|
||||
*/
|
||||
typedef unsigned long sop; /* strip operator */
|
||||
typedef long sopno;
|
||||
typedef unsigned long sopno;
|
||||
#define OPRMASK 0xf8000000L
|
||||
#define OPDMASK 0x07ffffffL
|
||||
#define OPSHIFT ((unsigned)27)
|
||||
|
@ -102,6 +104,10 @@ typedef long sopno;
|
|||
#define O_CH (18L<<OPSHIFT) /* end choice back to OOR1 */
|
||||
#define OBOW (19L<<OPSHIFT) /* begin word - */
|
||||
#define OEOW (20L<<OPSHIFT) /* end word - */
|
||||
#define OBOS (21L<<OPSHIFT) /* begin subj. - */
|
||||
#define OEOS (22L<<OPSHIFT) /* end subj. - */
|
||||
#define OWBND (23L<<OPSHIFT) /* word bound - */
|
||||
#define ONWBND (24L<<OPSHIFT) /* not bound - */
|
||||
|
||||
/*
|
||||
* Structures for [] character-set representation.
|
||||
|
@ -111,13 +117,13 @@ typedef struct {
|
|||
wint_t max;
|
||||
} crange;
|
||||
typedef struct {
|
||||
unsigned char bmp[NC / 8];
|
||||
unsigned char bmp[NC_MAX / 8];
|
||||
wctype_t *types;
|
||||
int ntypes;
|
||||
unsigned int ntypes;
|
||||
wint_t *wides;
|
||||
int nwides;
|
||||
unsigned int nwides;
|
||||
crange *ranges;
|
||||
int nranges;
|
||||
unsigned int nranges;
|
||||
int invert;
|
||||
int icase;
|
||||
} cset;
|
||||
|
@ -125,15 +131,20 @@ typedef struct {
|
|||
static int
|
||||
CHIN1(cset *cs, wint_t ch)
|
||||
{
|
||||
int i;
|
||||
unsigned int i;
|
||||
|
||||
assert(ch >= 0);
|
||||
if (ch < NC)
|
||||
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
|
||||
cs->invert);
|
||||
for (i = 0; i < cs->nwides; i++)
|
||||
if (ch == cs->wides[i])
|
||||
for (i = 0; i < cs->nwides; i++) {
|
||||
if (cs->icase) {
|
||||
if (ch == towlower(cs->wides[i]) ||
|
||||
ch == towupper(cs->wides[i]))
|
||||
return (!cs->invert);
|
||||
} else if (ch == cs->wides[i])
|
||||
return (!cs->invert);
|
||||
}
|
||||
for (i = 0; i < cs->nranges; i++)
|
||||
if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max)
|
||||
return (!cs->invert);
|
||||
|
@ -151,14 +162,10 @@ CHIN(cset *cs, wint_t ch)
|
|||
if (ch < NC)
|
||||
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
|
||||
cs->invert);
|
||||
else if (cs->icase) {
|
||||
if (cs->invert)
|
||||
return (CHIN1(cs, ch) && CHIN1(cs, towlower(ch)) &&
|
||||
CHIN1(cs, towupper(ch)));
|
||||
else
|
||||
return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
|
||||
CHIN1(cs, towupper(ch)));
|
||||
} else
|
||||
else if (cs->icase)
|
||||
return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
|
||||
CHIN1(cs, towupper(ch)));
|
||||
else
|
||||
return (CHIN1(cs, ch));
|
||||
}
|
||||
|
||||
|
@ -169,7 +176,7 @@ struct re_guts {
|
|||
int magic;
|
||||
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
||||
sop *strip; /* malloced area for strip */
|
||||
int ncsets; /* number of csets in use */
|
||||
unsigned int ncsets; /* number of csets in use */
|
||||
cset *sets; /* -> cset [ncsets] */
|
||||
int cflags; /* copy of regcomp() cflags argument */
|
||||
sopno nstates; /* = number of sops */
|
||||
|
@ -193,4 +200,5 @@ struct re_guts {
|
|||
|
||||
/* misc utilities */
|
||||
#define OUT (CHAR_MIN - 1) /* a non-character value */
|
||||
#define ISWORD(c) (iswalnum((wint_t)(c)) || (c) == '_')
|
||||
#define IGN (CHAR_MIN - 2)
|
||||
#define ISWORD(c) (iswalnum((uch)(c)) || (c) == '_')
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
|
@ -14,7 +16,7 @@
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
|
@ -37,7 +39,7 @@
|
|||
static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD: src/lib/libc/regex/regexec.c,v 1.8 2007/06/11 03:05:54 delphij Exp $");
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
/*
|
||||
* the outer shell of regexec()
|
||||
|
@ -46,9 +48,6 @@ __FBSDID("$FreeBSD: src/lib/libc/regex/regexec.c,v 1.8 2007/06/11 03:05:54 delph
|
|||
* macros that code uses. This lets the same code operate on two different
|
||||
* representations for state sets and characters.
|
||||
*/
|
||||
#ifdef __CYGWIN__
|
||||
#include "winsup.h"
|
||||
#endif
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -68,9 +67,9 @@ static __inline size_t
|
|||
xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
|
||||
{
|
||||
size_t nr;
|
||||
wint_t wc;
|
||||
wchar_t wc;
|
||||
|
||||
nr = mbrtowi(&wc, s, n, mbs);
|
||||
nr = mbrtowc(&wc, s, n, mbs);
|
||||
if (wi != NULL)
|
||||
*wi = wc;
|
||||
if (nr == 0)
|
||||
|
@ -98,8 +97,8 @@ xmbrtowc_dummy(wint_t *wi,
|
|||
}
|
||||
|
||||
/* macros for manipulating states, small version */
|
||||
#define states long
|
||||
#define states1 states /* for later use in regexec() decision */
|
||||
#define states1 long /* for later use in regexec() decision */
|
||||
#define states states1
|
||||
#define CLEAR(v) ((v) = 0)
|
||||
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
|
||||
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
|
||||
|
@ -228,9 +227,9 @@ regexec(const regex_t * __restrict preg,
|
|||
eflags = GOODFLAGS(eflags);
|
||||
|
||||
if (MB_CUR_MAX > 1)
|
||||
return(mmatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
return(mmatcher(g, string, nmatch, pmatch, eflags));
|
||||
else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
|
||||
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
return(smatcher(g, string, nmatch, pmatch, eflags));
|
||||
else
|
||||
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
return(lmatcher(g, string, nmatch, pmatch, eflags));
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
|
@ -14,7 +16,7 @@
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
|
@ -37,7 +39,7 @@
|
|||
static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD: src/lib/libc/regex/regfree.c,v 1.8 2007/06/11 03:05:54 delphij Exp $");
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
|
@ -58,7 +60,7 @@ void
|
|||
regfree(regex_t *preg)
|
||||
{
|
||||
struct re_guts *g;
|
||||
int i;
|
||||
unsigned int i;
|
||||
|
||||
if (preg->re_magic != MAGIC1) /* oops */
|
||||
return; /* nice to complain, but hard */
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
/*-
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
|
@ -14,7 +16,7 @@
|
|||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
|
@ -31,13 +33,15 @@
|
|||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)utils.h 8.3 (Berkeley) 3/20/94
|
||||
* $FreeBSD: src/lib/libc/regex/utils.h,v 1.3 2007/01/09 00:28:04 imp Exp $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/* utility definitions */
|
||||
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
|
||||
#define INFINITY (DUPMAX + 1)
|
||||
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
||||
|
||||
#define NC_MAX (CHAR_MAX - CHAR_MIN + 1)
|
||||
#define NC ((MB_CUR_MAX) == 1 ? (NC_MAX) : (128))
|
||||
typedef unsigned char uch;
|
||||
|
||||
/* switch off assertions (if not already off) if no REDEBUG */
|
||||
|
|
Loading…
Reference in New Issue