summary | refs | log | tree | commit | diff
path: root/usr.sbin
diff options
context:
space:
mode:
author Robert Nagy <robert@cvs.openbsd.org> 2011-11-12 11:22:41 +0000
committer Robert Nagy <robert@cvs.openbsd.org> 2011-11-12 11:22:41 +0000
commit 0cafaf711f9f476ae96f4ad6b5973353ad1e7a15 (patch)
tree e2053190eaa5cadca518f436ae31f95e055986b6 /usr.sbin
parent a5bca9542bfc307e96e23a0792364839f0f970d0 (diff)
sync to pcre 8.20
Diffstat (limited to 'usr.sbin')
-rw-r--r-- usr.sbin/nginx/src/pcre/LICENCE 26
-rw-r--r-- usr.sbin/nginx/src/pcre/config.h 27
-rw-r--r-- usr.sbin/nginx/src/pcre/pcre.h 28
-rw-r--r-- usr.sbin/nginx/src/pcre/pcre_compile.c 69
-rw-r--r-- usr.sbin/nginx/src/pcre/pcre_exec.c 409
-rw-r--r-- usr.sbin/nginx/src/pcre/pcre_fullinfo.c 8
-rw-r--r-- usr.sbin/nginx/src/pcre/pcre_internal.h 97
-rw-r--r-- usr.sbin/nginx/src/pcre/pcre_tables.c 28
8 files changed, 506 insertions, 186 deletions
diff --git a/usr.sbin/nginx/src/pcre/LICENCE b/usr.sbin/nginx/src/pcre/LICENCE
index 65a7ec75343..ae7bbcf8cfe 100644
--- a/usr.sbin/nginx/src/pcre/LICENCE
+++ b/usr.sbin/nginx/src/pcre/LICENCE
@@ -9,7 +9,9 @@ specified below. The documentation for PCRE, supplied in the "doc"
directory, is distributed under the same terms as the software itself.
The basic library functions are written in C and are freestanding. Also
-included in the distribution is a set of C++ wrapper functions.
+included in the distribution is a set of C++ wrapper functions, and a
+just-in-time compiler that can be used to optimize pattern matching. These
+are both optional features that can be omitted when the library is built.
THE BASIC LIBRARY FUNCTIONS
@@ -26,6 +28,28 @@ Copyright (c) 1997-2011 University of Cambridge
All rights reserved.
+PCRE JUST-IN-TIME COMPILATION SUPPORT
+-------------------------------------
+
+Written by: Zoltan Herczeg
+Email local part: hzmester
+Email domain: freemail.hu
+
+Copyright(c) 2010-2011 Zoltan Herczeg
+All rights reserved.
+
+
+STACK-LESS JUST-IN-TIME COMPILER
+--------------------------------
+
+Written by: Zoltan Herczeg
+Email local part: hzmester
+Email domain: freemail.hu
+
+Copyright(c) 2009-2011 Zoltan Herczeg
+All rights reserved.
+
+
THE C++ WRAPPER FUNCTIONS
-------------------------
diff --git a/usr.sbin/nginx/src/pcre/config.h b/usr.sbin/nginx/src/pcre/config.h
index 25bd8064d84..b55d68b23b5 100644
--- a/usr.sbin/nginx/src/pcre/config.h
+++ b/usr.sbin/nginx/src/pcre/config.h
@@ -81,7 +81,7 @@ them both to 0; an emulation function will be used. */
#define HAVE_STRERROR 1
/* Define to 1 if you have the <string> header file. */
-/* #undef HAVE_STRING */
+#define HAVE_STRING 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
@@ -96,7 +96,7 @@ them both to 0; an emulation function will be used. */
/* #undef HAVE_STRTOLL */
/* Define to 1 if you have `strtoq'. */
-/* #undef HAVE_STRTOQ */
+#define HAVE_STRTOQ 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
@@ -190,7 +190,7 @@ them both to 0; an emulation function will be used. */
#define PACKAGE_NAME "PCRE"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE 8.13"
+#define PACKAGE_STRING "PCRE 8.20"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre"
@@ -199,7 +199,16 @@ them both to 0; an emulation function will be used. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
-#define PACKAGE_VERSION "8.13"
+#define PACKAGE_VERSION "8.20"
+
+/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
+ pcregrep to hold parts of the file it is searching. On systems that support
+ it, "configure" can be used to override the default, which is 8192. This is
+ also the minimum value. The actual amount of memory used by pcregrep is
+ three times this number, because it allows for the buffering of "before"
+ and "after" lines. */
+#define PCREGREP_BUFSIZE 20480
+
/* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition
@@ -228,6 +237,9 @@ them both to 0; an emulation function will be used. */
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
+/* Define to enable support for Just-In-Time compiling. */
+/* #undef SUPPORT_JIT */
+
/* Define to allow pcregrep to be linked with libbz2, so that it is able to
handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */
@@ -239,7 +251,10 @@ them both to 0; an emulation function will be used. */
handle .gz files. */
/* #undef SUPPORT_LIBZ */
-/* Define to enable support for Unicode properties */
+/* Define to enable JIT support in pcregrep. */
+/* #undef SUPPORT_PCREGREP_JIT */
+
+/* Define to enable support for Unicode properties. */
#define SUPPORT_UCP /**/
/* Define to enable support for the UTF-8 Unicode encoding. This will work
@@ -249,7 +264,7 @@ them both to 0; an emulation function will be used. */
#define SUPPORT_UTF8 /**/
/* Version number of package */
-#define VERSION "8.13"
+#define VERSION "8.20"
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
diff --git a/usr.sbin/nginx/src/pcre/pcre.h b/usr.sbin/nginx/src/pcre/pcre.h
index 20d6c0b914f..42a109a65dc 100644
--- a/usr.sbin/nginx/src/pcre/pcre.h
+++ b/usr.sbin/nginx/src/pcre/pcre.h
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE_MAJOR 8
-#define PCRE_MINOR 13
+#define PCRE_MINOR 20
#define PCRE_PRERELEASE
-#define PCRE_DATE 2011-08-16
+#define PCRE_DATE 2011-10-21
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
@@ -164,6 +164,7 @@ compile-time only bits for runtime options, or vice versa. */
#define PCRE_ERROR_BADOFFSET (-24)
#define PCRE_ERROR_SHORTUTF8 (-25)
#define PCRE_ERROR_RECURSELOOP (-26)
+#define PCRE_ERROR_JIT_STACKLIMIT (-27)
/* Specific error codes for UTF-8 validity checks */
@@ -209,6 +210,7 @@ compile-time only bits for runtime options, or vice versa. */
#define PCRE_INFO_JCHANGED 13
#define PCRE_INFO_HASCRORLF 14
#define PCRE_INFO_MINLENGTH 15
+#define PCRE_INFO_JIT 16
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
@@ -222,6 +224,12 @@ compatible. */
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
#define PCRE_CONFIG_BSR 8
+#define PCRE_CONFIG_JIT 9
+
+/* Request types for pcre_study(). Do not re-arrange, in order to remain
+compatible. */
+
+#define PCRE_STUDY_JIT_COMPILE 0x0001
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
@@ -232,12 +240,16 @@ these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
#define PCRE_EXTRA_MARK 0x0020
+#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040
/* Types */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
+struct real_pcre_jit_stack; /* declaration; the definition is private */
+typedef struct real_pcre_jit_stack pcre_jit_stack;
+
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
@@ -258,6 +270,7 @@ typedef struct pcre_extra {
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
unsigned char **mark; /* For passing back a mark pointer */
+ void *executable_jit; /* Contains a pointer to a compiled jit code */
} pcre_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
@@ -305,6 +318,10 @@ PCRE_EXP_DECL void pcre_stack_free(void *);
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
#endif /* VPCOMPAT */
+/* User defined callback which provides a stack just before the match starts. */
+
+typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
+
/* Exported PCRE functions */
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
@@ -337,8 +354,15 @@ PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
+PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
PCRE_EXP_DECL const char *pcre_version(void);
+/* JIT compiler related functions. */
+
+PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
+PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
+PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, pcre_jit_callback, void *);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/usr.sbin/nginx/src/pcre/pcre_compile.c b/usr.sbin/nginx/src/pcre/pcre_compile.c
index 9fe396ca99d..4f2a9ece171 100644
--- a/usr.sbin/nginx/src/pcre/pcre_compile.c
+++ b/usr.sbin/nginx/src/pcre/pcre_compile.c
@@ -1506,6 +1506,7 @@ for (;;)
case OP_CBRA:
case OP_BRA:
case OP_ONCE:
+ case OP_ONCE_NC:
case OP_COND:
d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);
if (d < 0) return d;
@@ -1761,7 +1762,7 @@ for (;;)
break;
case OP_THEN_ARG:
- code += code[1+LINK_SIZE];
+ code += code[1];
break;
}
@@ -1880,7 +1881,7 @@ for (;;)
break;
case OP_THEN_ARG:
- code += code[1+LINK_SIZE];
+ code += code[1];
break;
}
@@ -2045,7 +2046,8 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
if (c == OP_BRA || c == OP_BRAPOS ||
c == OP_CBRA || c == OP_CBRAPOS ||
- c == OP_ONCE || c == OP_COND)
+ c == OP_ONCE || c == OP_ONCE_NC ||
+ c == OP_COND)
{
BOOL empty_branch;
if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */
@@ -2217,7 +2219,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
break;
case OP_THEN_ARG:
- code += code[1+LINK_SIZE];
+ code += code[1];
break;
/* None of the remaining opcodes are required to match a character. */
@@ -2295,8 +2297,13 @@ I think.
A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
It seems that the appearance of a nested POSIX class supersedes an apparent
external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
-a digit. Also, unescaped square brackets may also appear as part of class
-names. For example, [:a[:abc]b:] gives unknown class "[:abc]b:]"in Perl.
+a digit.
+
+In Perl, unescaped square brackets may also appear as part of class names. For
+example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for
+[:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not
+seem right at all. PCRE does not allow closing square brackets in POSIX class
+names.
Arguments:
ptr pointer to the initial [
@@ -2314,6 +2321,7 @@ for (++ptr; *ptr != 0; ptr++)
{
if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
ptr++;
+ else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
else
{
if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
@@ -3086,7 +3094,6 @@ uschar *class_utf8data_base;
uschar utf8_char[6];
#else
BOOL utf8 = FALSE;
-uschar *utf8_char = NULL;
#endif
#ifdef PCRE_DEBUG
@@ -3137,6 +3144,7 @@ for (;; ptr++)
int subfirstbyte;
int terminator;
int mclength;
+ int tempbracount;
uschar mcbuffer[8];
/* Get next byte in the pattern */
@@ -4025,7 +4033,7 @@ for (;; ptr++)
if ((options & PCRE_CASELESS) != 0)
{
unsigned int othercase;
- if ((othercase = UCD_OTHERCASE(c)) != (unsigned int)c)
+ if ((othercase = UCD_OTHERCASE(c)) != (unsigned int)c)
{
*class_utf8data++ = XCL_SINGLE;
class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
@@ -4835,8 +4843,10 @@ for (;; ptr++)
uschar *ketcode = code - 1 - LINK_SIZE;
uschar *bracode = ketcode - GET(ketcode, 1);
- if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;
- if (*bracode == OP_ONCE)
+ if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
+ possessive_quantifier) *bracode = OP_BRA;
+
+ if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
*ketcode = OP_KETRMAX + repeat_type;
else
{
@@ -5040,6 +5050,9 @@ for (;; ptr++)
PUT2INC(code, 0, oc->number);
}
*code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
+
+ /* Do not set firstbyte after *ACCEPT */
+ if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
}
/* Handle other cases with/without an argument */
@@ -5052,11 +5065,7 @@ for (;; ptr++)
goto FAILED;
}
*code = verbs[i].op;
- if (*code++ == OP_THEN)
- {
- PUT(code, 0, code - bcptr->current_branch - 1);
- code += LINK_SIZE;
- }
+ if (*code++ == OP_THEN) cd->external_flags |= PCRE_HASTHEN;
}
else
@@ -5067,11 +5076,7 @@ for (;; ptr++)
goto FAILED;
}
*code = verbs[i].op_arg;
- if (*code++ == OP_THEN_ARG)
- {
- PUT(code, 0, code - bcptr->current_branch - 1);
- code += LINK_SIZE;
- }
+ if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;
*code++ = arglen;
memcpy(code, arg, arglen);
code += arglen;
@@ -5906,6 +5911,7 @@ for (;; ptr++)
*code = bravalue;
tempcode = code;
tempreqvary = cd->req_varyopt; /* Save value before bracket */
+ tempbracount = cd->bracount; /* Save value before bracket */
length_prevgroup = 0; /* Initialize for pre-compile phase */
if (!compile_regex(
@@ -5928,15 +5934,20 @@ for (;; ptr++)
))
goto FAILED;
+ /* If this was an atomic group and there are no capturing groups within it,
+ generate OP_ONCE_NC instead of OP_ONCE. */
+
+ if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
+ *code = OP_ONCE_NC;
+
if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
cd->assert_depth -= 1;
/* At the end of compiling, code is still pointing to the start of the
- group, while tempcode has been updated to point past the end of the group
- and any option resetting that may follow it. The pattern pointer (ptr)
- is on the bracket. */
+ group, while tempcode has been updated to point past the end of the group.
+ The pattern pointer (ptr) is on the bracket.
- /* If this is a conditional bracket, check that there are no more than
+ If this is a conditional bracket, check that there are no more than
two branches in the group, or just one if it's a DEFINE group. We do this
in the real compile phase, not in the pre-pass, where the whole group may
not be available. */
@@ -6335,7 +6346,7 @@ for (;; ptr++)
else firstbyte = reqbyte = REQ_NONE;
}
- /* firstbyte was previously set; we can set reqbyte only the length is
+ /* firstbyte was previously set; we can set reqbyte only if the length is
1 or the matching is caseful. */
else
@@ -6727,7 +6738,8 @@ do {
/* Other brackets */
- else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
+ else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC ||
+ op == OP_COND)
{
if (!is_anchored(scode, bracket_map, backref_map)) return FALSE;
}
@@ -6831,7 +6843,7 @@ do {
/* Other brackets */
- else if (op == OP_ASSERT || op == OP_ONCE)
+ else if (op == OP_ASSERT || op == OP_ONCE || op == OP_ONCE_NC)
{
if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
}
@@ -6901,6 +6913,7 @@ do {
case OP_SCBRAPOS:
case OP_ASSERT:
case OP_ONCE:
+ case OP_ONCE_NC:
case OP_COND:
if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
return -1;
@@ -7282,7 +7295,7 @@ re->top_bracket = cd->bracount;
re->top_backref = cd->top_backref;
re->flags = cd->external_flags;
-if (cd->had_accept) reqbyte = -1; /* Must disable after (*ACCEPT) */
+if (cd->had_accept) reqbyte = REQ_NONE; /* Must disable after (*ACCEPT) */
/* If not reached end of pattern on success, there's an excess bracket. */
diff --git a/usr.sbin/nginx/src/pcre/pcre_exec.c b/usr.sbin/nginx/src/pcre/pcre_exec.c
index b1ab3875bba..d390ff422b1 100644
--- a/usr.sbin/nginx/src/pcre/pcre_exec.c
+++ b/usr.sbin/nginx/src/pcre/pcre_exec.c
@@ -277,7 +277,7 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
- RM61, RM62, RM63 };
+ RM61, RM62, RM63, RM64, RM65, RM66 };
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
@@ -775,26 +775,106 @@ for (;;)
md->start_match_ptr = ecode + 2;
RRETURN(MATCH_SKIP_ARG);
- /* For THEN (and THEN_ARG) we pass back the address of the bracket or
- the alt that is at the start of the current branch. This makes it possible
- to skip back past alternatives that precede the THEN within the current
- branch. */
+ /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
+ the branch in which it occurs can be determined. Overload the start of
+ match pointer to do this. */
case OP_THEN:
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
eptrb, RM54);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->start_match_ptr = ecode - GET(ecode, 1);
+ md->start_match_ptr = ecode;
MRRETURN(MATCH_THEN);
case OP_THEN_ARG:
- RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
- offset_top, md, eptrb, RM58);
+ RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,
+ md, eptrb, RM58);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->start_match_ptr = ecode - GET(ecode, 1);
- md->mark = ecode + LINK_SIZE + 2;
+ md->start_match_ptr = ecode;
+ md->mark = ecode + 2;
RRETURN(MATCH_THEN);
+ /* Handle an atomic group that does not contain any capturing parentheses.
+ This can be handled like an assertion. Prior to 8.13, all atomic groups
+ were handled this way. In 8.13, the code was changed as below for ONCE, so
+ that backups pass through the group and thereby reset captured values.
+ However, this uses a lot more stack, so in 8.20, atomic groups that do not
+ contain any captures generate OP_ONCE_NC, which can be handled in the old,
+ less stack intensive way.
+
+ Check the alternative branches in turn - the matching won't pass the KET
+ for this kind of subpattern. If any one branch matches, we carry on as at
+ the end of a normal bracket, leaving the subject pointer, but resetting
+ the start-of-match value in case it was changed by \K. */
+
+ case OP_ONCE_NC:
+ prev = ecode;
+ saved_eptr = eptr;
+ do
+ {
+ RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
+ if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
+ {
+ mstart = md->start_match_ptr;
+ break;
+ }
+ if (rrc == MATCH_THEN)
+ {
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ }
+
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ ecode += GET(ecode,1);
+ }
+ while (*ecode == OP_ALT);
+
+ /* If hit the end of the group (which could be repeated), fail */
+
+ if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
+
+ /* Continue as from after the group, updating the offsets high water
+ mark, since extracts may have been taken. */
+
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+
+ offset_top = md->end_offset_top;
+ eptr = md->end_match_ptr;
+
+ /* For a non-repeating ket, just continue at this level. This also
+ happens for a repeating ket if no characters were matched in the group.
+ This is the forcible breaking of infinite loops as implemented in Perl
+ 5.005. */
+
+ if (*ecode == OP_KET || eptr == saved_eptr)
+ {
+ ecode += 1+LINK_SIZE;
+ break;
+ }
+
+ /* The repeating kets try the rest of the pattern or restart from the
+ preceding bracket, in the appropriate order. The second "call" of match()
+ uses tail recursion, to avoid using another stack frame. */
+
+ if (*ecode == OP_KETRMIN)
+ {
+ RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ ecode = prev;
+ goto TAIL_RECURSE;
+ }
+ else /* OP_KETRMAX */
+ {
+ md->match_function_type = MATCH_CBEGROUP;
+ RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ ecode += 1 + LINK_SIZE;
+ goto TAIL_RECURSE;
+ }
+ /* Control never gets here */
+
/* Handle a capturing bracket, other than those that are possessive with an
unlimited repeat. If there is space in the offset vector, save the current
subject position in the working slot at the top of the vector. We mustn't
@@ -838,9 +918,29 @@ for (;;)
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
eptrb, RM1);
if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
- if (rrc != MATCH_NOMATCH &&
- (rrc != MATCH_THEN || md->start_match_ptr != ecode))
- RRETURN(rrc);
+
+ /* If we backed up to a THEN, check whether it is within the current
+ branch by comparing the address of the THEN that is passed back with
+ the end of the branch. If it is within the current branch, and the
+ branch is one of two or more alternatives (it either starts or ends
+ with OP_ALT), we have reached the limit of THEN's action, so convert
+ the return code to NOMATCH, which will cause normal backtracking to
+ happen from now on. Otherwise, THEN is passed back to an outer
+ alternative. This implements Perl's treatment of parenthesized groups,
+ where a group not containing | does not affect the current alternative,
+ that is, (X) is NOT the same as (X|(*F)). */
+
+ if (rrc == MATCH_THEN)
+ {
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ }
+
+ /* Anything other than NOMATCH is passed back. */
+
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->capture_last = save_capture_last;
ecode += GET(ecode, 1);
if (*ecode != OP_ALT) break;
@@ -851,11 +951,10 @@ for (;;)
md->offset_vector[offset+1] = save_offset2;
md->offset_vector[md->offset_end - number] = save_offset3;
- /* At this point, rrc will be one of MATCH_ONCE, MATCH_NOMATCH, or
- MATCH_THEN. */
+ /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
- if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
- RRETURN(((rrc == MATCH_ONCE)? MATCH_ONCE:MATCH_NOMATCH));
+ if (md->mark == NULL) md->mark = markptr;
+ RRETURN(rrc);
}
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
@@ -870,12 +969,17 @@ for (;;)
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
/* Non-capturing or atomic group, except for possessive with unlimited
- repeat. Loop for all the alternatives. When we get to the final alternative
- within the brackets, we used to return the result of a recursive call to
- match() whatever happened so it was possible to reduce stack usage by
- turning this into a tail recursion, except in the case of a possibly empty
- group. However, now that there is the possiblity of (*THEN) occurring in
- the final alternative, this optimization is no longer possible.
+ repeat and ONCE group with no captures. Loop for all the alternatives.
+
+ When we get to the final alternative within the brackets, we used to return
+ the result of a recursive call to match() whatever happened so it was
+ possible to reduce stack usage by turning this into a tail recursion,
+ except in the case of a possibly empty group. However, now that there is
+ the possibility of (*THEN) occurring in the final alternative, this
+ optimization is no longer always possible.
+
+ We can optimize if we know there are no (*THEN)s in the pattern; at present
+ this is the best that can be done.
MATCH_ONCE is returned when the end of an atomic group is successfully
reached, but subsequent matching fails. It passes back up the tree (causing
@@ -892,10 +996,34 @@ for (;;)
for (;;)
{
if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
+
+ /* If this is not a possibly empty group, and there are no (*THEN)s in
+ the pattern, and this is the final alternative, optimize as described
+ above. */
+
+ else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
+ {
+ ecode += _pcre_OP_lengths[*ecode];
+ goto TAIL_RECURSE;
+ }
+
+ /* In all other cases, we have to make another call to match(). */
+
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
RM2);
- if (rrc != MATCH_NOMATCH &&
- (rrc != MATCH_THEN || md->start_match_ptr != ecode))
+
+ /* See comment in the code for capturing groups above about handling
+ THEN. */
+
+ if (rrc == MATCH_THEN)
+ {
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ }
+
+ if (rrc != MATCH_NOMATCH)
{
if (rrc == MATCH_ONCE)
{
@@ -912,7 +1040,8 @@ for (;;)
ecode += GET(ecode, 1);
if (*ecode != OP_ALT) break;
}
- if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
+
+ if (md->mark == NULL) md->mark = markptr;
RRETURN(MATCH_NOMATCH);
/* Handle possessive capturing brackets with an unlimited repeat. We come
@@ -975,9 +1104,19 @@ for (;;)
matched_once = TRUE;
continue;
}
- if (rrc != MATCH_NOMATCH &&
- (rrc != MATCH_THEN || md->start_match_ptr != ecode))
- RRETURN(rrc);
+
+ /* See comment in the code for capturing groups above about handling
+ THEN. */
+
+ if (rrc == MATCH_THEN)
+ {
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ }
+
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->capture_last = save_capture_last;
ecode += GET(ecode, 1);
if (*ecode != OP_ALT) break;
@@ -990,7 +1129,7 @@ for (;;)
md->offset_vector[md->offset_end - number] = save_offset3;
}
- if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
+ if (md->mark == NULL) md->mark = markptr;
if (allow_zero || matched_once)
{
ecode += 1 + LINK_SIZE;
@@ -1037,9 +1176,19 @@ for (;;)
matched_once = TRUE;
continue;
}
- if (rrc != MATCH_NOMATCH &&
- (rrc != MATCH_THEN || md->start_match_ptr != ecode))
- RRETURN(rrc);
+
+ /* See comment in the code for capturing groups above about handling
+ THEN. */
+
+ if (rrc == MATCH_THEN)
+ {
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ }
+
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode += GET(ecode, 1);
if (*ecode != OP_ALT) break;
}
@@ -1251,8 +1400,11 @@ for (;;)
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
}
- else if (rrc != MATCH_NOMATCH &&
- (rrc != MATCH_THEN || md->start_match_ptr != ecode))
+
+ /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
+ assertion; it is therefore treated as NOMATCH. */
+
+ else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
{
RRETURN(rrc); /* Need braces because of following else */
}
@@ -1263,23 +1415,32 @@ for (;;)
}
}
- /* We are now at the branch that is to be obeyed. As there is only one,
- we used to use tail recursion to avoid using another stack frame, except
- when there was unlimited repeat of a possibly empty group. However, that
- strategy no longer works because of the possibilty of (*THEN) being
- encountered in the branch. A recursive call to match() is always required,
- unless the second alternative doesn't exist, in which case we can just
- plough on. */
+ /* We are now at the branch that is to be obeyed. As there is only one, we can
+ use tail recursion to avoid using another stack frame, except when there is
+ unlimited repeat of a possibly empty group. In the latter case, a recursive
+ call to match() is always required, unless the second alternative doesn't
+ exist, in which case we can just plough on. Note that, for compatibility
+ with Perl, the | in a conditional group is NOT treated as creating two
+ alternatives. If a THEN is encountered in the branch, it propagates out to
+ the enclosing alternative (unless nested in a deeper set of alternatives,
+ of course). */
if (condition || *ecode == OP_ALT)
{
- if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;
+ if (op != OP_SCOND)
+ {
+ ecode += 1 + LINK_SIZE;
+ goto TAIL_RECURSE;
+ }
+
+ md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
- if (rrc == MATCH_THEN && md->start_match_ptr == ecode)
- rrc = MATCH_NOMATCH;
RRETURN(rrc);
}
- else /* Condition false & no alternative */
+
+ /* Condition false & no alternative; continue after the group. */
+
+ else
{
ecode += 1 + LINK_SIZE;
}
@@ -1369,9 +1530,11 @@ for (;;)
markptr = md->mark;
break;
}
- if (rrc != MATCH_NOMATCH &&
- (rrc != MATCH_THEN || md->start_match_ptr != ecode))
- RRETURN(rrc);
+
+ /* PCRE does not allow THEN to escape beyond an assertion; it is treated
+ as NOMATCH. */
+
+ if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
ecode += GET(ecode, 1);
}
while (*ecode == OP_ALT);
@@ -1412,9 +1575,11 @@ for (;;)
do ecode += GET(ecode,1); while (*ecode == OP_ALT);
break;
}
- if (rrc != MATCH_NOMATCH &&
- (rrc != MATCH_THEN || md->start_match_ptr != ecode))
- RRETURN(rrc);
+
+ /* PCRE does not allow THEN to escape beyond an assertion; it is treated
+ as NOMATCH. */
+
+ if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
ecode += GET(ecode,1);
}
while (*ecode == OP_ALT);
@@ -1556,10 +1721,10 @@ for (;;)
md, eptrb, RM6);
memcpy(md->offset_vector, new_recursive.offset_save,
new_recursive.saved_max * sizeof(int));
+ md->recursive = new_recursive.prevrec;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{
DPRINTF(("Recursion matched\n"));
- md->recursive = new_recursive.prevrec;
if (new_recursive.offset_save != stacksave)
(pcre_free)(new_recursive.offset_save);
@@ -1571,8 +1736,11 @@ for (;;)
mstart = md->start_match_ptr;
goto RECURSION_MATCHED; /* Exit loop; end processing */
}
- else if (rrc != MATCH_NOMATCH &&
- (rrc != MATCH_THEN || md->start_match_ptr != ecode))
+
+ /* PCRE does not allow THEN to escape beyond a recursion; it is treated
+ as NOMATCH. */
+
+ else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
{
DPRINTF(("Recursion gave error %d\n", rrc));
if (new_recursive.offset_save != stacksave)
@@ -1658,15 +1826,15 @@ for (;;)
}
else saved_eptr = NULL;
- /* If we are at the end of an assertion group, stop matching and return
- MATCH_MATCH, but record the current high water mark for use by positive
- assertions. We also need to record the match start in case it was changed
- by \K. */
+ /* If we are at the end of an assertion group or a non-capturing atomic
+ group, stop matching and return MATCH_MATCH, but record the current high
+ water mark for use by positive assertions. We also need to record the match
+ start in case it was changed by \K. */
- if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
- *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT)
+ if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
+ *prev == OP_ONCE_NC)
{
- md->end_match_ptr = eptr; /* For ONCE */
+ md->end_match_ptr = eptr; /* For ONCE_NC */
md->end_offset_top = offset_top;
md->start_match_ptr = mstart;
MRRETURN(MATCH_MATCH); /* Sets md->mark */
@@ -1734,11 +1902,11 @@ for (;;)
/* For an ordinary non-repeating ket, just continue at this level. This
also happens for a repeating ket if no characters were matched in the
group. This is the forcible breaking of infinite loops as implemented in
- Perl 5.005. For a non-repeating atomic group, establish a backup point by
- processing the rest of the pattern at a lower level. If this results in a
- NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby
- bypassing intermediate backup points, but resetting any captures that
- happened along the way. */
+ Perl 5.005. For a non-repeating atomic group that includes captures,
+ establish a backup point by processing the rest of the pattern at a lower
+ level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
+ original OP_ONCE level, thereby bypassing intermediate backup points, but
+ resetting any captures that happened along the way. */
if (*ecode == OP_KET || eptr == saved_eptr)
{
@@ -5659,7 +5827,8 @@ switch (frame->Xwhere)
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
- LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63)
+ LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
+ LBL(65) LBL(66)
#ifdef SUPPORT_UTF8
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
LBL(32) LBL(34) LBL(42) LBL(46)
@@ -5761,7 +5930,7 @@ pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
{
-int rc, ocount;
+int rc, ocount, arg_offset_max;
int first_byte = -1;
int req_byte = -1;
int req_byte2 = -1;
@@ -5797,8 +5966,60 @@ if (re == NULL || subject == NULL ||
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
-/* This information is for finding all the numbers associated with a given
-name, for condition testing. */
+/* These two settings are used in the code for checking a UTF-8 string that
+follows immediately afterwards. Other values in the md block are used only
+during "normal" pcre_exec() processing, not when the JIT support is in use,
+so they are set up later. */
+
+utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
+md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
+ ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
+
+/* Check a UTF-8 string if required. Pass back the character offset and error
+code for an invalid string if a results vector is available. */
+
+#ifdef SUPPORT_UTF8
+if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
+ {
+ int erroroffset;
+ int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
+ if (errorcode != 0)
+ {
+ if (offsetcount >= 2)
+ {
+ offsets[0] = erroroffset;
+ offsets[1] = errorcode;
+ }
+ return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
+ PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+ }
+
+ /* Check that a start_offset points to the start of a UTF-8 character. */
+ if (start_offset > 0 && start_offset < length &&
+ (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
+ return PCRE_ERROR_BADUTF8_OFFSET;
+ }
+#endif
+
+/* If the pattern was successfully studied with JIT support, run the JIT
+executable instead of the rest of this function. Most options must be set at
+compile time for the JIT code to be usable. Fall back to the normal code path if
+an unsupported flag is set. In particular, JIT does not support partial
+matching. */
+
+#ifdef SUPPORT_JIT
+if (extra_data != NULL
+ && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
+ && extra_data->executable_jit != NULL
+ && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
+ PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
+ return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
+ start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
+ ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
+#endif
+
+/* Carry on with non-JIT matching. This information is for finding all the
+numbers associated with a given name, for condition testing. */
md->name_table = (uschar *)re + re->name_table_offset;
md->name_count = re->name_count;
@@ -5865,7 +6086,6 @@ md->end_subject = md->start_subject + length;
end_subject = md->end_subject;
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
-utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
md->use_ucp = (re->options & PCRE_UCP) != 0;
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
@@ -5876,14 +6096,12 @@ md->notbol = (options & PCRE_NOTBOL) != 0;
md->noteol = (options & PCRE_NOTEOL) != 0;
md->notempty = (options & PCRE_NOTEMPTY) != 0;
md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
-md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
- ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
-
md->hitend = FALSE;
md->mark = NULL; /* In case never set */
md->recursive = NULL; /* No recursion at top level */
+md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
md->lcc = tables + lcc_offset;
md->ctypes = tables + ctypes_offset;
@@ -5961,39 +6179,13 @@ defined (though never set). So there's no harm in leaving this code. */
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
return PCRE_ERROR_BADPARTIAL;
-/* Check a UTF-8 string if required. Pass back the character offset and error
-code for an invalid string if a results vector is available. */
-
-#ifdef SUPPORT_UTF8
-if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
- {
- int erroroffset;
- int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
- if (errorcode != 0)
- {
- if (offsetcount >= 2)
- {
- offsets[0] = erroroffset;
- offsets[1] = errorcode;
- }
- return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
- PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
- }
-
- /* Check that a start_offset points to the start of a UTF-8 character. */
-
- if (start_offset > 0 && start_offset < length &&
- (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
- return PCRE_ERROR_BADUTF8_OFFSET;
- }
-#endif
-
/* If the expression has got more back references than the offsets supplied can
hold, we get a temporary chunk of working store to use during the matching.
Otherwise, we can use the vector supplied, rounding down its size to a multiple
of 3. */
ocount = offsetcount - (offsetcount % 3);
+arg_offset_max = (2*ocount)/3;
if (re->top_backref > 0 && re->top_backref >= ocount/3)
{
@@ -6173,7 +6365,7 @@ for(;;)
/* The following two optimizations are disabled for partial matching or if
disabling is explicitly requested. */
- if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
+ if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
{
/* If the pattern was studied, a minimum subject length may be set. This is
a lower bound; no actual string of that length may actually match the
@@ -6368,21 +6560,22 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
{
if (using_temporary_offsets)
{
- if (offsetcount >= 4)
+ if (arg_offset_max >= 4)
{
memcpy(offsets + 2, md->offset_vector + 2,
- (offsetcount - 2) * sizeof(int));
+ (arg_offset_max - 2) * sizeof(int));
DPRINTF(("Copied offsets from temporary memory\n"));
}
- if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
+ if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
DPRINTF(("Freeing temporary memory\n"));
(pcre_free)(md->offset_vector);
}
- /* Set the return code to the number of captured strings, or 0 if there are
+ /* Set the return code to the number of captured strings, or 0 if there were
too many to fit into the vector. */
- rc = md->offset_overflow? 0 : md->end_offset_top/2;
+ rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
+ 0 : md->end_offset_top/2;
/* If there is space in the offset vector, set any unused pairs at the end of
the pattern to -1 for backwards compatibility. It is documented that this
diff --git a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
index e25fc50623e..b08067de738 100644
--- a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
+++ b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2009 University of Cambridge
+ Copyright (c) 1997-2011 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -129,6 +129,12 @@ switch (what)
(int)study->minlength : -1;
break;
+ case PCRE_INFO_JIT:
+ *((int *)where) = extra_data != NULL &&
+ (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
+ extra_data->executable_jit != NULL;
+ break;
+
case PCRE_INFO_LASTLITERAL:
*((int *)where) =
((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
diff --git a/usr.sbin/nginx/src/pcre/pcre_internal.h b/usr.sbin/nginx/src/pcre/pcre_internal.h
index 7f35828176e..faf1b766ae4 100644
--- a/usr.sbin/nginx/src/pcre/pcre_internal.h
+++ b/usr.sbin/nginx/src/pcre/pcre_internal.h
@@ -594,6 +594,7 @@ compatibility. */
#define PCRE_STARTLINE 0x0008 /* start after \n for multiline */
#define PCRE_JCHANGED 0x0010 /* j option used in regex */
#define PCRE_HASCRORLF 0x0020 /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN 0x0040 /* pattern contains (*THEN) */
/* Flags for the "extra" block produced by pcre_study(). */
@@ -624,7 +625,8 @@ time, run time, or study time, respectively. */
PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
PCRE_NO_START_OPTIMIZE)
-#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
+#define PUBLIC_STUDY_OPTIONS \
+ PCRE_STUDY_JIT_COMPILE
/* Magic number to provide a small check against being handed junk. Also used
to detect whether a pattern was compiled on a host of different endianness. */
@@ -1454,60 +1456,61 @@ enum {
OP_ASSERTBACK, /* 121 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */
- /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the
- assertions, with ONCE first, as there's a test for >= ONCE for a subpattern
- that isn't an assertion. The POS versions must immediately follow the non-POS
- versions in each case. */
+ /* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
+ after the assertions, with ONCE first, as there's a test for >= ONCE for a
+ subpattern that isn't an assertion. The POS versions must immediately follow
+ the non-POS versions in each case. */
- OP_ONCE, /* 123 Atomic group */
- OP_BRA, /* 124 Start of non-capturing bracket */
- OP_BRAPOS, /* 125 Ditto, with unlimited, possessive repeat */
- OP_CBRA, /* 126 Start of capturing bracket */
- OP_CBRAPOS, /* 127 Ditto, with unlimited, possessive repeat */
- OP_COND, /* 128 Conditional group */
+ OP_ONCE, /* 123 Atomic group, contains captures */
+ OP_ONCE_NC, /* 124 Atomic group containing no captures */
+ OP_BRA, /* 125 Start of non-capturing bracket */
+ OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */
+ OP_CBRA, /* 127 Start of capturing bracket */
+ OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */
+ OP_COND, /* 129 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
- OP_SBRA, /* 129 Start of non-capturing bracket, check empty */
- OP_SBRAPOS, /* 130 Ditto, with unlimited, possessive repeat */
- OP_SCBRA, /* 131 Start of capturing bracket, check empty */
- OP_SCBRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
- OP_SCOND, /* 133 Conditional group, check empty */
+ OP_SBRA, /* 130 Start of non-capturing bracket, check empty */
+ OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */
+ OP_SCBRA, /* 132 Start of capturing bracket, check empty */
+ OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
+ OP_SCOND, /* 134 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
- OP_CREF, /* 134 Used to hold a capture number as condition */
- OP_NCREF, /* 135 Same, but generated by a name reference*/
- OP_RREF, /* 136 Used to hold a recursion number as condition */
- OP_NRREF, /* 137 Same, but generated by a name reference*/
- OP_DEF, /* 138 The DEFINE condition */
+ OP_CREF, /* 135 Used to hold a capture number as condition */
+ OP_NCREF, /* 136 Same, but generated by a name reference*/
+ OP_RREF, /* 137 Used to hold a recursion number as condition */
+ OP_NRREF, /* 138 Same, but generated by a name reference*/
+ OP_DEF, /* 139 The DEFINE condition */
- OP_BRAZERO, /* 139 These two must remain together and in this */
- OP_BRAMINZERO, /* 140 order. */
- OP_BRAPOSZERO, /* 141 */
+ OP_BRAZERO, /* 140 These two must remain together and in this */
+ OP_BRAMINZERO, /* 141 order. */
+ OP_BRAPOSZERO, /* 142 */
/* These are backtracking control verbs */
- OP_MARK, /* 142 always has an argument */
- OP_PRUNE, /* 143 */
- OP_PRUNE_ARG, /* 144 same, but with argument */
- OP_SKIP, /* 145 */
- OP_SKIP_ARG, /* 146 same, but with argument */
- OP_THEN, /* 147 */
- OP_THEN_ARG, /* 148 same, but with argument */
- OP_COMMIT, /* 149 */
+ OP_MARK, /* 143 always has an argument */
+ OP_PRUNE, /* 144 */
+ OP_PRUNE_ARG, /* 145 same, but with argument */
+ OP_SKIP, /* 146 */
+ OP_SKIP_ARG, /* 147 same, but with argument */
+ OP_THEN, /* 148 */
+ OP_THEN_ARG, /* 149 same, but with argument */
+ OP_COMMIT, /* 150 */
/* These are forced failure and success verbs */
- OP_FAIL, /* 150 */
- OP_ACCEPT, /* 151 */
- OP_ASSERT_ACCEPT, /* 152 Used inside assertions */
- OP_CLOSE, /* 153 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 151 */
+ OP_ACCEPT, /* 152 */
+ OP_ASSERT_ACCEPT, /* 153 Used inside assertions */
+ OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 154 */
+ OP_SKIPZERO, /* 155 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -1551,7 +1554,7 @@ some cases doesn't actually use these names at all). */
"Recurse", "Callout", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
- "Once", \
+ "Once", "Once_NC", \
"Bra", "BraPos", "CBra", "CBraPos", \
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
@@ -1625,6 +1628,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE, /* Assert behind */ \
1+LINK_SIZE, /* Assert behind not */ \
1+LINK_SIZE, /* ONCE */ \
+ 1+LINK_SIZE, /* ONCE_NC */ \
1+LINK_SIZE, /* BRA */ \
1+LINK_SIZE, /* BRAPOS */ \
3+LINK_SIZE, /* CBRA */ \
@@ -1641,7 +1645,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
1, 3, /* SKIP, SKIP_ARG */ \
- 1+LINK_SIZE, 3+LINK_SIZE, /* THEN, THEN_ARG */ \
+ 1, 3, /* THEN, THEN_ARG */ \
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
3, 1 /* CLOSE, SKIPZERO */
@@ -1819,6 +1823,7 @@ typedef struct match_data {
BOOL notempty_atstart; /* Empty string match at start not wanted */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
+ BOOL hasthen; /* Pattern contains (*THEN) */
const uschar *start_code; /* For use when recursing */
USPTR start_subject; /* Start of the subject string */
USPTR end_subject; /* End of the subject string */
@@ -1912,6 +1917,10 @@ extern const int _pcre_utf8_table2[];
extern const int _pcre_utf8_table3[];
extern const uschar _pcre_utf8_table4[];
+#ifdef SUPPORT_JIT
+extern const uschar _pcre_utf8_char_sizes[];
+#endif
+
extern const int _pcre_utf8_table1_size;
extern const char _pcre_utt_names[];
@@ -1936,6 +1945,12 @@ extern int _pcre_valid_utf8(USPTR, int, int *);
extern BOOL _pcre_was_newline(USPTR, int, USPTR, int *, BOOL);
extern BOOL _pcre_xclass(int, const uschar *);
+#ifdef SUPPORT_JIT
+extern void _pcre_jit_compile(const real_pcre *, pcre_extra *);
+extern int _pcre_jit_exec(const real_pcre *, void *, PCRE_SPTR,
+ int, int, int, int, int *, int);
+extern void _pcre_jit_free(void *);
+#endif
/* Unicode character database (UCD) */
@@ -1949,7 +1964,9 @@ extern const ucd_record _pcre_ucd_records[];
extern const uschar _pcre_ucd_stage1[];
extern const pcre_uint16 _pcre_ucd_stage2[];
extern const int _pcre_ucp_gentype[];
-
+#ifdef SUPPORT_JIT
+extern const int _pcre_ucp_typerange[];
+#endif
/* UCD access macros */
diff --git a/usr.sbin/nginx/src/pcre/pcre_tables.c b/usr.sbin/nginx/src/pcre/pcre_tables.c
index e3e6dc192f5..45c221181ac 100644
--- a/usr.sbin/nginx/src/pcre/pcre_tables.c
+++ b/usr.sbin/nginx/src/pcre/pcre_tables.c
@@ -87,6 +87,19 @@ const uschar _pcre_utf8_table4[] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
+#ifdef SUPPORT_JIT
+/* Full table of the number of extra bytes when the
+character code is greater than or equal to 0xc0.
+See _pcre_utf8_table4 above. */
+
+const uschar _pcre_utf8_char_sizes[] = {
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,
+};
+#endif
+
/* Table to translate from particular type value to the general value. */
const int _pcre_ucp_gentype[] = {
@@ -100,6 +113,21 @@ const int _pcre_ucp_gentype[] = {
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
+#ifdef SUPPORT_JIT
+/* This table reverses _pcre_ucp_gentype. We can save the cost
+of a memory load. */
+
+const int _pcre_ucp_typerange[] = {
+ ucp_Cc, ucp_Cs,
+ ucp_Ll, ucp_Lu,
+ ucp_Mc, ucp_Mn,
+ ucp_Nd, ucp_No,
+ ucp_Pc, ucp_Ps,
+ ucp_Sc, ucp_So,
+ ucp_Zl, ucp_Zs,
+};
+#endif
+
/* The pcre_utt[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first