Projects
openEuler:24.03:SP1:Everything
pcre2
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 5
View file
_service:tar_scm:pcre2.spec
Changed
@@ -1,6 +1,6 @@
 Name: pcre2
 Version: 10.42
-Release: 9
+Release: 10
 Summary: Perl Compatible Regular Expressions
 License: BSD
 URL: http://www.pcre.org/
@@ -31,6 +31,8 @@
 Patch6020: backport-Fixing-an-issue-using-empty-character-sets-in-jit.patch
 Patch6021: backport-pcre2grep-document-better-possible-multiline-matchin.patch
 Patch6022: backport-Remove-incorrect-optimization-in-DFA-matching-when-p.patch
+Patch6023: backport-Implement-PCRE2_EXTRA_CASELESS_RESTRICT-and-related-.patch
+Patch6024: backport-Additional-PCRE2_EXTRA_ASCII_xxx-code.patch
 BuildRequires: autoconf libtool automake coreutils gcc make readline-devel
 Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools
@@ -148,6 +150,9 @@
 %{_pkgdocdir}/html/
 %changelog
+* Thu Oct 31 2024 xujing <xujing125@huawei.com> - 10.42-10
+- DESC:sync patches to fix grep testcase failed
+
 * Tue Sep 03 2024 dongyuzhen <dongyuzhen@h-partners.com> - 10.42-9
 - DESC:Remove incorrect optimization in DFA matching when partial matching and (*F) are involved
View file
_service:tar_scm:backport-Additional-PCRE2_EXTRA_ASCII_xxx-code.patch
Added
@@ -0,0 +1,1489 @@ +From a6089462a460a9f6c2db63a86e1c09fabaa81499 Mon Sep 17 00:00:00 2001 +From: Philip Hazel <Philip.Hazel@gmail.com> +Date: Wed, 1 Feb 2023 17:42:29 +0000 +Subject: PATCH Additional PCRE2_EXTRA_ASCII_xxx code + +Conflict:NA +Reference:https://github.com/PCRE2Project/pcre2/commit/a6089462a460a9f6c2db63a86e1c09fabaa81499 + +--- + src/pcre2.h.in | 4 + + src/pcre2_compile.c | 375 ++++++++++++++++++++++++++----------------- + src/pcre2test.c | 21 ++- + testdata/testinput5 | 133 +++++++++++++++ + testdata/testinput7 | 133 +++++++++++++++ + testdata/testoutput5 | 179 +++++++++++++++++++++ + testdata/testoutput7 | 179 +++++++++++++++++++++ + 7 files changed, 869 insertions(+), 155 deletions(-) + +diff --git a/src/pcre2.h.in b/src/pcre2.h.in +index 11419a38..7202c633 100644 +--- a/src/pcre2.h.in ++++ b/src/pcre2.h.in +@@ -154,6 +154,10 @@ D is inspected during pcre2_dfa_match() execution + #define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ + #define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */ + #define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */ ++#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */ ++#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */ ++#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ ++#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ + + /* These are for pcre2_jit_compile(). */ + +diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c +index ed2fe8a7..b8a9e098 100644 +--- a/src/pcre2_compile.c ++++ b/src/pcre2_compile.c +@@ -123,7 +123,7 @@ static unsigned int + #endif + + static int +- compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *, ++ compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *, + uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, + compile_block *, PCRE2_SIZE *); + +@@ -694,8 +694,8 @@ static uint32_t chartypeoffset = { + now all in a single string, to reduce the number of relocations when a shared + library is dynamically loaded. 
The list of lengths is terminated by a zero + length entry. The first three must be alpha, lower, upper, as this is assumed +-for handling case independence. The indices for graph, print, and punct are +-needed, so identify them. */ ++for handling case independence. The indices for several classes are needed, so ++identify them. */ + + static const char posix_names = + STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 +@@ -785,7 +785,8 @@ are allowed. */ + (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \ + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \ + PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \ +- PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) ++ PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \ ++ PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX) + + /* Compile time error code numbers. They are given names so that they can more + easily be tracked. When a new number is added, the tables called eint1 and +@@ -1059,9 +1060,9 @@ for (;;) + case META_SKIP: fprintf(stderr, "META (*SKIP)"); break; + case META_THEN: fprintf(stderr, "META (*THEN)"); break; + +- case META_OPTIONS: +- fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr0, pptr1); +- pptr += 2; ++ case META_OPTIONS: ++ fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr0, pptr1); ++ pptr += 2; + break; + + case META_LOOKBEHIND: +@@ -1494,7 +1495,7 @@ Arguments: + chptr points to a returned data character + errorcodeptr points to the errorcode variable (containing zero) + options the current options bits +- xoptions the current extra options bits ++ xoptions the current extra options bits + isclass TRUE if inside a character class + cb compile data block or NULL when called from pcre2_substitute() + +@@ -2536,6 +2537,85 @@ return parsed_pattern; + + + ++/************************************************* ++* Handle \d, \D, \s, \S, \w, \W * ++*************************************************/ ++ ++/* This function is called from parse_regex() below, both for 
freestanding ++escapes, and those within classes, to handle those escapes that may change when ++Unicode property support is requested. Note that PCRE2_UCP will never be set ++without Unicode support because that is checked when pcre2_compile() is called. ++ ++Arguments: ++ escape the ESC_... value ++ parsed_pattern where to add the code ++ options options bits ++ xoptions extra options bits ++ ++Returns: updated value of parsed_pattern ++*/ ++static uint32_t * ++handle_escdsw(int escape, uint32_t *parsed_pattern, uint32_t options, ++ uint32_t xoptions) ++{ ++uint32_t ascii_option = 0; ++uint32_t prop = ESC_p; ++ ++switch(escape) ++ { ++ case ESC_D: ++ prop = ESC_P; ++ /* Fall through */ ++ case ESC_d: ++ ascii_option = PCRE2_EXTRA_ASCII_BSD; ++ break; ++ ++ case ESC_S: ++ prop = ESC_P; ++ /* Fall through */ ++ case ESC_s: ++ ascii_option = PCRE2_EXTRA_ASCII_BSS; ++ break; ++ ++ case ESC_W: ++ prop = ESC_P; ++ /* Fall through */ ++ case ESC_w: ++ ascii_option = PCRE2_EXTRA_ASCII_BSW; ++ break; ++ } ++ ++if ((options & PCRE2_UCP) == 0 || (xoptions & ascii_option) != 0) ++ { ++ *parsed_pattern++ = META_ESCAPE + escape; ++ } ++else ++ { ++ *parsed_pattern++ = META_ESCAPE + prop; ++ switch(escape) ++ { ++ case ESC_d: ++ case ESC_D: ++ *parsed_pattern++ = (PT_PC << 16) | ucp_Nd; ++ break; ++ ++ case ESC_s: ++ case ESC_S: ++ *parsed_pattern++ = PT_SPACE << 16; ++ break; ++ ++ case ESC_w: ++ case ESC_W: ++ *parsed_pattern++ = PT_WORD << 16; ++ break; ++ } ++ } ++ ++return parsed_pattern; ++} ++ ++ ++ + /************************************************* + * Parse regex and identify named groups * + *************************************************/ +@@ -2564,7 +2644,7 @@ typedef struct nest_save { + uint16_t max_group; + uint16_t flags; + uint32_t options; +- uint32_t xoptions; ++ uint32_t xoptions; + } nest_save; + + #define NSF_RESET 0x0001u +@@ -2579,8 +2659,11 @@ the main compiling phase. 
*/ + #define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \ + PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \ + PCRE2_UNGREEDY) +- +-#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT) ++ ++#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT) ++ ++#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT| \ ++ PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW) + + /* States used for analyzing ranges in character classes. The two OK values + must be last. */ +@@ -3115,9 +3198,7 @@ while (ptr < ptrend) + *parsed_pattern++ = META_ESCAPE + escape; + break; + +- /* Escapes that change in UCP mode. Note that PCRE2_UCP will never be set +- without Unicode support because it is checked when pcre2_compile() is +- called. */ ++ /* Escapes that may change in UCP mode. */ + + case ESC_d: + case ESC_D: +@@ -3126,33 +3207,8 @@ while (ptr < ptrend) + case ESC_w: + case ESC_W: + okquantifier = TRUE; +- if ((options & PCRE2_UCP) == 0) +- { +- *parsed_pattern++ = META_ESCAPE + escape; +- } +- else +- { +- *parsed_pattern++ = META_ESCAPE + +- ((escape == ESC_d || escape == ESC_s || escape == ESC_w)? +- ESC_p : ESC_P); +- switch(escape) +- { +- case ESC_d: +- case ESC_D: +- *parsed_pattern++ = (PT_PC << 16) | ucp_Nd; +- break; +- +- case ESC_s: +- case ESC_S: +- *parsed_pattern++ = PT_SPACE << 16; +- break; +- +- case ESC_w: +- case ESC_W: +- *parsed_pattern++ = PT_WORD << 16; +- break; +- } +- } ++ parsed_pattern = handle_escdsw(escape, parsed_pattern, options, ++ xoptions); + break; + + /* Unicode property matching */ +@@ -3515,18 +3571,22 @@ while (ptr < ptrend) + + class_range_state = RANGE_NO; + +- /* When PCRE2_UCP is set, some of the POSIX classes are converted to +- use Unicode properties \p or \P or, in one case, \h or \H. The +- substitutes table has two values per class, containing the type and +- value of a \p or \P item. 
The special cases are specified with a +- negative type: a non-zero value causes \h or \H to be used, and a zero +- value falls through to behave like a non-UCP POSIX class. */ ++ /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some ++ of the POSIX classes are converted to use Unicode properties \p or \P ++ or, in one case, \h or \H. The substitutes table has two values per ++ class, containing the type and value of a \p or \P item. The special ++ cases are specified with a negative type: a non-zero value causes \h or ++ \H to be used, and a zero value falls through to behave like a non-UCP ++ POSIX class. There are now also some extra options that force ASCII for ++ some classes. */ + + #ifdef SUPPORT_UNICODE +- if ((options & PCRE2_UCP) != 0) ++ if ((options & PCRE2_UCP) != 0 && ++ (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0) + { + int ptype = posix_substitutes2*posix_class; + int pvalue = posix_substitutes2*posix_class + 1; ++ + if (ptype >= 0) + { + *parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_P : ESC_p); +@@ -3664,7 +3724,7 @@ while (ptr < ptrend) + *parsed_pattern++ = META_ESCAPE + escape; + break; + +- /* These escapes are converted to Unicode property tests when ++ /* These escapes may be converted to Unicode property tests when + PCRE2_UCP is set. */ + + case ESC_d: +@@ -3673,33 +3733,8 @@ while (ptr < ptrend) + case ESC_S: + case ESC_w: + case ESC_W: +- if ((options & PCRE2_UCP) == 0) +- { +- *parsed_pattern++ = META_ESCAPE + escape; +- } +- else +- { +- *parsed_pattern++ = META_ESCAPE + +- ((escape == ESC_d || escape == ESC_s || escape == ESC_w)? 
+- ESC_p : ESC_P); +- switch(escape) +- { +- case ESC_d: +- case ESC_D: +- *parsed_pattern++ = (PT_PC << 16) | ucp_Nd; +- break; +- +- case ESC_s: +- case ESC_S: +- *parsed_pattern++ = PT_SPACE << 16; +- break; +- +- case ESC_w: +- case ESC_W: +- *parsed_pattern++ = PT_WORD << 16; +- break; +- } +- } ++ parsed_pattern = handle_escdsw(escape, parsed_pattern, options, ++ xoptions); + break; + + /* Explicit Unicode property matching */ +@@ -4052,7 +4087,7 @@ while (ptr < ptrend) + { + BOOL hyphenok = TRUE; + uint32_t oldoptions = options; +- uint32_t oldxoptions = xoptions; ++ uint32_t oldxoptions = xoptions; + + top_nest->reset_group = 0; + top_nest->max_group = 0; +@@ -4067,7 +4102,7 @@ while (ptr < ptrend) + { + options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| + PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE); +- xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT); ++ xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT); + hyphenok = FALSE; + ptr++; + } +@@ -4085,10 +4120,44 @@ while (ptr < ptrend) + goto FAILED; + } + optset = &unset; +- xoptset = &xunset; ++ xoptset = &xunset; + hyphenok = FALSE; + break; + ++ /* There are some two-character sequences that start with 'a'. 
*/ ++ ++ case CHAR_a: ++ if (ptr < ptrend) ++ { ++ if (*ptr == CHAR_D) ++ { ++ *xoptset |= PCRE2_EXTRA_ASCII_BSD; ++ ptr++; ++ break; ++ } ++ if (*ptr == CHAR_P) ++ { ++ *xoptset |= PCRE2_EXTRA_ASCII_POSIX; ++ ptr++; ++ break; ++ } ++ if (*ptr == CHAR_S) ++ { ++ *xoptset |= PCRE2_EXTRA_ASCII_BSS; ++ ptr++; ++ break; ++ } ++ if (*ptr == CHAR_W) ++ { ++ *xoptset |= PCRE2_EXTRA_ASCII_BSW; ++ ptr++; ++ break; ++ } ++ } ++ *xoptset |= PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS| ++ PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX; ++ break; ++ + case CHAR_J: /* Record that it changed in the external options */ + *optset |= PCRE2_DUPNAMES; + cb->external_flags |= PCRE2_JCHANGED; +@@ -4097,7 +4166,7 @@ while (ptr < ptrend) + case CHAR_i: *optset |= PCRE2_CASELESS; break; + case CHAR_m: *optset |= PCRE2_MULTILINE; break; + case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break; +- case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break; ++ case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break; + case CHAR_s: *optset |= PCRE2_DOTALL; break; + case CHAR_U: *optset |= PCRE2_UNGREEDY; break; + +@@ -4757,7 +4826,7 @@ while (ptr < ptrend) + if (top_nest != NULL && top_nest->nest_depth == nest_depth) + { + options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options; +- xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions; ++ xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions; + if ((top_nest->flags & NSF_RESET) != 0 && + top_nest->max_group > cb->bracount) + cb->bracount = top_nest->max_group; +@@ -5019,7 +5088,7 @@ Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options bits +- xoptions the extra options bits ++ xoptions the extra options bits + cb compile data + start start of range character + end end of range character +@@ -5030,7 +5099,7 @@ Returns: the number of < 256 characters added + + static unsigned int + add_to_class_internal(uint8_t 
*classbits, PCRE2_UCHAR **uchardptr, +- uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start, ++ uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start, + uint32_t end) + { + uint32_t c; +@@ -5039,7 +5108,7 @@ unsigned int n8 = 0; + + /* If caseless matching is required, scan the range and process alternate + cases. In Unicode, there are 8-bit characters that have alternate cases that +-are greater than 255 and vice-versa (though these may be ignored if caseless ++are greater than 255 and vice-versa (though these may be ignored if caseless + restriction is in force). Sometimes we can just extend the original range. */ + + if ((options & PCRE2_CASELESS) != 0) +@@ -5053,17 +5122,17 @@ if ((options & PCRE2_CASELESS) != 0) + options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ + c = start; + +- while ((rc = get_othercase_range(&c, end, &oc, &od, ++ while ((rc = get_othercase_range(&c, end, &oc, &od, + (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0) + { + /* Handle a single character that has more than one other case. */ + +- if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, ++ if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, + options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc); + + /* Do nothing if the other case range is within the original range. */ + +- else if (oc >= cb->class_range_start && od <= cb->class_range_end) ++ else if (oc >= cb->class_range_start && od <= cb->class_range_end) + continue; + + /* Extend the original range if there is overlap, noting that if oc < c, +@@ -5178,7 +5247,7 @@ Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options bits +- xoptions the extra options bits ++ xoptions the extra options bits + cb contains pointers to tables etc. 
+ p points to row of 32-bit values, terminated by NOTACHAR + except character to omit; this is used when adding lists of +@@ -5191,7 +5260,7 @@ Returns: the number of < 256 characters added + + static unsigned int + add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, +- uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p, ++ uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p, + unsigned int except) + { + unsigned int n8 = 0; +@@ -5201,7 +5270,7 @@ while (p0 < NOTACHAR) + if (p0 != except) + { + while(pn+1 == p0 + n + 1) n++; +- n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, ++ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, + p0, pn); + } + p += n + 1; +@@ -5223,7 +5292,7 @@ Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options bits +- xoptions the extra options bits ++ xoptions the extra options bits + cb compile data + start start of range character + end end of range character +@@ -5238,7 +5307,7 @@ add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, + { + cb->class_range_start = start; + cb->class_range_end = end; +-return add_to_class_internal(classbits, uchardptr, options, xoptions, cb, ++return add_to_class_internal(classbits, uchardptr, options, xoptions, cb, + start, end); + } + +@@ -5257,7 +5326,7 @@ Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options bits +- xoptions the extra options bits ++ xoptions the extra options bits + cb contains pointers to tables etc. 
+ p points to row of 32-bit values, terminated by NOTACHAR + except character to omit; this is used when adding lists of +@@ -5281,7 +5350,7 @@ while (p0 < NOTACHAR) + while(pn+1 == p0 + n + 1) n++; + cb->class_range_start = p0; + cb->class_range_end = pn; +- n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, ++ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, + p0, pn); + } + p += n + 1; +@@ -5302,7 +5371,7 @@ Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options bits +- xoptions the extra options bits ++ xoptions the extra options bits + cb contains pointers to tables etc. + p points to row of 32-bit values, terminated by NOTACHAR + +@@ -5412,7 +5481,7 @@ real compile phase. The value of lengthptr distinguishes the two phases. + + Arguments: + optionsptr pointer to the option bits +- xoptionsptr pointer to the extra option bits ++ xoptionsptr pointer to the extra option bits + codeptr points to the pointer to the current code point + pptrptr points to the current parsed pattern pointer + errorcodeptr points to error code variable +@@ -5431,10 +5500,10 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero + */ + + static int +-compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr, +- PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr, +- uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, +- uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, ++compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr, ++ PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr, ++ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, ++ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, + PCRE2_SIZE *lengthptr) + { + int bravalue = 0; +@@ -5757,8 +5826,8 @@ for (;; pptr++) + uint32_t c = pptr1; + + #ifdef SUPPORT_UNICODE +- if (UCD_CASESET(c) == 0 || +- ((xoptions & 
PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && ++ if (UCD_CASESET(c) == 0 || ++ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && + c < 128 && pptr2 < 128)) + #endif + { +@@ -5851,41 +5920,45 @@ for (;; pptr++) + XCL_PROP/XCL_NOTPROP directly, which is done here. */ + + #ifdef SUPPORT_UNICODE +- if ((options & PCRE2_UCP) != 0) switch(posix_class) ++ if ((options & PCRE2_UCP) != 0 && ++ (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0) + { +- case PC_GRAPH: +- case PC_PRINT: +- case PC_PUNCT: +- *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; +- *class_uchardata++ = (PCRE2_UCHAR) +- ((posix_class == PC_GRAPH)? PT_PXGRAPH : +- (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT); +- *class_uchardata++ = 0; +- xclass_has_prop = TRUE; +- goto CONTINUE_CLASS; +- +- /* For the other POSIX classes (ascii, xdigit) we are going to +- fall through to the non-UCP case and build a bit map for +- characters with code points less than 256. However, if we are in +- a negated POSIX class, characters with code points greater than +- 255 must either all match or all not match, depending on whether +- the whole class is not or is negated. For example, for +- :^ascii:... they must all match, whereas for ^:^xdigit:... +- they must not. +- +- In the special case where there are no xclass items, this is +- automatically handled by the use of OP_CLASS or OP_NCLASS, but an +- explicit range is needed for OP_XCLASS. Setting a flag here +- causes the range to be generated later when it is known that +- OP_XCLASS is required. In the 8-bit library this is relevant only in +- utf mode, since no wide characters can exist otherwise. */ ++ switch(posix_class) ++ { ++ case PC_GRAPH: ++ case PC_PRINT: ++ case PC_PUNCT: ++ *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; ++ *class_uchardata++ = (PCRE2_UCHAR) ++ ((posix_class == PC_GRAPH)? PT_PXGRAPH : ++ (posix_class == PC_PRINT)? 
PT_PXPRINT : PT_PXPUNCT); ++ *class_uchardata++ = 0; ++ xclass_has_prop = TRUE; ++ goto CONTINUE_CLASS; ++ ++ /* For the other POSIX classes (ascii, xdigit) we are going to ++ fall through to the non-UCP case and build a bit map for ++ characters with code points less than 256. However, if we are in ++ a negated POSIX class, characters with code points greater than ++ 255 must either all match or all not match, depending on whether ++ the whole class is not or is negated. For example, for ++ :^ascii:... they must all match, whereas for ^:^xdigit:... ++ they must not. ++ ++ In the special case where there are no xclass items, this is ++ automatically handled by the use of OP_CLASS or OP_NCLASS, but an ++ explicit range is needed for OP_XCLASS. Setting a flag here ++ causes the range to be generated later when it is known that ++ OP_XCLASS is required. In the 8-bit library this is relevant only in ++ utf mode, since no wide characters can exist otherwise. */ + +- default: ++ default: + #if PCRE2_CODE_UNIT_WIDTH == 8 +- if (utf) ++ if (utf) + #endif +- match_all_or_no_wide_chars |= local_negate; +- break; ++ match_all_or_no_wide_chars |= local_negate; ++ break; ++ } + } + #endif /* SUPPORT_UNICODE */ + +@@ -6011,7 +6084,7 @@ for (;; pptr++) + + case ESC_h: + (void)add_list_to_class(classbits, &class_uchardata, +- options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list), ++ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list), + NOTACHAR); + break; + +@@ -6022,7 +6095,7 @@ for (;; pptr++) + + case ESC_v: + (void)add_list_to_class(classbits, &class_uchardata, +- options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list), ++ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list), + NOTACHAR); + break; + +@@ -6102,7 +6175,7 @@ for (;; pptr++) + if (C <= CHAR_i) + { + class_has_8bitchar += +- add_to_class(classbits, &class_uchardata, options, xoptions, ++ add_to_class(classbits, &class_uchardata, options, xoptions, + cb, C + uc, ((D < CHAR_i)? 
D : CHAR_i) + uc); + C = CHAR_j; + } +@@ -6110,7 +6183,7 @@ for (;; pptr++) + if (C <= D && C <= CHAR_r) + { + class_has_8bitchar += +- add_to_class(classbits, &class_uchardata, options, xoptions, ++ add_to_class(classbits, &class_uchardata, options, xoptions, + cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc); + C = CHAR_s; + } +@@ -6118,7 +6191,7 @@ for (;; pptr++) + if (C <= D) + { + class_has_8bitchar += +- add_to_class(classbits, &class_uchardata, options, xoptions, ++ add_to_class(classbits, &class_uchardata, options, xoptions, + cb, C + uc, D + uc); + } + } +@@ -6126,7 +6199,7 @@ for (;; pptr++) + #endif + /* Not an EBCDIC special range */ + +- class_has_8bitchar += add_to_class(classbits, &class_uchardata, ++ class_has_8bitchar += add_to_class(classbits, &class_uchardata, + options, xoptions, cb, c, d); + goto CONTINUE_CLASS; /* Go get the next char in the class */ + } /* End of range handling */ +@@ -6135,7 +6208,7 @@ for (;; pptr++) + /* Handle a single character. */ + + class_has_8bitchar += +- add_to_class(classbits, &class_uchardata, options, xoptions, cb, ++ add_to_class(classbits, &class_uchardata, options, xoptions, cb, + meta, meta); + } + +@@ -6621,7 +6694,7 @@ for (;; pptr++) + if ((group_return = + compile_regex( + options, /* The options state */ +- xoptions, /* The extra options state */ ++ xoptions, /* The extra options state */ + &tempcode, /* Where to put code (updated) */ + &pptr, /* Input pointer (updated) */ + errorcodeptr, /* Where to put an error message */ +@@ -8020,7 +8093,7 @@ for (;; pptr++) + { + uint32_t caseset = UCD_CASESET(meta); + if (caseset != 0 && +- ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || ++ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || + PRIV(ucd_caseless_sets)caseset > 127)) + { + *code++ = OP_PROP; +@@ -8137,7 +8210,7 @@ the two phases. 
+ + Arguments: + options option bits, including any changes for this subpattern +- xoptions extra option bits, ditto ++ xoptions extra option bits, ditto + codeptr -> the address of the current code pointer + pptrptr -> the address of the current parsed pattern pointer + errorcodeptr -> pointer to error code variable +@@ -8157,10 +8230,10 @@ Returns: 0 There has been an error + */ + + static int +-compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr, +- uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits, +- uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, +- uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, ++compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr, ++ uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits, ++ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, ++ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, + PCRE2_SIZE *lengthptr) + { + PCRE2_UCHAR *code = *codeptr; +@@ -8257,7 +8330,7 @@ for (;;) + into the length. */ + + if ((branch_return = +- compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr, ++ compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr, + &branchfirstcu, &branchfirstcuflags, &branchreqcu, &branchreqcuflags, + &bc, cb, (lengthptr == NULL)? NULL : &length)) == 0) + return 0; +@@ -10292,7 +10365,7 @@ code = cworkspace; + *code = OP_BRA; + + (void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr, +- &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, ++ &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, + &length); + + if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */ +@@ -10390,8 +10463,8 @@ of the function here. 
*/ + pptr = cb.parsed_pattern; + code = (PCRE2_UCHAR *)codestart; + *code = OP_BRA; +-regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code, +- &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, ++regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code, ++ &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, + &cb, NULL); + if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY; + re->top_bracket = cb.bracount; +diff --git a/src/pcre2test.c b/src/pcre2test.c +index 169c6181..6bae5bb5 100644 +--- a/src/pcre2test.c ++++ b/src/pcre2test.c +@@ -628,6 +628,9 @@ typedef struct modstruct { + PCRE2_SIZE offset; + } modstruct; + ++#define PCRE2_EXTRA_ASCII_ALL (PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS| \ ++ PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX) ++ + static modstruct modlist = { + { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) }, + { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) }, +@@ -642,6 +645,11 @@ static modstruct modlist = { + { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) }, + { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) }, + { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, ++ { "ascii_all", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_ALL, CO(extra_options) }, ++ { "ascii_bsd", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSD, CO(extra_options) }, ++ { "ascii_bss", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSS, CO(extra_options) }, ++ { "ascii_bsw", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSW, CO(extra_options) }, ++ { "ascii_posix", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_POSIX, CO(extra_options) }, + { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, + { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) }, + { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) }, +@@ -839,6 +847,7 @@ typedef struct c1modstruct { + static c1modstruct c1modlist = { + { 
"bincode", 'B', -1 }, + { "info", 'I', -1 }, ++ { "ascii_all", 'a', -1 }, + { "global", 'g', -1 }, + { "caseless", 'i', -1 }, + { "multiline", 'm', -1 }, +@@ -4283,15 +4292,19 @@ show_compile_extra_options(uint32_t options, const char *before, + const char *after) + { + if (options == 0) fprintf(outfile, "%s <none>%s", before, after); +-else fprintf(outfile, "%s%s%s%s%s%s%s%s%s", ++else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s", + before, + ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "", ++ ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "", ++ ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "", ++ ((options & PCRE2_EXTRA_ASCII_BSS) != 0)? " ascii_bss" : "", ++ ((options & PCRE2_EXTRA_ASCII_BSW) != 0)? " ascii_bsw" : "", ++ ((options & PCRE2_EXTRA_ASCII_POSIX) != 0)? " ascii_posix" : "", + ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "", +- ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "", ++ ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "", ++ ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "", + ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "", + ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "", +- ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "", +- ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "", + after); + } + +diff --git a/testdata/testinput5 b/testdata/testinput5 +index b8174230..6e186cf0 100644 +--- a/testdata/testinput5 ++++ b/testdata/testinput5 +@@ -2309,4 +2309,137 @@ + + # End caseless restrict tests + ++# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. 
++ ++# DIGITS ++ ++/\d+/i,utf ++ 123\x{660}456 ++ ++/\d+/i,utf,ucp ++ 123\x{660}456 ++ ++/\d+/i,utf,ucp,ascii_bsd ++ 123\x{660}456 ++ ++/\d+/i,utf ++ 123\x{660}456 ++ ++/\d+/i,utf,ucp ++ 123\x{660}456 ++ ++/\d+/i,utf,ucp,ascii_bsd ++ 123\x{660}456 ++ ++/\d(?aD)\d(?-aD)\d/utf,ucp ++ \x{660}9\x{660} ++\= Expect no match ++ \x{660}\x{660}\x{660} ++ ++/\d(?a)\d(?-a)\d/utf,ucp ++ \x{660}9\x{660} ++\= Expect no match ++ \x{660}\x{660}\x{660} ++ ++# SPACES ++ ++/>\s+</i,utf ++ > < ++\= Expect no match ++ >\x{a0} < ++ ++/>\s+</i,utf,ucp ++ > < ++ >\x{a0} < ++ ++/>\s+</i,utf,ucp,ascii_bss ++ > < ++\= Expect no match ++ >\x{a0} < ++ ++/>\s+</i,utf ++ > < ++\= Expect no match ++ >\x{a0} < ++ ++/>\s+</i,utf,ucp ++ > < ++ >\x{a0} < ++ ++/>\s+</i,utf,ucp,ascii_bss ++ > < ++\= Expect no match ++ >\x{a0} < ++ ++/>\s(?aS)\s(?-aS)\s</utf,ucp ++ >\x{a0} \x{a0}< ++\= Expect no match ++ >\x{a0}\x{a0}\x{a0}< ++ ++/>\s(?a)\s(?-a)\s</utf,ucp ++ >\x{a0} \x{a0}< ++\= Expect no match ++ >\x{a0}\x{a0}\x{a0}< ++ ++# WORDS ++ ++/\w+/i,utf ++ 123\x{660}abc ++ ++/\w+/i,utf,ucp ++ 123\x{660}abc ++ ++/\w+/i,utf,ucp,ascii_bsw ++ 123\x{660}abc ++ ++/\w+/i,utf ++ 123\x{660}abc ++ ++/\w+/i,utf,ucp ++ 123\x{660}abc ++ ++/\w+/i,utf,ucp,ascii_bsw ++ 123\x{660}abc ++ ++/\w(?aW)\w(?-aW)\w/utf,ucp ++ \x{660}A\x{c0} ++\= Expect no match ++ \x{660}\x{c0}\x{c0} ++ ++/\w(?a)\w(?-a)\w/utf,ucp ++ \x{660}A\x{c0} ++\= Expect no match ++ \x{660}\x{c0}\x{c0} ++ ++# POSIX ++ ++/:digit:+/utf,ucp ++ 123\x{660}456 ++ ++/:digit:+/utf,ucp,ascii_posix ++ 123\x{660}456 ++ ++/>:space:+</utf,ucp ++ >\x{a0} \x{a0}< ++ >\x{a0}\x{a0}\x{a0}< ++ ++/>:space:+</utf,ucp,ascii_posix ++\= Expect no match ++ >\x{a0} \x{a0}< ++ ++/(?aP):alnum:+/i,ucp,utf ++ abcáxyz ++ abc\x{660}xyz ++ ++/(?aP):alnum:\d+/i,ucp,utf ++ abc\x{660}xyz ++ ++# VARIOUS ++ ++/\d\s\w+/a,ucp,utf ++ 9 A\x{660}À ++ 9 AÀ\x{660} ++ ++# End PCRE2_EXTRA_ASCII_xxx tests ++ + # End of testinput5 +diff --git a/testdata/testinput7 b/testdata/testinput7 +index 
991de885..64a37ad2 100644 +--- a/testdata/testinput7 ++++ b/testdata/testinput7 +@@ -2328,4 +2328,137 @@ + + # End caseless restrict tests + ++# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. ++ ++# DIGITS ++ ++/\d+/i,utf ++ 123\x{660}456 ++ ++/\d+/i,utf,ucp ++ 123\x{660}456 ++ ++/\d+/i,utf,ucp,ascii_bsd ++ 123\x{660}456 ++ ++/\d+/i,utf ++ 123\x{660}456 ++ ++/\d+/i,utf,ucp ++ 123\x{660}456 ++ ++/\d+/i,utf,ucp,ascii_bsd ++ 123\x{660}456 ++ ++/\d(?aD)\d(?-aD)\d/utf,ucp ++ \x{660}9\x{660} ++\= Expect no match ++ \x{660}\x{660}\x{660} ++ ++/\d(?a)\d(?-a)\d/utf,ucp ++ \x{660}9\x{660} ++\= Expect no match ++ \x{660}\x{660}\x{660} ++ ++# SPACES ++ ++/>\s+</i,utf ++ > < ++\= Expect no match ++ >\x{a0} < ++ ++/>\s+</i,utf,ucp ++ > < ++ >\x{a0} < ++ ++/>\s+</i,utf,ucp,ascii_bss ++ > < ++\= Expect no match ++ >\x{a0} < ++ ++/>\s+</i,utf ++ > < ++\= Expect no match ++ >\x{a0} < ++ ++/>\s+</i,utf,ucp ++ > < ++ >\x{a0} < ++ ++/>\s+</i,utf,ucp,ascii_bss ++ > < ++\= Expect no match ++ >\x{a0} < ++ ++/>\s(?aS)\s(?-aS)\s</utf,ucp ++ >\x{a0} \x{a0}< ++\= Expect no match ++ >\x{a0}\x{a0}\x{a0}< ++ ++/>\s(?a)\s(?-a)\s</utf,ucp ++ >\x{a0} \x{a0}< ++\= Expect no match ++ >\x{a0}\x{a0}\x{a0}< ++ ++# WORDS ++ ++/\w+/i,utf ++ 123\x{660}abc ++ ++/\w+/i,utf,ucp ++ 123\x{660}abc ++ ++/\w+/i,utf,ucp,ascii_bsw ++ 123\x{660}abc ++ ++/\w+/i,utf ++ 123\x{660}abc ++ ++/\w+/i,utf,ucp ++ 123\x{660}abc ++ ++/\w+/i,utf,ucp,ascii_bsw ++ 123\x{660}abc ++ ++/\w(?aW)\w(?-aW)\w/utf,ucp ++ \x{660}A\x{c0} ++\= Expect no match ++ \x{660}\x{c0}\x{c0} ++ ++/\w(?a)\w(?-a)\w/utf,ucp ++ \x{660}A\x{c0} ++\= Expect no match ++ \x{660}\x{c0}\x{c0} ++ ++# POSIX ++ ++/:digit:+/utf,ucp ++ 123\x{660}456 ++ ++/:digit:+/utf,ucp,ascii_posix ++ 123\x{660}456 ++ ++/>:space:+</utf,ucp ++ >\x{a0} \x{a0}< ++ >\x{a0}\x{a0}\x{a0}< ++ ++/>:space:+</utf,ucp,ascii_posix ++\= Expect no match ++ >\x{a0} \x{a0}< ++ ++/(?aP):alnum:+/i,ucp,utf ++ abcáxyz ++ abc\x{660}xyz ++ ++/(?aP):alnum:\d+/i,ucp,utf ++ abc\x{660}xyz ++ 
++# VARIOUS ++ ++/\d\s\w+/a,ucp,utf ++ 9 A\x{660}À ++ 9 AÀ\x{660} ++ ++# End PCRE2_EXTRA_ASCII_xxx tests ++ + # End of testinput7 +diff --git a/testdata/testoutput5 b/testdata/testoutput5 +index db42a117..26972f70 100644 +--- a/testdata/testoutput5 ++++ b/testdata/testoutput5 +@@ -5196,4 +5196,183 @@ No match + + # End caseless restrict tests + ++# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. ++ ++# DIGITS ++ ++/\d+/i,utf ++ 123\x{660}456 ++ 0: 123 ++ ++/\d+/i,utf,ucp ++ 123\x{660}456 ++ 0: 123\x{660}456 ++ ++/\d+/i,utf,ucp,ascii_bsd ++ 123\x{660}456 ++ 0: 123 ++ ++/\d+/i,utf ++ 123\x{660}456 ++ 0: 123 ++ ++/\d+/i,utf,ucp ++ 123\x{660}456 ++ 0: 123\x{660}456 ++ ++/\d+/i,utf,ucp,ascii_bsd ++ 123\x{660}456 ++ 0: 123 ++ ++/\d(?aD)\d(?-aD)\d/utf,ucp ++ \x{660}9\x{660} ++ 0: \x{660}9\x{660} ++\= Expect no match ++ \x{660}\x{660}\x{660} ++No match ++ ++/\d(?a)\d(?-a)\d/utf,ucp ++ \x{660}9\x{660} ++ 0: \x{660}9\x{660} ++\= Expect no match ++ \x{660}\x{660}\x{660} ++No match ++ ++# SPACES ++ ++/>\s+</i,utf ++ > < ++ 0: > < ++\= Expect no match ++ >\x{a0} < ++No match ++ ++/>\s+</i,utf,ucp ++ > < ++ 0: > < ++ >\x{a0} < ++ 0: >\x{a0} < ++ ++/>\s+</i,utf,ucp,ascii_bss ++ > < ++ 0: > < ++\= Expect no match ++ >\x{a0} < ++No match ++ ++/>\s+</i,utf ++ > < ++ 0: > < ++\= Expect no match ++ >\x{a0} < ++No match ++ ++/>\s+</i,utf,ucp ++ > < ++ 0: > < ++ >\x{a0} < ++ 0: >\x{a0} < ++ ++/>\s+</i,utf,ucp,ascii_bss ++ > < ++ 0: > < ++\= Expect no match ++ >\x{a0} < ++No match ++ ++/>\s(?aS)\s(?-aS)\s</utf,ucp ++ >\x{a0} \x{a0}< ++ 0: >\x{a0} \x{a0}< ++\= Expect no match ++ >\x{a0}\x{a0}\x{a0}< ++No match ++ ++/>\s(?a)\s(?-a)\s</utf,ucp ++ >\x{a0} \x{a0}< ++ 0: >\x{a0} \x{a0}< ++\= Expect no match ++ >\x{a0}\x{a0}\x{a0}< ++No match ++ ++# WORDS ++ ++/\w+/i,utf ++ 123\x{660}abc ++ 0: 123 ++ ++/\w+/i,utf,ucp ++ 123\x{660}abc ++ 0: 123\x{660}abc ++ ++/\w+/i,utf,ucp,ascii_bsw ++ 123\x{660}abc ++ 0: 123 ++ ++/\w+/i,utf ++ 123\x{660}abc ++ 0: 123 ++ ++/\w+/i,utf,ucp ++ 
123\x{660}abc ++ 0: 123\x{660}abc ++ ++/\w+/i,utf,ucp,ascii_bsw ++ 123\x{660}abc ++ 0: 123 ++ ++/\w(?aW)\w(?-aW)\w/utf,ucp ++ \x{660}A\x{c0} ++ 0: \x{660}A\x{c0} ++\= Expect no match ++ \x{660}\x{c0}\x{c0} ++No match ++ ++/\w(?a)\w(?-a)\w/utf,ucp ++ \x{660}A\x{c0} ++ 0: \x{660}A\x{c0} ++\= Expect no match ++ \x{660}\x{c0}\x{c0} ++No match ++ ++# POSIX ++ ++/:digit:+/utf,ucp ++ 123\x{660}456 ++ 0: 123\x{660}456 ++ ++/:digit:+/utf,ucp,ascii_posix ++ 123\x{660}456 ++ 0: 123 ++ ++/>:space:+</utf,ucp ++ >\x{a0} \x{a0}< ++ 0: >\x{a0} \x{a0}< ++ >\x{a0}\x{a0}\x{a0}< ++ 0: >\x{a0}\x{a0}\x{a0}< ++ ++/>:space:+</utf,ucp,ascii_posix ++\= Expect no match ++ >\x{a0} \x{a0}< ++No match ++ ++/(?aP):alnum:+/i,ucp,utf ++ abcáxyz ++ 0: abc ++ abc\x{660}xyz ++ 0: abc ++ ++/(?aP):alnum:\d+/i,ucp,utf ++ abc\x{660}xyz ++ 0: abc\x{660}xyz ++ ++# VARIOUS ++ ++/\d\s\w+/a,ucp,utf ++ 9 A\x{660}À ++ 0: 9 A ++ 9 AÀ\x{660} ++ 0: 9 A ++ ++# End PCRE2_EXTRA_ASCII_xxx tests ++ + # End of testinput5 +diff --git a/testdata/testoutput7 b/testdata/testoutput7 +index c2291a10..c830748c 100644 +--- a/testdata/testoutput7 ++++ b/testdata/testoutput7 +@@ -3936,4 +3936,183 @@ No match + + # End caseless restrict tests + ++# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. 
++ ++# DIGITS ++ ++/\d+/i,utf ++ 123\x{660}456 ++ 0: 123 ++ ++/\d+/i,utf,ucp ++ 123\x{660}456 ++ 0: 123\x{660}456 ++ ++/\d+/i,utf,ucp,ascii_bsd ++ 123\x{660}456 ++ 0: 123 ++ ++/\d+/i,utf ++ 123\x{660}456 ++ 0: 123 ++ ++/\d+/i,utf,ucp ++ 123\x{660}456 ++ 0: 123\x{660}456 ++ ++/\d+/i,utf,ucp,ascii_bsd ++ 123\x{660}456 ++ 0: 123 ++ ++/\d(?aD)\d(?-aD)\d/utf,ucp ++ \x{660}9\x{660} ++ 0: \x{660}9\x{660} ++\= Expect no match ++ \x{660}\x{660}\x{660} ++No match ++ ++/\d(?a)\d(?-a)\d/utf,ucp ++ \x{660}9\x{660} ++ 0: \x{660}9\x{660} ++\= Expect no match ++ \x{660}\x{660}\x{660} ++No match ++ ++# SPACES ++ ++/>\s+</i,utf ++ > < ++ 0: > < ++\= Expect no match ++ >\x{a0} < ++No match ++ ++/>\s+</i,utf,ucp ++ > < ++ 0: > < ++ >\x{a0} < ++ 0: >\x{a0} < ++ ++/>\s+</i,utf,ucp,ascii_bss ++ > < ++ 0: > < ++\= Expect no match ++ >\x{a0} < ++No match ++ ++/>\s+</i,utf ++ > < ++ 0: > < ++\= Expect no match ++ >\x{a0} < ++No match ++ ++/>\s+</i,utf,ucp ++ > < ++ 0: > < ++ >\x{a0} < ++ 0: >\x{a0} < ++ ++/>\s+</i,utf,ucp,ascii_bss ++ > < ++ 0: > < ++\= Expect no match ++ >\x{a0} < ++No match ++ ++/>\s(?aS)\s(?-aS)\s</utf,ucp ++ >\x{a0} \x{a0}< ++ 0: >\x{a0} \x{a0}< ++\= Expect no match ++ >\x{a0}\x{a0}\x{a0}< ++No match ++ ++/>\s(?a)\s(?-a)\s</utf,ucp ++ >\x{a0} \x{a0}< ++ 0: >\x{a0} \x{a0}< ++\= Expect no match ++ >\x{a0}\x{a0}\x{a0}< ++No match ++ ++# WORDS ++ ++/\w+/i,utf ++ 123\x{660}abc ++ 0: 123 ++ ++/\w+/i,utf,ucp ++ 123\x{660}abc ++ 0: 123\x{660}abc ++ ++/\w+/i,utf,ucp,ascii_bsw ++ 123\x{660}abc ++ 0: 123 ++ ++/\w+/i,utf ++ 123\x{660}abc ++ 0: 123 ++ ++/\w+/i,utf,ucp ++ 123\x{660}abc ++ 0: 123\x{660}abc ++ ++/\w+/i,utf,ucp,ascii_bsw ++ 123\x{660}abc ++ 0: 123 ++ ++/\w(?aW)\w(?-aW)\w/utf,ucp ++ \x{660}A\x{c0} ++ 0: \x{660}A\x{c0} ++\= Expect no match ++ \x{660}\x{c0}\x{c0} ++No match ++ ++/\w(?a)\w(?-a)\w/utf,ucp ++ \x{660}A\x{c0} ++ 0: \x{660}A\x{c0} ++\= Expect no match ++ \x{660}\x{c0}\x{c0} ++No match ++ ++# POSIX ++ ++/:digit:+/utf,ucp ++ 123\x{660}456 ++ 0: 123\x{660}456 ++ 
++/:digit:+/utf,ucp,ascii_posix ++ 123\x{660}456 ++ 0: 123 ++ ++/>:space:+</utf,ucp ++ >\x{a0} \x{a0}< ++ 0: >\x{a0} \x{a0}< ++ >\x{a0}\x{a0}\x{a0}< ++ 0: >\x{a0}\x{a0}\x{a0}< ++ ++/>:space:+</utf,ucp,ascii_posix ++\= Expect no match ++ >\x{a0} \x{a0}< ++No match ++ ++/(?aP):alnum:+/i,ucp,utf ++ abcáxyz ++ 0: abc ++ abc\x{660}xyz ++ 0: abc ++ ++/(?aP):alnum:\d+/i,ucp,utf ++ abc\x{660}xyz ++ 0: abc\x{660}xyz ++ ++# VARIOUS ++ ++/\d\s\w+/a,ucp,utf ++ 9 A\x{660}À ++ 0: 9 A ++ 9 AÀ\x{660} ++ 0: 9 A ++ ++# End PCRE2_EXTRA_ASCII_xxx tests ++ + # End of testinput7 +-- +2.23.0 +
View file
_service:tar_scm:backport-Implement-PCRE2_EXTRA_CASELESS_RESTRICT-and-related-.patch
Added
@@ -0,0 +1,1649 @@ +From 9a4fd79230cf583153bec4b4749a1864a55c89fb Mon Sep 17 00:00:00 2001 +From: Philip Hazel <Philip.Hazel@gmail.com> +Date: Sun, 29 Jan 2023 16:46:24 +0000 +Subject: PATCH Implement PCRE2_EXTRA_CASELESS_RESTRICT and related features + +Conflict:don't modify ChangeLog; don't modify maint/* because files don't +exist; adapt context; +Reference:https://github.com/PCRE2Project/pcre2/commit/c13d54f6581fa51a270a1ec40b1b7626d686dec1 + +--- + HACKING | 10 +- + src/pcre2.h.in | 3 +- + src/pcre2_compile.c | 259 ++++++++++++++++++++++++-------------- + src/pcre2_ucd.c | 6 +- + src/pcre2test.c | 21 ++-- + testdata/testinput5 | 97 ++++++++++++++ + testdata/testinput7 | 97 ++++++++++++++ + testdata/testoutput5 | 180 ++++++++++++++++++++++++++ + testdata/testoutput7 | 180 ++++++++++++++++++++++++++ + testdata/testoutput8-16-2 | 2 +- + testdata/testoutput8-8-2 | 2 +- + 11 files changed, 742 insertions(+), 115 deletions(-) + +diff --git a/HACKING b/HACKING +index 2f194db..88ebad5 100644 +--- a/HACKING ++++ b/HACKING +@@ -1,4 +1,4 @@ +-Technical Notes about PCRE2 ++Technical notes about PCRE2 + --------------------------- + + These are very rough technical notes that record potentially useful information +@@ -248,7 +248,6 @@ by a length and an offset into the pattern to specify the name. + The following have one data item that follows in the next vector element: + + META_BIGVALUE Next is a literal >= META_END +-META_OPTIONS (?i) and friends (data is new option bits) + META_POSIX POSIX class item (data identifies the class) + META_POSIX_NEG negative POSIX class item (ditto) + +@@ -298,6 +297,11 @@ META_MINMAX {n,m} repeat + META_MINMAX_PLUS {n,m}+ repeat + META_MINMAX_QUERY {n,m}? repeat + ++This one is followed by two elements, giving the new option settings for the ++main and extra options, respectively. ++ ++META_OPTIONS (?i) and friends ++ + This one is followed by three elements. 
The first is 0 for '>' and 1 for '>='; + the next two are the major and minor numbers: + +@@ -827,4 +831,4 @@ not a real opcode, but is used to check at compile time that tables indexed by + opcode are the correct length, in order to catch updating errors. + + Philip Hazel +-April 2022 ++January 2023 +diff --git a/src/pcre2.h.in b/src/pcre2.h.in +index 7b8818d..60c2905 100644 +--- a/src/pcre2.h.in ++++ b/src/pcre2.h.in +@@ -5,7 +5,7 @@ + /* This is the public header file for the PCRE library, second API, to be + #included by applications that call PCRE2 functions. + +- Copyright (c) 2016-2021 University of Cambridge ++ Copyright (c) 2016-2023 University of Cambridge + + ----------------------------------------------------------------------------- + Redistribution and use in source and binary forms, with or without +@@ -153,6 +153,7 @@ D is inspected during pcre2_dfa_match() execution + #define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ + #define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ + #define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */ ++#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */ + + /* These are for pcre2_jit_compile(). */ + +diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c +index 99ffd29..464c9db 100644 +--- a/src/pcre2_compile.c ++++ b/src/pcre2_compile.c +@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge +- New API code Copyright (c) 2016-2022 University of Cambridge ++ New API code Copyright (c) 2016-2023 University of Cambridge + + ----------------------------------------------------------------------------- + Redistribution and use in source and binary forms, with or without +@@ -118,13 +118,13 @@ them will be able to (i.e. assume a 64-bit world). 
*/ + + #ifdef SUPPORT_UNICODE + static unsigned int +- add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, ++ add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, uint32_t, + compile_block *, const uint32_t *, unsigned int); + #endif + + static int +- compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t, +- uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, ++ compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *, ++ uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, + compile_block *, PCRE2_SIZE *); + + static int +@@ -779,7 +779,7 @@ are allowed. */ + PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_UCP|PCRE2_UNGREEDY) + + #define PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS \ +- (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD) ++ (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_CASELESS_RESTRICT) + + #define PUBLIC_COMPILE_EXTRA_OPTIONS \ + (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \ +@@ -1059,7 +1059,10 @@ for (;;) + case META_SKIP: fprintf(stderr, "META (*SKIP)"); break; + case META_THEN: fprintf(stderr, "META (*THEN)"); break; + +- case META_OPTIONS: fprintf(stderr, "META_OPTIONS 0x%02x", *pptr++); break; ++ case META_OPTIONS: ++ fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr0, pptr1); ++ pptr += 2; ++ break; + + case META_LOOKBEHIND: + fprintf(stderr, "META (?<= %d offset=", meta_arg); +@@ -1491,6 +1494,7 @@ Arguments: + chptr points to a returned data character + errorcodeptr points to the errorcode variable (containing zero) + options the current options bits ++ xoptions the current extra options bits + isclass TRUE if inside a character class + cb compile data block or NULL when called from pcre2_substitute() + +@@ -1502,7 +1506,7 @@ Returns: zero => a data character + + int + PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr, +- int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclass, ++ int *errorcodeptr, uint32_t options, uint32_t 
xoptions, BOOL isclass, + compile_block *cb) + { + BOOL utf = (options & PCRE2_UTF) != 0; +@@ -1539,7 +1543,7 @@ else if ((i = escapesc - ESCAPES_FIRST) != 0) + if (i > 0) + { + c = (uint32_t)i; +- if (c == CHAR_CR && (extra_options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0) ++ if (c == CHAR_CR && (xoptions & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0) + c = CHAR_LF; + } + else /* Negative table entry */ +@@ -1603,7 +1607,7 @@ else + PCRE2_SPTR oldptr; + BOOL overflow; + BOOL alt_bsux = +- ((options & PCRE2_ALT_BSUX) | (extra_options & PCRE2_EXTRA_ALT_BSUX)) != 0; ++ ((options & PCRE2_ALT_BSUX) | (xoptions & PCRE2_EXTRA_ALT_BSUX)) != 0; + + /* Filter calls from pcre2_substitute(). */ + +@@ -1641,7 +1645,7 @@ else + + if (ptr >= ptrend) break; + if (*ptr == CHAR_LEFT_CURLY_BRACKET && +- (extra_options & PCRE2_EXTRA_ALT_BSUX) != 0) ++ (xoptions & PCRE2_EXTRA_ALT_BSUX) != 0) + { + PCRE2_SPTR hptr = ptr + 1; + cc = 0; +@@ -1685,7 +1689,7 @@ else + if (c > 0x10ffffU) *errorcodeptr = ERR77; + else + if (c >= 0xd800 && c <= 0xdfff && +- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) ++ (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) + *errorcodeptr = ERR73; + } + else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77; +@@ -1880,7 +1884,7 @@ else + else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) + { + if (utf && c >= 0xd800 && c <= 0xdfff && +- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) ++ (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) + { + ptr--; + *errorcodeptr = ERR73; +@@ -1953,7 +1957,7 @@ else + else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) + { + if (utf && c >= 0xd800 && c <= 0xdfff && +- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) ++ (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) + { + ptr--; + *errorcodeptr = ERR73; +@@ -2564,6 +2568,7 @@ typedef struct nest_save { + uint16_t max_group; + uint16_t flags; + uint32_t options; ++ uint32_t xoptions; + } nest_save; + + #define NSF_RESET 
0x0001u +@@ -2578,6 +2583,8 @@ the main compiling phase. */ + #define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \ + PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \ + PCRE2_UNGREEDY) ++ ++#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT) + + /* States used for analyzing ranges in character classes. The two OK values + must be last. */ +@@ -2617,7 +2624,7 @@ uint32_t *this_parsed_item = NULL; + uint32_t *prev_parsed_item = NULL; + uint32_t meta_quantifier = 0; + uint32_t add_after_mark = 0; +-uint32_t extra_options = cb->cx->extra_options; ++uint32_t xoptions = cb->cx->extra_options; + uint16_t nest_depth = 0; + int after_manual_callout = 0; + int expect_cond_assert = 0; +@@ -2641,12 +2648,12 @@ nest_save *top_nest, *end_nests; + /* Insert leading items for word and line matching (features provided for the + benefit of pcre2grep). */ + +-if ((extra_options & PCRE2_EXTRA_MATCH_LINE) != 0) ++if ((xoptions & PCRE2_EXTRA_MATCH_LINE) != 0) + { + *parsed_pattern++ = META_CIRCUMFLEX; + *parsed_pattern++ = META_NOCAPTURE; + } +-else if ((extra_options & PCRE2_EXTRA_MATCH_WORD) != 0) ++else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0) + { + *parsed_pattern++ = META_ESCAPE + ESC_b; + *parsed_pattern++ = META_NOCAPTURE; +@@ -2697,6 +2704,7 @@ while (ptr < ptrend) + int prev_expect_cond_assert; + uint32_t min_repeat = 0, max_repeat = 0; + uint32_t set, unset, *optset; ++ uint32_t xset, xunset, *xoptset; + uint32_t terminator; + uint32_t prev_meta_quantifier; + BOOL prev_okquantifier; +@@ -2834,7 +2842,7 @@ while (ptr < ptrend) + if ((options & PCRE2_ALT_VERBNAMES) != 0) + { + escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, +- cb->cx->extra_options, FALSE, cb); ++ xoptions, FALSE, cb); + if (errorcode != 0) goto FAILED; + } + else escape = 0; /* Treat all as literal */ +@@ -3029,11 +3037,11 @@ while (ptr < ptrend) + case CHAR_BACKSLASH: + tempptr = ptr; + escape = PRIV(check_escape)(&ptr, 
ptrend, &c, &errorcode, options, +- cb->cx->extra_options, FALSE, cb); ++ xoptions, FALSE, cb); + if (errorcode != 0) + { + ESCAPE_FAILED: +- if ((extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) ++ if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) + goto FAILED; + ptr = tempptr; + if (ptr >= ptrend) c = CHAR_BACKSLASH; else +@@ -3607,11 +3615,11 @@ while (ptr < ptrend) + { + tempptr = ptr; + escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, +- cb->cx->extra_options, TRUE, cb); ++ xoptions, TRUE, cb); + + if (errorcode != 0) + { +- if ((extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) ++ if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) + goto FAILED; + ptr = tempptr; + if (ptr >= ptrend) c = CHAR_BACKSLASH; else +@@ -3910,6 +3918,7 @@ while (ptr < ptrend) + top_nest->nest_depth = nest_depth; + top_nest->flags = NSF_ATOMICSR; + top_nest->options = options & PARSE_TRACKED_OPTIONS; ++ top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; + } + break; + #else /* SUPPORT_UNICODE */ +@@ -4042,6 +4051,7 @@ while (ptr < ptrend) + top_nest->nest_depth = nest_depth; + top_nest->flags = 0; + top_nest->options = options & PARSE_TRACKED_OPTIONS; ++ top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; + + /* Start of non-capturing group that resets the capture count for each + branch. */ +@@ -4056,24 +4066,28 @@ while (ptr < ptrend) + ptr++; + } + +- /* Scan for options imnsxJU to be set or unset. */ ++ /* Scan for options imnrsxJU to be set or unset. 
*/ + + else + { + BOOL hyphenok = TRUE; + uint32_t oldoptions = options; ++ uint32_t oldxoptions = xoptions; + + top_nest->reset_group = 0; + top_nest->max_group = 0; + set = unset = 0; + optset = &set; ++ xset = xunset = 0; ++ xoptset = &xset; + +- /* ^ at the start unsets imnsx and disables the subsequent use of - */ ++ /* ^ at the start unsets irmnsx and disables the subsequent use of - */ + + if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT) + { + options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| + PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE); ++ xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT); + hyphenok = FALSE; + ptr++; + } +@@ -4091,6 +4105,7 @@ while (ptr < ptrend) + goto FAILED; + } + optset = &unset; ++ xoptset = &xunset; + hyphenok = FALSE; + break; + +@@ -4102,6 +4117,7 @@ while (ptr < ptrend) + case CHAR_i: *optset |= PCRE2_CASELESS; break; + case CHAR_m: *optset |= PCRE2_MULTILINE; break; + case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break; ++ case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break; + case CHAR_s: *optset |= PCRE2_DOTALL; break; + case CHAR_U: *optset |= PCRE2_UNGREEDY; break; + +@@ -4132,6 +4148,7 @@ while (ptr < ptrend) + unset |= PCRE2_EXTENDED_MORE; + + options = (options | set) & (~unset); ++ xoptions = (xoptions | xset) & (~xunset); + + /* If the options ended with ')' this is not the start of a nested + group with option changes, so the options change at this level. +@@ -4152,10 +4169,11 @@ while (ptr < ptrend) + + /* If nothing changed, no need to record. */ + +- if (options != oldoptions) ++ if (options != oldoptions || xoptions != oldxoptions) + { + *parsed_pattern++ = META_OPTIONS; + *parsed_pattern++ = options; ++ *parsed_pattern++ = xoptions; + } + } /* End options processing */ + break; /* End default case after (? 
*/ +@@ -4625,6 +4643,7 @@ while (ptr < ptrend) + top_nest->nest_depth = nest_depth; + top_nest->flags = NSF_CONDASSERT; + top_nest->options = options & PARSE_TRACKED_OPTIONS; ++ top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; + } + break; + +@@ -4758,6 +4777,7 @@ while (ptr < ptrend) + if (top_nest != NULL && top_nest->nest_depth == nest_depth) + { + options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options; ++ xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions; + if ((top_nest->flags & NSF_RESET) != 0 && + top_nest->max_group > cb->bracount) + cb->bracount = top_nest->max_group; +@@ -4800,12 +4820,12 @@ parsed_pattern = manage_callouts(ptr, &previous_callout, auto_callout, + /* Insert trailing items for word and line matching (features provided for the + benefit of pcre2grep). */ + +-if ((extra_options & PCRE2_EXTRA_MATCH_LINE) != 0) ++if ((xoptions & PCRE2_EXTRA_MATCH_LINE) != 0) + { + *parsed_pattern++ = META_KET; + *parsed_pattern++ = META_DOLLAR; + } +-else if ((extra_options & PCRE2_EXTRA_MATCH_WORD) != 0) ++else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0) + { + *parsed_pattern++ = META_KET; + *parsed_pattern++ = META_ESCAPE + ESC_b; +@@ -4933,7 +4953,8 @@ for (;;) + * Get othercase range * + *************************************************/ + +-/* This function is passed the start and end of a class range in UCP mode. It ++/* This function is passed the start and end of a class range in UCP mode. For ++single characters the range may be just one character long. The function + searches up the characters, looking for ranges of characters in the "other" + case. Each call returns the next one, updating the start address. A character + with multiple other cases is returned on its own with a special return value. 
+@@ -4947,18 +4968,19 @@ Arguments: + Yield: -1 when no more + 0 when a range is returned + >0 the CASESET offset for char with multiple other cases +- in this case, ocptr contains the original ++ for this return, *ocptr contains the original + */ + + static int + get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr, +- uint32_t *odptr) ++ uint32_t *odptr, BOOL restricted) + { + uint32_t c, othercase, next; + unsigned int co; + + /* Find the first character that has an other case. If it has multiple other +-cases, return its case offset value. In 32-bit mode, a value ++cases, return its case offset value. When CASELESS_RESTRICT is set, ignore the ++multi-case entries that begin with ASCII values. In 32-bit mode, a value + greater than the Unicode maximum ends the range. */ + + for (c = *cptr; c <= d; c++) +@@ -4966,12 +4988,19 @@ for (c = *cptr; c <= d; c++) + #if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) return -1; + #endif +- if ((co = UCD_CASESET(c)) != 0) ++ if ((co = UCD_CASESET(c)) != 0 && ++ (!restricted || PRIV(ucd_caseless_sets)co > 127)) + { + *ocptr = c++; /* Character that has the set */ + *cptr = c; /* Rest of input range */ + return (int)co; + } ++ ++ /* This is not a valid multiple-case character. Check that the single other ++ case is different to the original. We don't need to check "restricted" here ++ because the non-ASCII characters with multiple cases that include an ASCII ++ character don't have a different "othercase". */ ++ + if ((othercase = UCD_OTHERCASE(c)) != c) break; + } + +@@ -5012,7 +5041,8 @@ add_to_class(). 
+ Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data +- options the options word ++ options the options bits ++ xoptions the extra options bits + cb compile data + start start of range character + end end of range character +@@ -5023,7 +5053,8 @@ Returns: the number of < 256 characters added + + static unsigned int + add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, +- uint32_t options, compile_block *cb, uint32_t start, uint32_t end) ++ uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start, ++ uint32_t end) + { + uint32_t c; + uint32_t classbits_end = (end <= 0xff ? end : 0xff); +@@ -5031,8 +5062,8 @@ unsigned int n8 = 0; + + /* If caseless matching is required, scan the range and process alternate + cases. In Unicode, there are 8-bit characters that have alternate cases that +-are greater than 255 and vice-versa. Sometimes we can just extend the original +-range. */ ++are greater than 255 and vice-versa (though these may be ignored if caseless ++restriction is in force). Sometimes we can just extend the original range. */ + + if ((options & PCRE2_CASELESS) != 0) + { +@@ -5045,20 +5076,23 @@ if ((options & PCRE2_CASELESS) != 0) + options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ + c = start; + +- while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0) ++ while ((rc = get_othercase_range(&c, end, &oc, &od, ++ (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0) + { + /* Handle a single character that has more than one other case. */ + +- if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, options, cb, +- PRIV(ucd_caseless_sets) + rc, oc); ++ if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, ++ options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc); + + /* Do nothing if the other case range is within the original range. 
*/ + +- else if (oc >= cb->class_range_start && od <= cb->class_range_end) continue; ++ else if (oc >= cb->class_range_start && od <= cb->class_range_end) ++ continue; + +- /* Extend the original range if there is overlap, noting that if oc < c, we +- can't have od > end because a subrange is always shorter than the basic +- range. Otherwise, use a recursive call to add the additional range. */ ++ /* Extend the original range if there is overlap, noting that if oc < c, ++ we can't have od > end because a subrange is always shorter than the ++ basic range. Otherwise, use a recursive call to add the additional range. ++ */ + + else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ + else if (od > end && oc <= end + 1) +@@ -5066,7 +5100,8 @@ if ((options & PCRE2_CASELESS) != 0) + end = od; /* Extend upwards */ + if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff); + } +- else n8 += add_to_class_internal(classbits, uchardptr, options, cb, oc, od); ++ else n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, ++ cb, oc, od); + } + } + else +@@ -5165,7 +5200,8 @@ add_to_class_internal(), with which it is mutually recursive. + Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data +- options the options word ++ options the options bits ++ xoptions the extra options bits + cb contains pointers to tables etc. 
+ p points to row of 32-bit values, terminated by NOTACHAR + except character to omit; this is used when adding lists of +@@ -5178,7 +5214,8 @@ Returns: the number of < 256 characters added + + static unsigned int + add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, +- uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except) ++ uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p, ++ unsigned int except) + { + unsigned int n8 = 0; + while (p0 < NOTACHAR) +@@ -5187,7 +5224,8 @@ while (p0 < NOTACHAR) + if (p0 != except) + { + while(pn+1 == p0 + n + 1) n++; +- n8 += add_to_class_internal(classbits, uchardptr, options, cb, p0, pn); ++ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, ++ p0, pn); + } + p += n + 1; + } +@@ -5207,7 +5245,8 @@ to avoid duplication when handling case-independence. + Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data +- options the options word ++ options the options bits ++ xoptions the extra options bits + cb compile data + start start of range character + end end of range character +@@ -5218,11 +5257,12 @@ Returns: the number of < 256 characters added + + static unsigned int + add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, +- compile_block *cb, uint32_t start, uint32_t end) ++ uint32_t xoptions, compile_block *cb, uint32_t start, uint32_t end) + { + cb->class_range_start = start; + cb->class_range_end = end; +-return add_to_class_internal(classbits, uchardptr, options, cb, start, end); ++return add_to_class_internal(classbits, uchardptr, options, xoptions, cb, ++ start, end); + } + + +@@ -5239,7 +5279,8 @@ case-independence. + Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data +- options the options word ++ options the options bits ++ xoptions the extra options bits + cb contains pointers to tables etc. 
+ p points to row of 32-bit values, terminated by NOTACHAR + except character to omit; this is used when adding lists of +@@ -5252,7 +5293,7 @@ Returns: the number of < 256 characters added + + static unsigned int + add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, +- compile_block *cb, const uint32_t *p, unsigned int except) ++ uint32_t xoptions, compile_block *cb, const uint32_t *p, unsigned int except) + { + unsigned int n8 = 0; + while (p0 < NOTACHAR) +@@ -5263,7 +5304,8 @@ while (p0 < NOTACHAR) + while(pn+1 == p0 + n + 1) n++; + cb->class_range_start = p0; + cb->class_range_end = pn; +- n8 += add_to_class_internal(classbits, uchardptr, options, cb, p0, pn); ++ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, ++ p0, pn); + } + p += n + 1; + } +@@ -5282,7 +5324,8 @@ vertical whitespace to a class. The list must be in order. + Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data +- options the options word ++ options the options bits ++ xoptions the extra options bits + cb contains pointers to tables etc. + p points to row of 32-bit values, terminated by NOTACHAR + +@@ -5292,16 +5335,16 @@ Returns: the number of < 256 characters added + + static unsigned int + add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, +- uint32_t options, compile_block *cb, const uint32_t *p) ++ uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p) + { + BOOL utf = (options & PCRE2_UTF) != 0; + unsigned int n8 = 0; + if (p0 > 0) +- n8 += add_to_class(classbits, uchardptr, options, cb, 0, p0 - 1); ++ n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, 0, p0 - 1); + while (p0 < NOTACHAR) + { + while (p1 == p0 + 1) p++; +- n8 += add_to_class(classbits, uchardptr, options, cb, p0 + 1, ++ n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, p0 + 1, + (p1 == NOTACHAR) ? (utf ? 
0x10ffffu : 0xffffffffu) : p1 - 1); + p++; + } +@@ -5392,6 +5435,7 @@ real compile phase. The value of lengthptr distinguishes the two phases. + + Arguments: + optionsptr pointer to the option bits ++ xoptionsptr pointer to the extra option bits + codeptr points to the pointer to the current code point + pptrptr points to the current parsed pattern pointer + errorcodeptr points to error code variable +@@ -5410,10 +5454,11 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero + */ + + static int +-compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, +- int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr, +- uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr, +- compile_block *cb, PCRE2_SIZE *lengthptr) ++compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr, ++ PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr, ++ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, ++ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, ++ PCRE2_SIZE *lengthptr) + { + int bravalue = 0; + int okreturn = -1; +@@ -5422,6 +5467,7 @@ uint32_t repeat_min = 0, repeat_max = 0; /* To please picky compilers */ + uint32_t greedy_default, greedy_non_default; + uint32_t repeat_type, op_type; + uint32_t options = *optionsptr; /* May change dynamically */ ++uint32_t xoptions = *xoptionsptr; /* May change dynamically */ + uint32_t firstcu, reqcu; + uint32_t zeroreqcu, zerofirstcu; + uint32_t escape; +@@ -5447,8 +5493,8 @@ const uint8_t *cbits = cb->cbits; + uint8_t classbits32; + + /* We can fish out the UTF setting once and for all into a BOOL, but we must +-not do this for other options (e.g. PCRE2_EXTENDED) because they may change +-dynamically as we process the pattern. */ ++not do this for other options (e.g. PCRE2_EXTENDED) that may change dynamically ++as we process the pattern. 
*/ + + #ifdef SUPPORT_UNICODE + BOOL utf = (options & PCRE2_UTF) != 0; +@@ -5699,11 +5745,14 @@ for (;; pptr++) + + /* For caseless UTF or UCP mode, check whether this character has more + than one other case. If so, generate a special OP_NOTPROP item instead of +- OP_NOTI. */ ++ OP_NOTI. When restricted by PCRE2_EXTRA_CASELESS_RESTRICT, ignore any ++ caseless set that starts with an ASCII character. */ + + #ifdef SUPPORT_UNICODE + if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 && +- (d = UCD_CASESET(c)) != 0) ++ (d = UCD_CASESET(c)) != 0 && ++ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || ++ PRIV(ucd_caseless_sets)d > 127)) + { + *code++ = OP_NOTPROP; + *code++ = PT_CLIST; +@@ -5711,7 +5760,7 @@ for (;; pptr++) + break; /* We are finished with this class */ + } + #endif +- /* Char has only one other case, or UCP not available */ ++ /* Char has only one other (usable) case, or UCP not available */ + + *code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT; + code += PUTCHAR(c, code); +@@ -5721,7 +5770,9 @@ for (;; pptr++) + /* Handle character classes that contain more than just one literal + character. If there are exactly two characters in a positive class, see if + they are case partners. This can be optimized to generate a caseless single +- character match (which also sets first/required code units if relevant). */ ++ character match (which also sets first/required code units if relevant). ++ When casing restrictions apply, ignore a caseless set if both characters ++ are ASCII. 
*/ + + if (meta == META_CLASS && pptr1 < META_END && pptr2 < META_END && + pptr3 == META_CLASS_END) +@@ -5729,7 +5780,9 @@ for (;; pptr++) + uint32_t c = pptr1; + + #ifdef SUPPORT_UNICODE +- if (UCD_CASESET(c) == 0) ++ if (UCD_CASESET(c) == 0 || ++ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && ++ c < 128 && pptr2 < 128)) + #endif + { + uint32_t d; +@@ -5981,22 +6034,24 @@ for (;; pptr++) + + case ESC_h: + (void)add_list_to_class(classbits, &class_uchardata, +- options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR); ++ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list), ++ NOTACHAR); + break; + + case ESC_H: + (void)add_not_list_to_class(classbits, &class_uchardata, +- options & ~PCRE2_CASELESS, cb, PRIV(hspace_list)); ++ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list)); + break; + + case ESC_v: + (void)add_list_to_class(classbits, &class_uchardata, +- options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR); ++ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list), ++ NOTACHAR); + break; + + case ESC_V: + (void)add_not_list_to_class(classbits, &class_uchardata, +- options & ~PCRE2_CASELESS, cb, PRIV(vspace_list)); ++ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list)); + break; + + /* If Unicode is not supported, \P and \p are not allowed and are +@@ -6070,32 +6125,32 @@ for (;; pptr++) + if (C <= CHAR_i) + { + class_has_8bitchar += +- add_to_class(classbits, &class_uchardata, options, cb, C + uc, +- ((D < CHAR_i)? D : CHAR_i) + uc); ++ add_to_class(classbits, &class_uchardata, options, xoptions, ++ cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc); + C = CHAR_j; + } + + if (C <= D && C <= CHAR_r) + { + class_has_8bitchar += +- add_to_class(classbits, &class_uchardata, options, cb, C + uc, +- ((D < CHAR_r)? D : CHAR_r) + uc); ++ add_to_class(classbits, &class_uchardata, options, xoptions, ++ cb, C + uc, ((D < CHAR_r)? 
D : CHAR_r) + uc); + C = CHAR_s; + } + + if (C <= D) + { + class_has_8bitchar += +- add_to_class(classbits, &class_uchardata, options, cb, C + uc, +- D + uc); ++ add_to_class(classbits, &class_uchardata, options, xoptions, ++ cb, C + uc, D + uc); + } + } + else + #endif + /* Not an EBCDIC special range */ + +- class_has_8bitchar += +- add_to_class(classbits, &class_uchardata, options, cb, c, d); ++ class_has_8bitchar += add_to_class(classbits, &class_uchardata, ++ options, xoptions, cb, c, d); + goto CONTINUE_CLASS; /* Go get the next char in the class */ + } /* End of range handling */ + +@@ -6103,7 +6158,8 @@ for (;; pptr++) + /* Handle a single character. */ + + class_has_8bitchar += +- add_to_class(classbits, &class_uchardata, options, cb, meta, meta); ++ add_to_class(classbits, &class_uchardata, options, xoptions, cb, ++ meta, meta); + } + + /* Continue to the next item in the class. */ +@@ -6341,6 +6397,7 @@ for (;; pptr++) + + case META_OPTIONS: + *optionsptr = options = *(++pptr); ++ *xoptionsptr = xoptions = *(++pptr); + greedy_default = ((options & PCRE2_UNGREEDY) != 0); + greedy_non_default = greedy_default ^ 1; + req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; +@@ -6586,7 +6643,8 @@ for (;; pptr++) + + if ((group_return = + compile_regex( +- options, /* The option state */ ++ options, /* The options state */ ++ xoptions, /* The extra options state */ + &tempcode, /* Where to put code (updated) */ + &pptr, /* Input pointer (updated) */ + errorcodeptr, /* Where to put an error message */ +@@ -7925,7 +7983,7 @@ for (;; pptr++) + done. However, there's an option, in case anyone was relying on it. 
*/ + + if (cb->assert_depth > 0 && meta_arg == ESC_K && +- (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0) ++ (xoptions & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0) + { + *errorcodeptr = ERR99; + return 0; +@@ -7977,13 +8035,16 @@ for (;; pptr++) + + /* For caseless UTF or UCP mode, check whether this character has more than + one other case. If so, generate a special OP_PROP item instead of OP_CHARI. +- */ ++ When casing restrictions apply, ignore caseless sets that start with an ++ ASCII character. */ + + #ifdef SUPPORT_UNICODE + if ((utf||ucp) && (options & PCRE2_CASELESS) != 0) + { + uint32_t caseset = UCD_CASESET(meta); +- if (caseset != 0) ++ if (caseset != 0 && ++ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || ++ PRIV(ucd_caseless_sets)caseset > 127)) + { + *code++ = OP_PROP; + *code++ = PT_CLIST; +@@ -8099,6 +8160,7 @@ the two phases. + + Arguments: + options option bits, including any changes for this subpattern ++ xoptions extra option bits, ditto + codeptr -> the address of the current code pointer + pptrptr -> the address of the current parsed pattern pointer + errorcodeptr -> pointer to error code variable +@@ -8118,10 +8180,11 @@ Returns: 0 There has been an error + */ + + static int +-compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, +- int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr, +- uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr, +- branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr) ++compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr, ++ uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits, ++ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, ++ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, ++ PCRE2_SIZE *lengthptr) + { + PCRE2_UCHAR *code = *codeptr; + PCRE2_UCHAR *last_branch = code; +@@ -8217,9 +8280,9 @@ for (;;) + into the length. 
*/ + + if ((branch_return = +- compile_branch(&options, &code, &pptr, errorcodeptr, &branchfirstcu, +- &branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc, +- cb, (lengthptr == NULL)? NULL : &length)) == 0) ++ compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr, ++ &branchfirstcu, &branchfirstcuflags, &branchreqcu, &branchreqcuflags, ++ &bc, cb, (lengthptr == NULL)? NULL : &length)) == 0) + return 0; + + /* If a branch can match an empty string, so can the whole group. */ +@@ -9224,7 +9287,7 @@ for (;; pptr++) + break; + + case META_OPTIONS: +- pptr += 1; ++ pptr += 2; + break; + + case META_BIGVALUE: +@@ -9727,7 +9790,6 @@ for (; *pptr != META_END; pptr++) + break; + + case META_BIGVALUE: +- case META_OPTIONS: + case META_POSIX: + case META_POSIX_NEG: + pptr += 1; +@@ -9736,6 +9798,7 @@ for (; *pptr != META_END; pptr++) + case META_MINMAX: + case META_MINMAX_QUERY: + case META_MINMAX_PLUS: ++ case META_OPTIONS: + pptr += 2; + break; + +@@ -10251,8 +10314,9 @@ pptr = cb.parsed_pattern; + code = cworkspace; + *code = OP_BRA; + +-(void)compile_regex(cb.external_options, &code, &pptr, &errorcode, 0, &firstcu, +- &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, &length); ++(void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr, ++ &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, ++ &length); + + if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */ + +@@ -10349,8 +10413,9 @@ of the function here. 
*/ + pptr = cb.parsed_pattern; + code = (PCRE2_UCHAR *)codestart; + *code = OP_BRA; +-regexrc = compile_regex(re->overall_options, &code, &pptr, &errorcode, 0, +- &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, NULL); ++regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code, ++ &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, ++ &cb, NULL); + if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY; + re->top_bracket = cb.bracount; + re->top_backref = cb.top_backref; +diff --git a/src/pcre2_ucd.c b/src/pcre2_ucd.c +index 5e0fc37..a72944c 100644 +--- a/src/pcre2_ucd.c ++++ b/src/pcre2_ucd.c +@@ -68,7 +68,7 @@ the tables when not needed. But don't leave a totally empty module because some + compilers barf at that. Instead, just supply some small dummy tables. */ + + #ifndef SUPPORT_UNICODE +-const ucd_record PRIV(ucd_records) = {{0,0,0,0,0,0,0 }}; ++const ucd_record PRIV(ucd_records) = {{0,0,0,0,0,0,0}}; + const uint16_t PRIV(ucd_stage1) = {0}; + const uint16_t PRIV(ucd_stage2) = {0}; + const uint32_t PRIV(ucd_caseless_sets) = {0}; +@@ -498,7 +498,7 @@ const ucd_record PRIV(ucd_records) = { /* 16908 bytes, record size 12 */ + { 0, 5, 12, 0, 0, 18432, 60, }, /* 70 */ + { 0, 5, 12, 0, 0, 18432, 80, }, /* 71 */ + { 0, 9, 12, 0, -121, 18432, 74, }, /* 72 */ +- { 0, 5, 12, 1, -268, 18432, 70, }, /* 73 */ ++ { 0, 5, 12, 1, 0, 18432, 70, }, /* 73 */ + { 0, 5, 12, 0, 195, 18432, 76, }, /* 74 */ + { 0, 9, 12, 0, 210, 18432, 74, }, /* 75 */ + { 0, 9, 12, 0, 206, 18432, 74, }, /* 76 */ +@@ -1155,7 +1155,7 @@ const ucd_record PRIV(ucd_records) = { /* 16908 bytes, record size 12 */ + { 69, 26, 14, 0, 0, 28672, 236, }, /* 727 */ + { 1, 9, 12, 96, -7517, 18432, 74, }, /* 728 */ + { 69, 26, 12, 0, 0, 28672, 118, }, /* 729 */ +- { 0, 9, 12, 100, -8383, 18432, 74, }, /* 730 */ ++ { 0, 9, 12, 100, 0, 18432, 74, }, /* 730 */ + { 0, 9, 12, 104, -8262, 18432, 74, }, /* 731 */ + { 69, 26, 12, 0, 0, 14336, 238, }, /* 732 */ + { 0, 9, 12, 0, 28, 
18432, 74, }, /* 733 */ +diff --git a/src/pcre2test.c b/src/pcre2test.c +index 4fa5884..e768798 100644 +--- a/src/pcre2test.c ++++ b/src/pcre2test.c +@@ -653,6 +653,7 @@ static modstruct modlist = { + { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) }, + { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, + { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, ++ { "caseless_restrict", MOD_CTC, MOD_OPT, PCRE2_EXTRA_CASELESS_RESTRICT, CO(extra_options) }, + { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) }, + { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) }, + { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) }, +@@ -833,14 +834,15 @@ typedef struct c1modstruct { + } c1modstruct; + + static c1modstruct c1modlist = { +- { "bincode", 'B', -1 }, +- { "info", 'I', -1 }, +- { "global", 'g', -1 }, +- { "caseless", 'i', -1 }, +- { "multiline", 'm', -1 }, +- { "no_auto_capture", 'n', -1 }, +- { "dotall", 's', -1 }, +- { "extended", 'x', -1 } ++ { "bincode", 'B', -1 }, ++ { "info", 'I', -1 }, ++ { "global", 'g', -1 }, ++ { "caseless", 'i', -1 }, ++ { "multiline", 'm', -1 }, ++ { "no_auto_capture", 'n', -1 }, ++ { "caseless_restrict", 'r', -1 }, ++ { "dotall", 's', -1 }, ++ { "extended", 'x', -1 } + }; + + #define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct) +@@ -4257,7 +4259,7 @@ show_compile_extra_options(uint32_t options, const char *before, + const char *after) + { + if (options == 0) fprintf(outfile, "%s <none>%s", before, after); +-else fprintf(outfile, "%s%s%s%s%s%s%s%s", ++else fprintf(outfile, "%s%s%s%s%s%s%s%s%s", + before, + ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "", + ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "", +@@ -4265,6 +4267,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s", + ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? 
" match_word" : "", + ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "", + ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "", ++ ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "", + after); + } + +diff --git a/testdata/testinput5 b/testdata/testinput5 +index 6bd352f..b817423 100644 +--- a/testdata/testinput5 ++++ b/testdata/testinput5 +@@ -2212,4 +2212,101 @@ + + /\p{\2b:xäigi:t:_/ + ++# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without ++# the restriction. ++ ++/AskZ/i,utf,caseless_restrict ++ AskZ ++ aSKz ++\= Expect no match ++ A\x{17f}kZ ++ As\x{212a}Z ++ ++/AskZ/i,utf ++ AskZ ++ aSKz ++ A\x{17f}kZ ++ As\x{212a}Z ++ ++/A\x{17f}\x{212a}Z/ir,utf ++ \= Expect no match ++ AskZ ++ ++/A\x{17f}\x{212a}Z/i,utf ++ AskZ ++ ++/AskZ+/i,utf,caseless_restrict ++ AskZ ++ aSKz ++ A\x{17f}kZ ++ As\x{212a}Z ++ ++/AskZ+/i,utf ++ AskZ ++ aSKz ++ A\x{17f}kZ ++ As\x{212a}Z ++ ++/\x{17f}\x{212a}+/ir,utf ++\= Expect no match ++ AskZ ++ ++/\x{17f}\x{212a}+/i,utf ++ AskZ ++ ++/^s+/ir,utf ++ A\x{17f}Z ++ ++/^s+/i,utf ++ A\x{17f}Z ++ ++/^k+/ir,utf ++ A\x{212a}Z ++ ++/^k+/i,utf ++ A\x{212a}Z ++ ++/^sk+/ir,utf ++ A\x{17f}\x{212a}Z ++ ++/^sk+/i,utf ++ A\x{17f}\x{212a}Z ++ ++/^\x{17f}+/ir,utf ++ AsSZ ++ ++/^\x{17f}+/i,utf ++ AsSZ ++ ++/Ss+/irB,utf ++ Sss\x{17f}ss ++ ++/Ss+/iB,utf ++ Sss\x{17f}ss ++ ++/S\x{17f}/irB,utf ++ ++/S\x{17f}/iB,utf ++ ++/\x{17f}s/irB,utf ++ ++/\x{17f}s/iB,utf ++ ++/\x{4b}\x{6b}/irB,utf ++ ++/\x{4b}\x{6b}/iB,utf ++ ++/s(?r)s(?-r)s(?r:s)s/i,utf ++ \x{17f}S\x{17f}S\x{17f} ++\= Expect no match ++ \x{17f}\x{17f}\x{17f}S\x{17f} ++ \x{17f}S\x{17f}\x{17f}\x{17f} ++ ++/k(?^i)k/ir,utf ++ K\x{212a} ++\= Expect no match ++ \x{212a}\x{212a} ++ ++# End caseless restrict tests ++ + # End of testinput5 +diff --git a/testdata/testinput7 b/testdata/testinput7 +index 2d90b41..991de88 100644 +--- a/testdata/testinput7 ++++ b/testdata/testinput7 +@@ -2231,4 +2231,101 @@ + /\p{sc:katakana}{3,}?/utf + 
\x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66}\x{3001}ABC + ++# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without ++# the restriction. ++ ++/AskZ/i,utf,caseless_restrict ++ AskZ ++ aSKz ++\= Expect no match ++ A\x{17f}kZ ++ As\x{212a}Z ++ ++/AskZ/i,utf ++ AskZ ++ aSKz ++ A\x{17f}kZ ++ As\x{212a}Z ++ ++/A\x{17f}\x{212a}Z/ir,utf ++ \= Expect no match ++ AskZ ++ ++/A\x{17f}\x{212a}Z/i,utf ++ AskZ ++ ++/AskZ+/i,utf,caseless_restrict ++ AskZ ++ aSKz ++ A\x{17f}kZ ++ As\x{212a}Z ++ ++/AskZ+/i,utf ++ AskZ ++ aSKz ++ A\x{17f}kZ ++ As\x{212a}Z ++ ++/\x{17f}\x{212a}+/ir,utf ++\= Expect no match ++ AskZ ++ ++/\x{17f}\x{212a}+/i,utf ++ AskZ ++ ++/^s+/ir,utf ++ A\x{17f}Z ++ ++/^s+/i,utf ++ A\x{17f}Z ++ ++/^k+/ir,utf ++ A\x{212a}Z ++ ++/^k+/i,utf ++ A\x{212a}Z ++ ++/^sk+/ir,utf ++ A\x{17f}\x{212a}Z ++ ++/^sk+/i,utf ++ A\x{17f}\x{212a}Z ++ ++/^\x{17f}+/ir,utf ++ AsSZ ++ ++/^\x{17f}+/i,utf ++ AsSZ ++ ++/Ss+/irB,utf ++ Sss\x{17f}ss ++ ++/Ss+/iB,utf ++ Sss\x{17f}ss ++ ++/S\x{17f}/irB,utf ++ ++/S\x{17f}/iB,utf ++ ++/\x{17f}s/irB,utf ++ ++/\x{17f}s/iB,utf ++ ++/\x{4b}\x{6b}/irB,utf ++ ++/\x{4b}\x{6b}/iB,utf ++ ++/s(?r)s(?-r)s(?r:s)s/i,utf ++ \x{17f}S\x{17f}S\x{17f} ++\= Expect no match ++ \x{17f}\x{17f}\x{17f}S\x{17f} ++ \x{17f}S\x{17f}\x{17f}\x{17f} ++ ++/k(?^i)k/ir,utf ++ K\x{212a} ++\= Expect no match ++ \x{212a}\x{212a} ++ ++# End caseless restrict tests ++ + # End of testinput7 +diff --git a/testdata/testoutput5 b/testdata/testoutput5 +index 2c3fe94..db42a11 100644 +--- a/testdata/testoutput5 ++++ b/testdata/testoutput5 +@@ -5016,4 +5016,184 @@ Failed: error 147 at offset 8: unknown property after \P or \p + /\p{\2b:xäigi:t:_/ + Failed: error 146 at offset 17: malformed \P or \p sequence + ++# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without ++# the restriction. 
++ ++/AskZ/i,utf,caseless_restrict ++ AskZ ++ 0: AskZ ++ aSKz ++ 0: aSKz ++\= Expect no match ++ A\x{17f}kZ ++No match ++ As\x{212a}Z ++No match ++ ++/AskZ/i,utf ++ AskZ ++ 0: AskZ ++ aSKz ++ 0: aSKz ++ A\x{17f}kZ ++ 0: A\x{17f}kZ ++ As\x{212a}Z ++ 0: As\x{212a}Z ++ ++/A\x{17f}\x{212a}Z/ir,utf ++ \= Expect no match ++ AskZ ++No match ++ ++/A\x{17f}\x{212a}Z/i,utf ++ AskZ ++ 0: AskZ ++ ++/AskZ+/i,utf,caseless_restrict ++ AskZ ++ 0: AskZ ++ aSKz ++ 0: aSKz ++ A\x{17f}kZ ++ 0: A ++ As\x{212a}Z ++ 0: As ++ ++/AskZ+/i,utf ++ AskZ ++ 0: AskZ ++ aSKz ++ 0: aSKz ++ A\x{17f}kZ ++ 0: A\x{17f}kZ ++ As\x{212a}Z ++ 0: As\x{212a}Z ++ ++/\x{17f}\x{212a}+/ir,utf ++\= Expect no match ++ AskZ ++No match ++ ++/\x{17f}\x{212a}+/i,utf ++ AskZ ++ 0: sk ++ ++/^s+/ir,utf ++ A\x{17f}Z ++ 0: A\x{17f}Z ++ ++/^s+/i,utf ++ A\x{17f}Z ++ 0: A ++ ++/^k+/ir,utf ++ A\x{212a}Z ++ 0: A\x{212a}Z ++ ++/^k+/i,utf ++ A\x{212a}Z ++ 0: A ++ ++/^sk+/ir,utf ++ A\x{17f}\x{212a}Z ++ 0: A\x{17f}\x{212a}Z ++ ++/^sk+/i,utf ++ A\x{17f}\x{212a}Z ++ 0: A ++ ++/^\x{17f}+/ir,utf ++ AsSZ ++ 0: AsSZ ++ ++/^\x{17f}+/i,utf ++ AsSZ ++ 0: A ++ ++/Ss+/irB,utf ++------------------------------------------------------------------ ++ Bra ++ /i S++ ++ Ket ++ End ++------------------------------------------------------------------ ++ Sss\x{17f}ss ++ 0: Sss ++ ++/Ss+/iB,utf ++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f}\x{17f}++ ++ Ket ++ End ++------------------------------------------------------------------ ++ Sss\x{17f}ss ++ 0: Sss\x{17f}ss ++ ++/S\x{17f}/irB,utf ++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/S\x{17f}/iB,utf ++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f}\x{17f} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\x{17f}s/irB,utf 
++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\x{17f}s/iB,utf ++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f}\x{17f} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\x{4b}\x{6b}/irB,utf ++------------------------------------------------------------------ ++ Bra ++ /i K ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\x{4b}\x{6b}/iB,utf ++------------------------------------------------------------------ ++ Bra ++ Kk\x{212a}\x{212a} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/s(?r)s(?-r)s(?r:s)s/i,utf ++ \x{17f}S\x{17f}S\x{17f} ++ 0: \x{17f}S\x{17f}S\x{17f} ++\= Expect no match ++ \x{17f}\x{17f}\x{17f}S\x{17f} ++No match ++ \x{17f}S\x{17f}\x{17f}\x{17f} ++No match ++ ++/k(?^i)k/ir,utf ++ K\x{212a} ++ 0: K\x{212a} ++\= Expect no match ++ \x{212a}\x{212a} ++No match ++ ++# End caseless restrict tests ++ + # End of testinput5 +diff --git a/testdata/testoutput7 b/testdata/testoutput7 +index 6e71fc8..c2291a1 100644 +--- a/testdata/testoutput7 ++++ b/testdata/testoutput7 +@@ -3756,4 +3756,184 @@ No match + 1: \x{30a1}\x{30fa}\x{32d0}\x{1b122} + 2: \x{30a1}\x{30fa}\x{32d0} + ++# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without ++# the restriction. 
++ ++/AskZ/i,utf,caseless_restrict ++ AskZ ++ 0: AskZ ++ aSKz ++ 0: aSKz ++\= Expect no match ++ A\x{17f}kZ ++No match ++ As\x{212a}Z ++No match ++ ++/AskZ/i,utf ++ AskZ ++ 0: AskZ ++ aSKz ++ 0: aSKz ++ A\x{17f}kZ ++ 0: A\x{17f}kZ ++ As\x{212a}Z ++ 0: As\x{212a}Z ++ ++/A\x{17f}\x{212a}Z/ir,utf ++ \= Expect no match ++ AskZ ++No match ++ ++/A\x{17f}\x{212a}Z/i,utf ++ AskZ ++ 0: AskZ ++ ++/AskZ+/i,utf,caseless_restrict ++ AskZ ++ 0: AskZ ++ aSKz ++ 0: aSKz ++ A\x{17f}kZ ++ 0: A ++ As\x{212a}Z ++ 0: As ++ ++/AskZ+/i,utf ++ AskZ ++ 0: AskZ ++ aSKz ++ 0: aSKz ++ A\x{17f}kZ ++ 0: A\x{17f}kZ ++ As\x{212a}Z ++ 0: As\x{212a}Z ++ ++/\x{17f}\x{212a}+/ir,utf ++\= Expect no match ++ AskZ ++No match ++ ++/\x{17f}\x{212a}+/i,utf ++ AskZ ++ 0: sk ++ ++/^s+/ir,utf ++ A\x{17f}Z ++ 0: A\x{17f}Z ++ ++/^s+/i,utf ++ A\x{17f}Z ++ 0: A ++ ++/^k+/ir,utf ++ A\x{212a}Z ++ 0: A\x{212a}Z ++ ++/^k+/i,utf ++ A\x{212a}Z ++ 0: A ++ ++/^sk+/ir,utf ++ A\x{17f}\x{212a}Z ++ 0: A\x{17f}\x{212a}Z ++ ++/^sk+/i,utf ++ A\x{17f}\x{212a}Z ++ 0: A ++ ++/^\x{17f}+/ir,utf ++ AsSZ ++ 0: AsSZ ++ ++/^\x{17f}+/i,utf ++ AsSZ ++ 0: A ++ ++/Ss+/irB,utf ++------------------------------------------------------------------ ++ Bra ++ /i S++ ++ Ket ++ End ++------------------------------------------------------------------ ++ Sss\x{17f}ss ++ 0: Sss ++ ++/Ss+/iB,utf ++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f}\x{17f}++ ++ Ket ++ End ++------------------------------------------------------------------ ++ Sss\x{17f}ss ++ 0: Sss\x{17f}ss ++ ++/S\x{17f}/irB,utf ++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/S\x{17f}/iB,utf ++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f}\x{17f} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\x{17f}s/irB,utf 
++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\x{17f}s/iB,utf ++------------------------------------------------------------------ ++ Bra ++ Ss\x{17f}\x{17f} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\x{4b}\x{6b}/irB,utf ++------------------------------------------------------------------ ++ Bra ++ /i K ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/\x{4b}\x{6b}/iB,utf ++------------------------------------------------------------------ ++ Bra ++ Kk\x{212a}\x{212a} ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/s(?r)s(?-r)s(?r:s)s/i,utf ++ \x{17f}S\x{17f}S\x{17f} ++ 0: \x{17f}S\x{17f}S\x{17f} ++\= Expect no match ++ \x{17f}\x{17f}\x{17f}S\x{17f} ++No match ++ \x{17f}S\x{17f}\x{17f}\x{17f} ++No match ++ ++/k(?^i)k/ir,utf ++ K\x{212a} ++ 0: K\x{212a} ++\= Expect no match ++ \x{212a}\x{212a} ++No match ++ ++# End caseless restrict tests ++ + # End of testinput7 +diff --git a/testdata/testoutput8-16-2 b/testdata/testoutput8-16-2 +index 569a860..49b1022 100644 +--- a/testdata/testoutput8-16-2 ++++ b/testdata/testoutput8-16-2 +@@ -838,7 +838,7 @@ Memory allocation (code space): 14 + 
/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| + 
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + /parens_nest_limit=1000,-fullbincode +-Failed: error 184 at offset 1504: (?| and/or (?J: or (?x: parentheses are too deeply nested ++Failed: error 184 at offset 1129: (?| and/or (?J: or (?x: parentheses are too deeply nested + + # Use "expand" to create some very long patterns with nested parentheses, in + # order to test workspace overflow. Again, this varies with code unit width, +diff --git a/testdata/testoutput8-8-2 b/testdata/testoutput8-8-2 +index 8393d5c..e9568e5 100644 +--- a/testdata/testoutput8-8-2 ++++ b/testdata/testoutput8-8-2 +@@ -838,7 +838,7 @@ Memory allocation (code space): 10 + 
/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| + 
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + /parens_nest_limit=1000,-fullbincode +-Failed: error 184 at offset 1504: (?| and/or (?J: or (?x: parentheses are too deeply nested ++Failed: error 184 at offset 1129: (?| and/or (?J: or (?x: parentheses are too deeply nested + + # Use "expand" to create some very long patterns with nested parentheses, in + # order to test workspace overflow. Again, this varies with code unit width, +-- +2.23.0 +
View file
_service
Changed
@@ -2,7 +2,7 @@ <service name="tar_scm"> <param name="scm">git</param> <param name="url">git@gitee.com:src-openeuler/pcre2.git</param> - <param name="revision">openEuler-24.03-LTS-Next</param> + <param name="revision">openEuler-24.03-LTS-SP1</param> <param name="exclude">*</param> <param name="extract">*</param> </service>
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS) is an openSUSE project.
浙ICP备2022010568号-2