Projects
openEuler:Mainline
pcre2
Sign Up
Log In
Username
Password
We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 2
View file
_service:tar_scm:pcre2.spec
Changed
@@ -1,16 +1,21 @@ Name: pcre2 -Version: 10.39 -Release: 2 +Version: 10.40 +Release: 3 Summary: Perl Compatible Regular Expressions License: BSD URL: http://www.pcre.org/ -Source0: https://ftp.pcre.org/pub/pcre/%{name}-%{version}.tar.bz2 +Source0: https://github.com/PCRE2Project/pcre2/releases/download/%{name}-%{version}/%{name}-%{version}.tar.bz2 # Do no set RPATH if libdir is not /usr/lib -Patch0: backport-pcre2-10.10-Fix-multilib.patch -Patch1: backport-CVE-2022-1586-1.patch -Patch2: backport-CVE-2022-1586-2.patch -Patch3: backport-CVE-2022-1587.patch +Patch6000: backport-pcre2-10.10-Fix-multilib.patch +Patch6010: backport-doc-avoid-nonexistent-PCRE2_ERROR_MEMORY-error-107.patch +Patch6011: backport-Update-HTML-docs.patch +Patch6012: backport-Fixed-race-condition-that-occurs-when-initializing-t.patch +Patch6013: backport-Change-length-variables-in-pcre2grep-from-int-to-siz.patch +Patch6014: backport-Add-an-ifdef-to-avoid-the-need-even-to-link-with-pcr.patch +Patch6015: backport-Fixed-an-issue-in-the-backtracking-optimization-of-c.patch +Patch6016: backport-jit-fail-early-in-ffcps_-if-subject-shorter-than-off.patch +Patch6017: backport-jit-fix-pcre2_jit_free_unused_memory-if-sljit-not-us.patch BuildRequires: autoconf libtool automake coreutils gcc make readline-devel Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools @@ -57,7 +62,7 @@ %build %configure \ -%ifarch riscv64 +%ifarch riscv64 sw_64 loongarch64 --disable-jit \ %else --enable-jit \ @@ -127,6 +132,27 @@ %{_pkgdocdir}/html/ %changelog +* Thu Mar 16 2023 yangmingtai <yangmingtai@huawei.com> - 10.40-3 +- DESC:sync community patches + +* Sat Nov 26 2022 huyubiao <huyubiao@huawei.com> - 10.40-2 +- Update the Source0 URL. 
+ +* Fri Nov 18 2022 dillon chen <dillon.chen@gmail.com> - 10.40-1 +- update to 10.40 + +* Mon Nov 14 2022 zhaozhen <zhaozhen@loongson.cn> - 10.39-5 +- Type:enhancement +- ID:NA +- SUG:NA +- DESC:fix build error for loongarch64 + +* Thu Nov 3 2022 wuzx<wuzx1226@qq.com> - 10.39-4 +- Add sw64 architecture + +* Tue Oct 18 2022 yangmingtai <yangmingtai@huawei.com> - 10.39-3 +- DESC:sync community patchs + * Sat May 28 2022 yangmingtai <yangmingtai@huawei.com> - 10.39-2 - DESC:fix CVE-2022-1586 and CVE-2022-1587
View file
_service:tar_scm:backport-Add-an-ifdef-to-avoid-the-need-even-to-link-with-pcr.patch
Added
@@ -0,0 +1,57 @@ +From 4804b00e8f50e446be43ee51863c8cdd3c743bea Mon Sep 17 00:00:00 2001 +From: Philip Hazel <Philip.Hazel@gmail.com> +Date: Thu, 30 Jun 2022 17:37:51 +0100 +Subject: PATCH Add an #ifdef to avoid the need even to link with + pcre2_jit_compile.o when JIT is not supported + +Conflict:delete changelog +Reference:https://github.com/PCRE2Project/pcre2/commit/4804b00e8f50e446be43ee51863c8cdd3c743bea +--- + src/pcre2_compile.c | 2 ++ + src/pcre2test.c | 4 ++-- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c +index 383159b..b906dc0 100644 +--- a/src/pcre2_compile.c ++++ b/src/pcre2_compile.c +@@ -1264,8 +1264,10 @@ PCRE2_SIZE* ref_count; + + if (code != NULL) + { ++#ifdef SUPPORT_JIT + if (code->executable_jit != NULL) + PRIV(jit_free)(code->executable_jit, &code->memctl); ++#endif + + if ((code->flags & PCRE2_DEREF_TABLES) != 0) + { +diff --git a/src/pcre2test.c b/src/pcre2test.c +index 84987d7..11ec3ca 100644 +--- a/src/pcre2test.c ++++ b/src/pcre2test.c +@@ -4737,19 +4737,19 @@ if ((pat_patctl.control & CTL_INFO) != 0) + + if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) + { ++#ifdef SUPPORT_JIT + if (FLD(compiled_code, executable_jit) != NULL) + fprintf(outfile, "JIT compilation was successful\n"); + else + { +-#ifdef SUPPORT_JIT + fprintf(outfile, "JIT compilation was not successful"); + if (jitrc != 0 && !print_error_message(jitrc, " (", ")")) + return PR_ABEND; + fprintf(outfile, "\n"); ++ } + #else + fprintf(outfile, "JIT support is not available in this version of PCRE2\n"); + #endif +- } + } + } + +-- +2.27.0 +
View file
_service:tar_scm:backport-CVE-2022-1586-1.patch
Deleted
@@ -1,53 +0,0 @@ -From 50a51cb7e67268e6ad417eb07c9de9bfea5cc55a Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg <hzmester@freemail.hu> -Date: Wed, 23 Mar 2022 07:53:25 +0000 -Subject: PATCH Fixed a unicode properrty matching issue in JIT - ---- - ChangeLog | 3 +++ - src/pcre2_jit_compile.c | 2 +- - src/pcre2_jit_test.c | 1 + - 3 files changed, 5 insertions(+), 1 deletion(-) - -diff --git a/ChangeLog b/ChangeLog -index d27542d..cd3da65 100644 ---- a/ChangeLog -+++ b/ChangeLog -@@ -63,6 +63,9 @@ Version 10.39 29-October-2021 - - Reformat slightly to make it C89 compatible again. - -+23. Fixed a unicode properrty matching issue in JIT. The character was not -+fully read in caseless matching. -+ - - Version 10.38 01-October-2021 - ----------------------------- -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index db2ce65..5baca9b 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -7473,7 +7473,7 @@ while (*cc != XCL_END) - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; -- if (*cc == PT_CLIST) -+ if (*cc == PT_CLIST && *cc == XCL_PROP) - { - other_cases = PRIV(ucd_caseless_sets) + cc1; - while (*other_cases != NOTACHAR) -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index 8dee16e..bf94c6d 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -412,6 +412,7 @@ static struct regression_test_case regression_test_cases = { - { MUP, A, 0, 0 | F_PROPERTY, "\\P{L&}{2}^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" }, - { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "a-b\\s{2,5}^a", "AB baaa" }, - { MUP, 0, 0, 0 | F_NOMATCH, "^\\p{Hangul}\\p{Z}", " " }, -+ { CMUP, 0, 0, 0, "^S\\B", "\xe2\x80\x8a" }, - - /* Possible empty brackets. */ - { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" }, --- -2.27.0 -
View file
_service:tar_scm:backport-CVE-2022-1586-2.patch
Deleted
@@ -1,25 +0,0 @@ -From d4fa336fbcc388f89095b184ba6d99422cfc676c Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg <hzmester@freemail.hu> -Date: Thu, 24 Mar 2022 05:34:42 +0000 -Subject: [PATCH] Fix incorrect value reading in JIT. - ---- - src/pcre2_jit_compile.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 94f6a58..7fcdac8 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -7489,7 +7489,7 @@ while (*cc != XCL_END) - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; -- if (*cc == PT_CLIST && *cc == XCL_PROP) -+ if (*cc == PT_CLIST && cc[-1] == XCL_PROP) - { - other_cases = PRIV(ucd_caseless_sets) + cc[1]; - while (*other_cases != NOTACHAR) --- -2.27.0 -
View file
_service:tar_scm:backport-CVE-2022-1587.patch
Deleted
@@ -1,656 +0,0 @@ -From 03654e751e7f0700693526b67dfcadda6b42c9d0 Mon Sep 17 00:00:00 2001 -From: Zoltan Herczeg <hzmester@freemail.hu> -Date: Sat, 26 Mar 2022 07:55:50 +0000 -Subject: PATCH Fixed an issue affecting recursions in JIT - ---- - ChangeLog | 3 + - src/pcre2_jit_compile.c | 290 ++++++++++++++++++++++++++-------------- - src/pcre2_jit_test.c | 1 + - 3 files changed, 194 insertions(+), 100 deletions(-) - -diff --git a/ChangeLog b/ChangeLog -index cd3da65..4e8815d 100644 ---- a/ChangeLog -+++ b/ChangeLog -@@ -66,6 +66,9 @@ Version 10.39 29-October-2021 - 23. Fixed a unicode properrty matching issue in JIT. The character was not - fully read in caseless matching. - -+24. Fixed an issue affecting recursions in JIT caused by duplicated data -+transfers. -+ - - Version 10.38 01-October-2021 - ----------------------------- -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 5baca9b..08df1cf 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -413,6 +413,9 @@ typedef struct compiler_common { - /* Locals used by fast fail optimization. */ - sljit_s32 early_fail_start_ptr; - sljit_s32 early_fail_end_ptr; -+ /* Variables used by recursive call generator. */ -+ sljit_s32 recurse_bitset_size; -+ uint8_t *recurse_bitset; - - /* Flipped and lower case tables. 
*/ - const sljit_u8 *fcc; -@@ -2315,19 +2318,39 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) - - #undef RECURSE_TMP_REG_COUNT - -+static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index) -+{ -+uint8_t *byte; -+uint8_t mask; -+ -+SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0); -+ -+bit_index >>= SLJIT_WORD_SHIFT; -+ -+mask = 1 << (bit_index & 0x7); -+byte = common->recurse_bitset + (bit_index >> 3); -+ -+if (*byte & mask) -+ return FALSE; -+ -+*byte |= mask; -+return TRUE; -+} -+ - static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, - BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept) - { - int length = 1; --int size; -+int size, offset; - PCRE2_SPTR alternative; - BOOL quit_found = FALSE; - BOOL accept_found = FALSE; - BOOL setsom_found = FALSE; - BOOL setmark_found = FALSE; --BOOL capture_last_found = FALSE; - BOOL control_head_found = FALSE; - -+memset(common->recurse_bitset, 0, common->recurse_bitset_size); -+ - #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD - SLJIT_ASSERT(common->control_head_ptr != 0); - control_head_found = TRUE; -@@ -2350,15 +2373,17 @@ while (cc < ccend) - setsom_found = TRUE; - if (common->mark_ptr != 0) - setmark_found = TRUE; -- if (common->capture_last_ptr != 0) -- capture_last_found = TRUE; -+ if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) -+ length++; - cc += 1 + LINK_SIZE; - break; - - case OP_KET: -- if (PRIVATE_DATA(cc) != 0) -+ offset = PRIVATE_DATA(cc); -+ if (offset != 0) - { -- length++; -+ if (recurse_check_bit(common, offset)) -+ length++; - SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); - cc += PRIVATE_DATA(cc + 1); - } -@@ -2377,39 +2402,55 @@ while (cc < ccend) - case OP_SBRA: - case OP_SBRAPOS: - case OP_SCOND: -- length++; - SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); -+ if (recurse_check_bit(common, PRIVATE_DATA(cc))) -+ length++; - cc += 1 + LINK_SIZE; - break; - - case OP_CBRA: - case 
OP_SCBRA: -- length += 2; -- if (common->capture_last_ptr != 0) -- capture_last_found = TRUE; -- if (common->optimized_cbracketGET2(cc, 1 + LINK_SIZE) == 0) -+ offset = GET2(cc, 1 + LINK_SIZE); -+ if (recurse_check_bit(common, OVECTOR(offset << 1))) -+ { -+ SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); -+ length += 2; -+ } -+ if (common->optimized_cbracketoffset == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset))) -+ length++; -+ if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) - length++; - cc += 1 + LINK_SIZE + IMM2_SIZE; - break; - - case OP_CBRAPOS: - case OP_SCBRAPOS: -- length += 2 + 2; -- if (common->capture_last_ptr != 0) -- capture_last_found = TRUE; -+ offset = GET2(cc, 1 + LINK_SIZE); -+ if (recurse_check_bit(common, OVECTOR(offset << 1))) -+ { -+ SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); -+ length += 2; -+ } -+ if (recurse_check_bit(common, OVECTOR_PRIV(offset))) -+ length++; -+ if (recurse_check_bit(common, PRIVATE_DATA(cc))) -+ length++; -+ if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) -+ length++; - cc += 1 + LINK_SIZE + IMM2_SIZE; - break; - - case OP_COND: - /* Might be a hidden SCOND. 
*/ - alternative = cc + GET(cc, 1); -- if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) -+ if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc))) - length++; - cc += 1 + LINK_SIZE; - break; - - CASE_ITERATOR_PRIVATE_DATA_1 -- if (PRIVATE_DATA(cc) != 0) -+ offset = PRIVATE_DATA(cc); -+ if (offset != 0 && recurse_check_bit(common, offset)) - length++; - cc += 2; - #ifdef SUPPORT_UNICODE -@@ -2418,8 +2459,12 @@ while (cc < ccend) - break; - - CASE_ITERATOR_PRIVATE_DATA_2A -- if (PRIVATE_DATA(cc) != 0) -+ offset = PRIVATE_DATA(cc); -+ if (offset != 0 && recurse_check_bit(common, offset)) -+ { -+ SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); - length += 2; -+ } - cc += 2; - #ifdef SUPPORT_UNICODE - if (common->utf && HAS_EXTRALEN(cc-1)) cc += GET_EXTRALEN(cc-1); -@@ -2427,8 +2472,12 @@ while (cc < ccend) - break; - - CASE_ITERATOR_PRIVATE_DATA_2B -- if (PRIVATE_DATA(cc) != 0) -+ offset = PRIVATE_DATA(cc); -+ if (offset != 0 && recurse_check_bit(common, offset)) -+ { -+ SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); - length += 2; -+ } - cc += 2 + IMM2_SIZE; - #ifdef SUPPORT_UNICODE - if (common->utf && HAS_EXTRALEN(cc-1)) cc += GET_EXTRALEN(cc-1); -@@ -2436,20 +2485,29 @@ while (cc < ccend) - break;
View file
_service:tar_scm:backport-Change-length-variables-in-pcre2grep-from-int-to-siz.patch
Added
@@ -0,0 +1,110 @@ +From 7549fdca744378b9e6597c2882a490308573368b Mon Sep 17 00:00:00 2001 +From: Philip Hazel <Philip.Hazel@gmail.com> +Date: Thu, 30 Jun 2022 17:06:32 +0100 +Subject: PATCH Change length variables in pcre2grep from int to size_t + +Conflict:delete Changelog +Reference:https://github.com/PCRE2Project/pcre2/commit/7549fdca744378b9e6597c2882a490308573368b +--- + src/pcre2grep.c | 28 +++++++++++++++------------- + 1 file changed, 15 insertions(+), 13 deletions(-) + +diff --git a/src/pcre2grep.c b/src/pcre2grep.c +index 519346b..1484d67 100644 +--- a/src/pcre2grep.c ++++ b/src/pcre2grep.c +@@ -232,15 +232,16 @@ static int after_context = 0; + static int before_context = 0; + static int binary_files = BIN_BINARY; + static int both_context = 0; +-static int bufthird = PCRE2GREP_BUFSIZE; +-static int max_bufthird = PCRE2GREP_MAX_BUFSIZE; +-static int bufsize = 3*PCRE2GREP_BUFSIZE; + static int endlinetype; + + static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */ + static unsigned long int counts_printed = 0; + static unsigned long int total_count = 0; + ++static PCRE2_SIZE bufthird = PCRE2GREP_BUFSIZE; ++static PCRE2_SIZE max_bufthird = PCRE2GREP_MAX_BUFSIZE; ++static PCRE2_SIZE bufsize = 3*PCRE2GREP_BUFSIZE; ++ + #ifdef WIN32 + static int dee_action = dee_SKIP; + #else +@@ -433,8 +434,8 @@ static option_item optionlist = { + { OP_NODATA, 'a', NULL, "text", "treat binary files as text" }, + { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, + { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" }, +- { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" }, +- { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" }, ++ { OP_SIZE, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" }, ++ { OP_SIZE, N_MAX_BUFSIZE,&max_bufthird, 
"max-buffer-size=number", "set processing buffer maximum size" }, + { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, + { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, + { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, +@@ -1419,10 +1420,10 @@ Returns: the number of characters read, zero at end of file + */ + + static PCRE2_SIZE +-read_one_line(char *buffer, int length, FILE *f) ++read_one_line(char *buffer, PCRE2_SIZE length, FILE *f) + { + int c; +-int yield = 0; ++PCRE2_SIZE yield = 0; + while ((c = fgetc(f)) != EOF) + { + bufferyield++ = c; +@@ -2468,8 +2469,8 @@ return result != 0; + * Read a portion of the file into buffer * + *************************************************/ + +-static int +-fill_buffer(void *handle, int frtype, char *buffer, int length, ++static PCRE2_SIZE ++fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length, + BOOL input_line_buffered) + { + (void)frtype; /* Avoid warning when not used */ +@@ -2631,7 +2632,7 @@ while (ptr < endptr) + if (bufthird < max_bufthird) + { + char *new_buffer; +- int new_bufthird = 2*bufthird; ++ PCRE2_SIZE new_bufthird = 2*bufthird; + + if (new_bufthird > max_bufthird) new_bufthird = max_bufthird; + new_buffer = (char *)malloc(3*new_bufthird); +@@ -2640,7 +2641,8 @@ while (ptr < endptr) + { + fprintf(stderr, + "pcre2grep: line %lu%s%s is too long for the internal buffer\n" +- "pcre2grep: not enough memory to increase the buffer size to %d\n", ++ "pcre2grep: not enough memory to increase the buffer size to %" ++ SIZ_FORM "\n", + linenumber, + (filename == NULL)? "" : " of file ", + (filename == NULL)? 
"" : filename, +@@ -2670,7 +2672,7 @@ while (ptr < endptr) + { + fprintf(stderr, + "pcre2grep: line %lu%s%s is too long for the internal buffer\n" +- "pcre2grep: the maximum buffer size is %d\n" ++ "pcre2grep: the maximum buffer size is %" SIZ_FORM "\n" + "pcre2grep: use the --max-buffer-size option to change it\n", + linenumber, + (filename == NULL)? "" : " of file ", +@@ -3153,7 +3155,7 @@ while (ptr < endptr) + + if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize) + { +- int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in); ++ PCRE2_SIZE add = read_one_line(ptr, bufsize - (ptr - main_buffer), in); + bufflength += add; + endptr += add; + } +-- +2.27.0 +
View file
_service:tar_scm:backport-Fixed-an-issue-in-the-backtracking-optimization-of-c.patch
Added
@@ -0,0 +1,62 @@ +From 4851890ede31313655e19180f4959ed348fee580 Mon Sep 17 00:00:00 2001 +From: Zoltan Herczeg <zherczeg.u-szeged@partner.samsung.com> +Date: Thu, 14 Jul 2022 05:25:39 +0200 +Subject: PATCH Fixed an issue in the backtracking optimization of character + repeats in JIT (#135) + +Conflict:adjust changelog +Reference:https://github.com/PCRE2Project/pcre2/commit/4851890ede31313655e19180f4959ed348fee580 +--- + src/pcre2_jit_compile.c | 20 ++++++++++---------- + src/pcre2_jit_test.c | 1 + + 2 files changed, 11 insertions(+), 10 deletions(-) + +diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c +index 8f6c090..75ba610 100644 +--- a/src/pcre2_jit_compile.c ++++ b/src/pcre2_jit_compile.c +@@ -11357,19 +11357,19 @@ if (exact > 1) + } + } + else if (exact == 1) +- { + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); + +- if (early_fail_type == type_fail_range) +- { +- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr); +- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw)); +- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); +- OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); +- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); ++if (early_fail_type == type_fail_range) ++ { ++ /* Range end first, followed by range start. 
*/ ++ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr); ++ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw)); ++ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); ++ OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); ++ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); + +- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0); +- } ++ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); ++ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0); + } + + switch(opcode) +diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c +index f7b9f6d..e1f0bbc 100644 +--- a/src/pcre2_jit_test.c ++++ b/src/pcre2_jit_test.c +@@ -354,6 +354,7 @@ static struct regression_test_case regression_test_cases = { + { MU, A, 0, 0, "_ab+_*a", "_aa" }, + { MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" }, + { MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" }, ++ { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" }, + + /* Bracket repeats with limit. */ + { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" }, +-- +2.27.0 +
View file
_service:tar_scm:backport-Fixed-race-condition-that-occurs-when-initializing-t.patch
Added
@@ -0,0 +1,56 @@ +From 45af1203bdb5d1ccccc27526ce38c36f49196ccc Mon Sep 17 00:00:00 2001 +From: larinsv <97248465+larinsv@users.noreply.github.com> +Date: Wed, 18 May 2022 13:16:00 +0300 +Subject: PATCH Fixed race condition that occurs when initializing the + executable_allocator_is_working variable in the pcre2_jit_compile function + (#91) + +Conflict:NA +Reference:https://github.com/PCRE2Project/pcre2/commit/45af1203bdb5d1ccccc27526ce38c36f49196ccc +--- + src/pcre2_jit_compile.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c +index b6b1313..e638c24 100644 +--- a/src/pcre2_jit_compile.c ++++ b/src/pcre2_jit_compile.c +@@ -14384,7 +14384,7 @@ pcre2_jit_compile(pcre2_code *code, uint32_t options) + pcre2_real_code *re = (pcre2_real_code *)code; + #ifdef SUPPORT_JIT + executable_functions *functions; +-static int executable_allocator_is_working = 0; ++static int executable_allocator_is_working = -1; + #endif + + if (code == NULL) +@@ -14447,23 +14447,21 @@ return PCRE2_ERROR_JIT_BADOPTION; + + if ((re->flags & PCRE2_NOJIT) != 0) return 0; + +-if (executable_allocator_is_working == 0) ++if (executable_allocator_is_working == -1) + { + /* Checks whether the executable allocator is working. This check + might run multiple times in multi-threaded environments, but the + result should not be affected by it. */ + void *ptr = SLJIT_MALLOC_EXEC(32, NULL); +- +- executable_allocator_is_working = -1; +- + if (ptr != NULL) + { + SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL); + executable_allocator_is_working = 1; + } ++ else executable_allocator_is_working = 0; + } + +-if (executable_allocator_is_working < 0) ++if (!executable_allocator_is_working) + return PCRE2_ERROR_NOMEMORY; + + if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) +-- +2.27.0 +
View file
_service:tar_scm:backport-Update-HTML-docs.patch
Added
@@ -0,0 +1,55 @@ +From b52d055d1b8feb6e56804c2062de65d50a5601e2 Mon Sep 17 00:00:00 2001 +From: Philip Hazel <Philip.Hazel@gmail.com> +Date: Fri, 22 Apr 2022 18:02:14 +0100 +Subject: PATCH Update HTML docs + +Conflict:NA +Reference:https://github.com/PCRE2Project/pcre2/commit/b52d055d1b8feb6e56804c2062de65d50a5601e2 +--- + doc/html/pcre2_serialize_decode.html | 2 +- + doc/html/pcre2serialize.html | 2 +- + doc/pcre2.txt | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/doc/html/pcre2_serialize_decode.html b/doc/html/pcre2_serialize_decode.html +index cff6e6c..618ffa9 100644 +--- a/doc/html/pcre2_serialize_decode.html ++++ b/doc/html/pcre2_serialize_decode.html +@@ -48,7 +48,7 @@ the following negative error codes: + PCRE2_ERROR_BADDATA <i>number_of_codes</i> is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in <i>bytes</i> + PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version +- PCRE2_ERROR_MEMORY memory allocation failed ++ PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_NULL <i>codes</i> or <i>bytes</i> is NULL + </pre> + PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +diff --git a/doc/html/pcre2serialize.html b/doc/html/pcre2serialize.html +index df4098e..a492305 100644 +--- a/doc/html/pcre2serialize.html ++++ b/doc/html/pcre2serialize.html +@@ -94,7 +94,7 @@ of serialized patterns, or one of the following negative error codes: + <pre> + PCRE2_ERROR_BADDATA the number of patterns is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns +- PCRE2_ERROR_MEMORY memory allocation failed ++ PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL the 1st, 3rd, or 4th argument is NULL + </pre> +diff --git a/doc/pcre2.txt b/doc/pcre2.txt +index c318432..c70fb9b 100644 +--- a/doc/pcre2.txt ++++ b/doc/pcre2.txt +@@ -10436,7 +10436,7 @@ SAVING COMPILED PATTERNS + + 
PCRE2_ERROR_BADDATA the number of patterns is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns +- PCRE2_ERROR_MEMORY memory allocation failed ++ PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL the 1st, 3rd, or 4th argument is NULL + +-- +2.27.0 +
View file
_service:tar_scm:backport-doc-avoid-nonexistent-PCRE2_ERROR_MEMORY-error-107.patch
Added
@@ -0,0 +1,43 @@ +From a4ac97fea8a2d802985d1f5b298ede2cb07cc8cd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com> +Date: Fri, 22 Apr 2022 09:59:44 -0700 +Subject: PATCH doc: avoid nonexistent PCRE2_ERROR_MEMORY error (#107) + +5438fc8a (Add serialization functions and tests with updated pcre2test. +Fix PCRE2_INFO_SIZE issues., 2015-01-23) introduced the typo. + +Reported-by: @sjshuck +Fixes: #106 + +Conflict:NA +Reference:https://github.com/PCRE2Project/pcre2/commit/a4ac97fea8a2d802985d1f5b298ede2cb07cc8cd + +diff --git a/doc/pcre2_serialize_decode.3 b/doc/pcre2_serialize_decode.3 +index b67a112..611113f 100644 +--- a/doc/pcre2_serialize_decode.3 ++++ b/doc/pcre2_serialize_decode.3 +@@ -36,7 +36,7 @@ the following negative error codes: + PCRE2_ERROR_BADDATA \fInumber_of_codes\fP is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in \fIbytes\fP + PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version +- PCRE2_ERROR_MEMORY memory allocation failed ++ PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_NULL \fIcodes\fP or \fIbytes\fP is NULL + .sp + PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +diff --git a/doc/pcre2serialize.3 b/doc/pcre2serialize.3 +index 987bc3a..a94f13b 100644 +--- a/doc/pcre2serialize.3 ++++ b/doc/pcre2serialize.3 +@@ -81,7 +81,7 @@ of serialized patterns, or one of the following negative error codes: + .sp + PCRE2_ERROR_BADDATA the number of patterns is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns +- PCRE2_ERROR_MEMORY memory allocation failed ++ PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL the 1st, 3rd, or 4th argument is NULL + .sp +-- +2.27.0 +
View file
_service:tar_scm:backport-jit-fail-early-in-ffcps_-if-subject-shorter-than-off.patch
Added
@@ -0,0 +1,36 @@ +From f2411acb3711a44497539d17b245bd366d9c26d7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com> +Date: Mon, 12 Dec 2022 08:32:42 -0800 +Subject: PATCH jit: fail early in ffcps_* if subject shorter than offs1 + (#175) + +FF_FUN would try loading a vector from an invalid address +triggering a crash. + +Add the same check that is done in the x86/s390x implementations +and that was missing from the original code. + +Fixes: #86 + +Conflict:NA +Reference:https://github.com/PCRE2Project/pcre2/commit/f2411acb3711a44497539d17b245bd366d9c26d7 +--- + src/pcre2_jit_neon_inc.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/pcre2_jit_neon_inc.h b/src/pcre2_jit_neon_inc.h +index e74adf1..165602e 100644 +--- a/src/pcre2_jit_neon_inc.h ++++ b/src/pcre2_jit_neon_inc.h +@@ -183,6 +183,8 @@ restart:; + #endif + + #if defined(FFCPS) ++if (str_ptr >= str_end) ++ return NULL; + sljit_u8 *p1 = str_ptr - diff; + #endif + sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf); +-- +1.8.3.1 +
View file
_service:tar_scm:backport-jit-fix-pcre2_jit_free_unused_memory-if-sljit-not-us.patch
Added
@@ -0,0 +1,39 @@ +From 7846880d63cf4b0d0d861659e222cce9c597c914 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com> +Date: Thu, 1 Dec 2022 01:40:01 -0800 +Subject: PATCH jit: fix pcre2_jit_free_unused_memory() if sljit not using + allocator (#165) + +sljit allows building without an internal allocator, but instead using +an external one. + +make sure to only invoke the corresponding sljit call if an internal +allocator is in use (the default and as coded in pcre integration) to +avoid problems if the code is changed to use an external allocator +instead. + +Conflict:NA +Reference:https://github.com/PCRE2Project/pcre2/commit/7846880d63cf4b0d0d861659e222cce9c597c914 +--- + src/pcre2_jit_misc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/pcre2_jit_misc.c b/src/pcre2_jit_misc.c +index e57afad..bb6a558 100644 +--- a/src/pcre2_jit_misc.c ++++ b/src/pcre2_jit_misc.c +@@ -110,8 +110,10 @@ pcre2_jit_free_unused_memory(pcre2_general_context *gcontext) + (void)gcontext; /* Suppress warning */ + #else /* SUPPORT_JIT */ + SLJIT_UNUSED_ARG(gcontext); ++#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + sljit_free_unused_memory_exec(); +-#endif /* SUPPORT_JIT */ ++#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ ++#endif /* SUPPORT_JIT */ + } + + +-- +1.8.3.1 +
View file
_service
Changed
@@ -2,7 +2,7 @@ <service name="tar_scm"> <param name="scm">git</param> <param name="url">git@gitee.com:src-openeuler/pcre2.git</param> - <param name="revision">4758fdd70fa72cde0fe1f643bbcae162032fe30f</param> + <param name="revision">master</param> <param name="exclude">*</param> <param name="extract">*</param> </service>
View file
_service:tar_scm:pcre2-10.39.tar.bz2/AUTHORS -> _service:tar_scm:pcre2-10.40.tar.bz2/AUTHORS
Changed
@@ -8,7 +8,7 @@ Retired from University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2021 University of Cambridge +Copyright (c) 1997-2022 University of Cambridge All rights reserved @@ -19,7 +19,7 @@ Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2010-2021 Zoltan Herczeg +Copyright(c) 2010-2022 Zoltan Herczeg All rights reserved. @@ -30,7 +30,7 @@ Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2009-2021 Zoltan Herczeg +Copyright(c) 2009-2022 Zoltan Herczeg All rights reserved. ####
View file
_service:tar_scm:pcre2-10.39.tar.bz2/CMakeLists.txt -> _service:tar_scm:pcre2-10.40.tar.bz2/CMakeLists.txt
Changed
@@ -110,6 +110,11 @@ # GET_TARGET_PROPERTY. This should no longer be required. # CMAKE_POLICY(SET CMP0026 OLD) +# With a recent cmake, you can provide a rootdir to look for non +# standard installed library dependencies, but to do so, the policy +# needs to be set to new (by uncommenting the following) +# CMAKE_POLICY(SET CMP0074 NEW) + # For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH # on the command line. # SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) @@ -142,10 +147,16 @@ CHECK_SYMBOL_EXISTS(bcopy "strings.h" HAVE_BCOPY) CHECK_SYMBOL_EXISTS(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) CHECK_SYMBOL_EXISTS(memmove "string.h" HAVE_MEMMOVE) -CHECK_SYMBOL_EXISTS(realpath "stdlib.h" HAVE_REALPATH) CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) CHECK_SYMBOL_EXISTS(strerror "string.h" HAVE_STRERROR) +CHECK_C_SOURCE_COMPILES( + "#include <stdlib.h> + #include <limits.h> + int main(int c, char *v) { char bufPATH_MAX; realpath(v1, buf); return 0; }" + HAVE_REALPATH +) + set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror") CHECK_C_SOURCE_COMPILES( @@ -300,9 +311,19 @@ IF(EDITLINE_FOUND) OPTION (PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." 
OFF) ENDIF(EDITLINE_FOUND) -IF(PCRE2_SUPPORT_LIBEDIT) - INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR}) -ENDIF(PCRE2_SUPPORT_LIBEDIT) +IF(EDITLINE_FOUND) + IF(PCRE2_SUPPORT_LIBEDIT) + INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR}) + ENDIF(PCRE2_SUPPORT_LIBEDIT) +ELSE(EDITLINE_FOUND) + IF(PCRE2_SUPPORT_LIBEDIT) + MESSAGE(FATAL_ERROR + " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n" + " or set Editline_ROOT to a full libedit installed tree, as needed\n" + " Might need to enable policy CMP0074 in CMakeLists.txt" + ) + ENDIF(PCRE2_SUPPORT_LIBEDIT) +ENDIF(EDITLINE_FOUND) # readline lib IF(READLINE_FOUND) @@ -340,7 +361,12 @@ ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) - MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified") + IF(READLINE_FOUND) + MESSAGE(FATAL_ERROR + " Only one of the readline compatible libraries can be enabled.\n" + " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF" + ) + ENDIF(READLINE_FOUND) ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) IF(PCRE2_SUPPORT_BSR_ANYCRLF) @@ -1022,25 +1048,23 @@ FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1) FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3) -FOREACH(man ${man3}) - GET_FILENAME_COMPONENT(man_tmp ${man} NAME) - SET(man3_new ${man3} ${man}) -ENDFOREACH(man ${man3}) -SET(man3 ${man3_new}) - INSTALL(FILES ${man1} DESTINATION man/man1) INSTALL(FILES ${man3} DESTINATION man/man3) INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html) IF(MSVC AND INSTALL_MSVC_PDB) - INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2.pdb - ${PROJECT_BINARY_DIR}/pcre2posix.pdb - DESTINATION bin - CONFIGURATIONS RelWithDebInfo) - INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2d.pdb - ${PROJECT_BINARY_DIR}/pcre2posixd.pdb - DESTINATION bin - CONFIGURATIONS Debug) + INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2-8.pdb + ${PROJECT_BINARY_DIR}/pcre2-16.pdb + ${PROJECT_BINARY_DIR}/pcre2-32.pdb + 
${PROJECT_BINARY_DIR}/pcre2-posix.pdb + DESTINATION bin + CONFIGURATIONS RelWithDebInfo) + INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2-8d.pdb + ${PROJECT_BINARY_DIR}/pcre2-16d.pdb + ${PROJECT_BINARY_DIR}/pcre2-32d.pdb + ${PROJECT_BINARY_DIR}/pcre2-posixd.pdb + DESTINATION bin + CONFIGURATIONS Debug) ENDIF(MSVC AND INSTALL_MSVC_PDB) # Help, only for nice output
View file
_service:tar_scm:pcre2-10.39.tar.bz2/ChangeLog -> _service:tar_scm:pcre2-10.40.tar.bz2/ChangeLog
Changed
@@ -1,6 +1,111 @@ Change Log for PCRE2 -------------------- + +Version 10.40 15-April-2022 +--------------------------- + +1. Merged patch from @carenas (GitHub #35, 7db87842) to fix pcre2grep incorrect +handling of multiple passes. + +2. Merged patch from @carenas (GitHub #36, dae47509) to fix portability issue +in pcre2grep with buffered fseek(stdin). + +3. Merged patch from @carenas (GitHub #37, acc520924) to fix tests when -S is +not supported. + +4. Revert an unintended change in JIT repeat detection. + +5. Merged patch from @carenas (GitHub #52, b037bfa1) to fix build on GNU Hurd. + +6. Merged documentation and comments patches from @carenas (GitHub #47). + +7. Merged patch from @carenas (GitHub #49) to remove obsolete JFriedl test code +from pcre2grep. + +8. Merged patch from @carenas (GitHub #48) to fix CMake install issue #46. + +9. Merged patch from @carenas (GitHub #53) fixing NULL checks in matching and +substituting. + +10. Add null_subject and null_replacement modifiers to pcre2test. + +11. Add check for NULL subject to POSIX regexec() function. + +12. Add check for NULL replacement to pcre2_substitute(). + +13. For the subject arguments of pcre2_match(), pcre2_dfa_match(), and +pcre2_substitute(), and the replacement argument of the latter, if the pointer +is NULL and the length is zero, treat as an empty string. Apparently a number +of applications treat NULL/0 in this way. + +14. Added support for Bidi_Class and a number of binary Unicode properties, +including Bidi_Control. + +15. Fix some minor issues raised by clang sanitize. + +16. Very minor code speed up for maximizing character property matches. + +17. A number of changes to script matching for \p and \P: + + (a) Script extensions for a character are now coded as a bitmap instead of + a list of script numbers, which should be faster and does not need a + loop. + + (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms + sc and scx). 
+ + (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being + the same as \p{scx:scriptname} because this change happened in Perl at + release 5.26. + + (d) The standard Unicode 4-letter abbreviations for script names are now + recognized. + + (e) In accordance with Unicode and Perl's "loose matching" rules, spaces, + hyphens, and underscores are ignored in property names, which are then + matched independent of case. + +18. The Python scripts in the maint directory have been refactored. There are +now three scripts that generate pcre2_ucd.c, pcre2_ucp.h, and pcre2_ucptables.c +(which is #included by pcre2_tables.c). The data lists that used to be +duplicated are now held in a single common Python module. + +19. On CHERI, and thus Arm's Morello prototype, pointers are represented as +hardware capabilities, which consist of both an integer address and additional +metadata, meaning they are twice the size of the platform's size_t type, i.e. +16 bytes on a 64-bit system. The ovector member of heapframe happens to only be +8 byte aligned, and so computing frame_size ended up with a multiple of 8 but +not 16. Whilst the first frame was always suitably aligned, this then +misaligned the frame that follows, resulting in an alignment fault when storing +a pointer to Fecode at the start of match. Patch to fix this issue by Jessica +Clarke PR#72. + +20. Added -LP and -LS listing options to pcre2test. + +21. A user discovered that the library names in CMakeLists.txt for MSVC +debugger (PDB) files were incorrect - perhaps never tried for PCRE2? + +22. An item such as [Aa] is optimized into a caseless single character match. +When this was quantified (e.g. [Aa]{2}) and was also the last literal item in a +pattern, the optimizing "must be present for a match" character check was not +being flagged as caseless, causing some matches that should have succeeded to +fail. + +23. Fixed a unicode property matching issue in JIT.
The character was not +fully read in caseless matching. + +24. Fixed an issue affecting recursions in JIT caused by duplicated data +transfers. + +25. Merged patch from @carenas (GitHub #96) which fixes some problems with +pcre2test and readline/readedit: + + * Use the right header for libedit in FreeBSD with autoconf + * Really allow libedit with cmake + * Avoid using readline headers with libedit + + Version 10.39 29-October-2021 -----------------------------
View file
_service:tar_scm:pcre2-10.39.tar.bz2/HACKING -> _service:tar_scm:pcre2-10.40.tar.bz2/HACKING
Changed
@@ -546,8 +546,9 @@ and a value. The types are a set of #defines of the form PT_xxx, and the values are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file. The value is relevant only for PT_GC (General Category), PT_PC (Particular -Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to -identify a list of case-equivalent characters when there are three or more. +Category), PT_SC (Script), PT_BIDICL (Bidi Class), and the pseudo-property +PT_CLIST, which is used to identify a list of case-equivalent characters when +there are three or more. Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by three code units: OP_PROP or OP_NOTPROP, and then the desired property type and @@ -827,4 +828,4 @@ opcode are the correct length, in order to catch updating errors. Philip Hazel -12 July 2019 +December 2021
View file
_service:tar_scm:pcre2-10.39.tar.bz2/LICENCE -> _service:tar_scm:pcre2-10.40.tar.bz2/LICENCE
Changed
@@ -26,7 +26,7 @@ Retired from University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2021 University of Cambridge +Copyright (c) 1997-2022 University of Cambridge All rights reserved. @@ -37,7 +37,7 @@ Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2010-2021 Zoltan Herczeg +Copyright(c) 2010-2022 Zoltan Herczeg All rights reserved. @@ -48,7 +48,7 @@ Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2009-2021 Zoltan Herczeg +Copyright(c) 2009-2022 Zoltan Herczeg All rights reserved.
View file
_service:tar_scm:pcre2-10.39.tar.bz2/Makefile.am -> _service:tar_scm:pcre2-10.40.tar.bz2/Makefile.am
Changed
@@ -382,6 +382,10 @@ src/pcre2_valid_utf.c \ src/pcre2_xclass.c +# The pcre2_ucptables.c file is #included by pcre2_tables.c + +EXTRA_DIST += src/pcre2_ucptables.c + if WITH_PCRE2_8 lib_LTLIBRARIES += libpcre2-8.la libpcre2_8_la_SOURCES = \ @@ -663,6 +667,7 @@ testdata/testinput23 \ testdata/testinput24 \ testdata/testinput25 \ + testdata/testinput26 \ testdata/testinputEBC \ testdata/testoutput1 \ testdata/testoutput2 \ @@ -705,6 +710,7 @@ testdata/testoutput23 \ testdata/testoutput24 \ testdata/testoutput25 \ + testdata/testoutput26 \ testdata/testoutputEBC \ testdata/valgrind-jit.supp \ testdata/wintestinput3 \
View file
_service:tar_scm:pcre2-10.39.tar.bz2/Makefile.in -> _service:tar_scm:pcre2-10.40.tar.bz2/Makefile.in
Changed
@@ -829,6 +829,7 @@ EXTRA_LIBPCRE2_8_LDFLAGS = @EXTRA_LIBPCRE2_8_LDFLAGS@ EXTRA_LIBPCRE2_POSIX_LDFLAGS = @EXTRA_LIBPCRE2_POSIX_LDFLAGS@ FGREP = @FGREP@ +FILECMD = @FILECMD@ GCOV_CFLAGS = @GCOV_CFLAGS@ GCOV_CXXFLAGS = @GCOV_CXXFLAGS@ GCOV_LIBS = @GCOV_LIBS@ @@ -1192,6 +1193,8 @@ # for the benefit of people who are building PCRE2 manually, without the # Autotools support. +# The pcre2_ucptables.c file is #included by pcre2_tables.c + # The pcre2_chartables.c.dist file is the default version of # pcre2_chartables.c, used unless --enable-rebuild-chartables is specified. @@ -1205,11 +1208,12 @@ EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \ NON-AUTOTOOLS-BUILD HACKING PrepareRelease CheckMan CleanTxt \ Detrail 132html doc/index.html.src src/pcre2.h.generic \ - src/config.h.generic src/pcre2_chartables.c.dist \ - src/sljit/sljitConfig.h src/sljit/sljitConfigInternal.h \ - src/sljit/sljitExecAllocator.c src/sljit/sljitLir.c \ - src/sljit/sljitLir.h src/sljit/sljitNativeARM_32.c \ - src/sljit/sljitNativeARM_64.c src/sljit/sljitNativeARM_T2_32.c \ + src/config.h.generic src/pcre2_ucptables.c \ + src/pcre2_chartables.c.dist src/sljit/sljitConfig.h \ + src/sljit/sljitConfigInternal.h src/sljit/sljitExecAllocator.c \ + src/sljit/sljitLir.c src/sljit/sljitLir.h \ + src/sljit/sljitNativeARM_32.c src/sljit/sljitNativeARM_64.c \ + src/sljit/sljitNativeARM_T2_32.c \ src/sljit/sljitNativeMIPS_32.c src/sljit/sljitNativeMIPS_64.c \ src/sljit/sljitNativeMIPS_common.c \ src/sljit/sljitNativePPC_32.c src/sljit/sljitNativePPC_64.c \ @@ -1235,8 +1239,9 @@ testdata/testinput16 testdata/testinput17 testdata/testinput18 \ testdata/testinput19 testdata/testinput20 testdata/testinput21 \ testdata/testinput22 testdata/testinput23 testdata/testinput24 \ - testdata/testinput25 testdata/testinputEBC \ - testdata/testoutput1 testdata/testoutput2 testdata/testoutput3 \ + testdata/testinput25 testdata/testinput26 \ + testdata/testinputEBC testdata/testoutput1 \ + testdata/testoutput2 
testdata/testoutput3 \ testdata/testoutput3A testdata/testoutput3B \ testdata/testoutput4 testdata/testoutput5 testdata/testoutput6 \ testdata/testoutput7 testdata/testoutput8-16-2 \ @@ -1255,10 +1260,11 @@ testdata/testoutput21 testdata/testoutput22-16 \ testdata/testoutput22-32 testdata/testoutput22-8 \ testdata/testoutput23 testdata/testoutput24 \ - testdata/testoutput25 testdata/testoutputEBC \ - testdata/valgrind-jit.supp testdata/wintestinput3 \ - testdata/wintestoutput3 perltest.sh src/pcre2demo.c \ - cmake/COPYING-CMAKE-SCRIPTS cmake/FindEditline.cmake \ + testdata/testoutput25 testdata/testoutput26 \ + testdata/testoutputEBC testdata/valgrind-jit.supp \ + testdata/wintestinput3 testdata/wintestoutput3 perltest.sh \ + src/pcre2demo.c cmake/COPYING-CMAKE-SCRIPTS \ + cmake/FindEditline.cmake \ cmake/FindPackageHandleStandardArgs.cmake \ cmake/FindReadline.cmake cmake/pcre2-config-version.cmake.in \ cmake/pcre2-config.cmake.in CMakeLists.txt config-cmake.h.in
View file
_service:tar_scm:pcre2-10.39.tar.bz2/NEWS -> _service:tar_scm:pcre2-10.40.tar.bz2/NEWS
Changed
@@ -2,6 +2,38 @@ ------------------------- +Version 10.40 15-April-2022 +--------------------------- + +This is mostly a bug-fixing and code-tidying release. However, there are some +extensions to Unicode property handling: + +* Added support for Bidi_Class and a number of binary Unicode properties, +including Bidi_Control. + +* A number of changes to script matching for \p and \P: + + (a) Script extensions for a character are now coded as a bitmap instead of + a list of script numbers, which should be faster and does not need a + loop. + + (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms + sc and scx). + + (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being + the same as \p{scx:scriptname} because this change happened in Perl at + release 5.26. + + (d) The standard Unicode 4-letter abbreviations for script names are now + recognized. + + (e) In accordance with Unicode and Perl's "loose matching" rules, spaces, + hyphens, and underscores are ignored in property names, which are then + matched independent of case. + +As always, see ChangeLog for a list of all changes (also the Git log). + + Version 10.39 29-October-2021 -----------------------------
View file
_service:tar_scm:pcre2-10.39.tar.bz2/README -> _service:tar_scm:pcre2-10.40.tar.bz2/README
Changed
@@ -114,12 +114,18 @@ The following instructions assume the use of the widely used "configure; make; make install" (autotools) process. -To build PCRE2 on system that supports autotools, first run the "configure" -command from the PCRE2 distribution directory, with your current directory set +If you have downloaded and unpacked a PCRE2 release tarball, run the +"configure" command from the PCRE2 directory, with your current directory set to the directory where you want the files to be created. This command is a standard GNU "autoconf" configuration script, for which generic instructions are supplied in the file INSTALL. +The files in the GitHub repository do not contain "configure". If you have +downloaded the PCRE2 source files from GitHub, before you can run "configure" +you must run the shell script called autogen.sh. This runs a number of +autotools to create a "configure" script (you must of course have the autotools +commands installed in order to do this). + Most commonly, people build PCRE2 within its own distribution directory, and in this case, on many systems, just running "./configure" is sufficient. However, the usual methods of changing standard defaults are available. For example: @@ -188,10 +194,10 @@ As well as supporting UTF strings, Unicode support includes support for the \P, \p, and \X sequences that recognize Unicode character properties. - However, only the basic two-letter properties such as Lu are supported. - Escape sequences such as \d and \w in patterns do not by default make use of - Unicode properties, but can be made to do so by setting the PCRE2_UCP option - or starting a pattern with (*UCP). + However, only a subset of Unicode properties are supported; see the + pcre2pattern man page for details. Escape sequences such as \d and \w in + patterns do not by default make use of Unicode properties, but can be made to + do so by setting the PCRE2_UCP option or starting a pattern with (*UCP). . 
You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any of the preceding, or any of the Unicode newline sequences, or the NUL (zero) @@ -411,7 +417,7 @@ . Makefile the makefile that builds the library . src/config.h build-time configuration options for the library . src/pcre2.h the public PCRE2 header file -. pcre2-config script that shows the building settings such as CFLAGS +. pcre2-config script that shows the building settings such as CFLAGS that were set for "configure" . libpcre2-8.pc ) . libpcre2-16.pc ) data for the pkg-config command @@ -571,9 +577,9 @@ Making new tarballs ------------------- -The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats. -The command "make distcheck" does the same, but then does a trial build of the -new distribution to ensure that it works. +The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and +zip formats. The command "make distcheck" does the same, but then does a trial +build of the new distribution to ensure that it works. If you have modified any of the man page sources in the doc directory, you should first run the PrepareRelease script before making a distribution. This @@ -602,13 +608,13 @@ Many (but not all) of the tests that are not skipped are run twice if JIT support is available. On the second run, JIT compilation is forced. This -testing can be suppressed by putting "nojit" on the RunTest command line. +testing can be suppressed by putting "-nojit" on the RunTest command line. The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit libraries that are enabled. If you want to run just one set of tests, call RunTest with either the -8, -16 or -32 option. -If valgrind is installed, you can run the tests under it by putting "valgrind" +If valgrind is installed, you can run the tests under it by putting "-valgrind" on the RunTest command line. 
To run pcre2test on just one or more specific test files, give their numbers as arguments to RunTest, for example: @@ -905,4 +911,4 @@ Philip Hazel Email local part: Philip.Hazel Email domain: gmail.com -Last updated: 29 October 2021 +Last updated: 15 April 2022
View file
_service:tar_scm:pcre2-10.39.tar.bz2/RunGrepTest -> _service:tar_scm:pcre2-10.40.tar.bz2/RunGrepTest
Changed
@@ -674,10 +674,14 @@ echo "RC=$?" >>testtrygrep echo "---------------------------- Test 132 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <$srcdir/testdata/grepinput >>testtrygrep 2>&1 +(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; head -1 <&3; exec 3<&-) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 133 -----------------------------" >>testtrygrep +(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; exec 3<&-) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 134 -----------------------------" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep
View file
_service:tar_scm:pcre2-10.39.tar.bz2/RunTest -> _service:tar_scm:pcre2-10.40.tar.bz2/RunTest
Changed
@@ -80,7 +80,8 @@ title23="Test 23: \C disabled test" title24="Test 24: Non-UTF pattern conversion tests" title25="Test 25: UTF pattern conversion tests" -maxtest=25 +title26="Test 26: Auto-generated unicode property tests" +maxtest=26 if $# -eq 1 -a "$1" = "list" ; then echo $title0 @@ -109,6 +110,7 @@ echo $title23 echo $title24 echo $title25 + echo $title26 exit 0 fi @@ -238,6 +240,7 @@ do23=no do24=no do25=no +do26=no while $# -gt 0 ; do case $1 in @@ -267,6 +270,7 @@ 23) do23=yes;; 24) do24=yes;; 25) do25=yes;; + 26) do26=yes;; -8) arg8=yes;; -16) arg16=yes;; -32) arg32=yes;; @@ -320,7 +324,8 @@ # set up a large stack. $sim ./pcre2test -S 64 /dev/null /dev/null -if $? -eq 0 -a "$bigstack" != "" ; then +support_setstack=$? +if $support_setstack -eq 0 -a "$bigstack" != "" ; then setstack="-S 64" else setstack="" @@ -416,7 +421,7 @@ $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \ $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \ $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \ - $do24 = no -a $do25 = no \ + $do24 = no -a $do25 = no -a $do26 = no \ ; then do0=yes do1=yes @@ -444,6 +449,7 @@ do23=yes do24=yes do25=yes + do26=yes fi # Handle any explicit skips at this stage, so that an argument list may consist @@ -479,7 +485,9 @@ echo '' >testtry checkspecial '-C' checkspecial '--help' - checkspecial '-S 1 -t 10 testSinput' + if $support_setstack -eq 0 ; then + checkspecial '-S 1 -t 10 testSinput' + fi echo " OK" fi @@ -860,6 +868,20 @@ fi fi + # Auto-generated unicode property tests + + if $do26 = yes ; then + echo $title26 + if $utf -eq 0 ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry + checkresult $? 26 "$opt" + done + fi + fi + # End of loop for 8/16/32-bit tests done
View file
_service:tar_scm:pcre2-10.39.tar.bz2/cmake/FindEditline.cmake -> _service:tar_scm:pcre2-10.40.tar.bz2/cmake/FindEditline.cmake
Changed
@@ -1,17 +1,16 @@ # Modified from FindReadline.cmake (PH Feb 2012) -if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY) +if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY) set(EDITLINE_FOUND TRUE) -else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY) - FIND_PATH(EDITLINE_INCLUDE_DIR readline.h - /usr/include/editline - /usr/include/edit/readline - /usr/include/readline +else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY) + FIND_PATH(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES + editline + edit/readline ) FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit) include(FindPackageHandleStandardArgs) - FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY ) + FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) -endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY) +endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
View file
_service:tar_scm:pcre2-10.39.tar.bz2/configure -> _service:tar_scm:pcre2-10.40.tar.bz2/configure
Changed
@@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for PCRE2 10.39. +# Generated by GNU Autoconf 2.71 for PCRE2 10.40. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -618,8 +618,8 @@ # Identity of this package. PACKAGE_NAME='PCRE2' PACKAGE_TARNAME='pcre2' -PACKAGE_VERSION='10.39' -PACKAGE_STRING='PCRE2 10.39' +PACKAGE_VERSION='10.40' +PACKAGE_STRING='PCRE2 10.40' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -724,6 +724,7 @@ DSYMUTIL MANIFEST_TOOL RANLIB +FILECMD LN_S NM ac_ct_DUMPBIN @@ -1452,7 +1453,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures PCRE2 10.39 to adapt to many kinds of systems. +\`configure' configures PCRE2 10.40 to adapt to many kinds of systems. Usage: $0 OPTION... VAR=VALUE... @@ -1523,7 +1524,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of PCRE2 10.39:";; + short | recursive ) echo "Configuration of PCRE2 10.40:";; esac cat <<\_ACEOF @@ -1703,7 +1704,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -PCRE2 configure 10.39 +PCRE2 configure 10.40 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -2058,7 +2059,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by PCRE2 $as_me 10.39, which was +It was created by PCRE2 $as_me 10.40, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3330,7 +3331,7 @@ # Define the identity of the package. 
PACKAGE='pcre2' - VERSION='10.39' + VERSION='10.40' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -5129,8 +5130,8 @@ -macro_version='2.4.6.42-b88ce-dirty' -macro_revision='2.4.6.42' +macro_version='2.4.6.59-b55b-dirty' +macro_revision='2.4.6.59' @@ -5758,13 +5759,13 @@ mingw*) lt_bad_file=conftest.nm/nofile ;; *) lt_bad_file=/dev/null ;; esac - case `"$tmp_nm" -B $lt_bad_file 2>&1 | sed '1q'` in + case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in *$lt_bad_file* | *'Invalid file or object type'*) lt_cv_path_NM="$tmp_nm -B" break 2 ;; *) - case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in */dev/null*) lt_cv_path_NM="$tmp_nm -p" break 2 @@ -5902,7 +5903,7 @@ fi fi - case `$DUMPBIN -symbols -headers /dev/null 2>&1 | sed '1q'` in + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in *COFF*) DUMPBIN="$DUMPBIN -symbols -headers" ;; @@ -6006,7 +6007,7 @@ lt_cv_sys_max_cmd_len=8192; ;; - bitrig* | darwin* | dragonfly* | freebsd* | netbsd* | openbsd*) + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` @@ -6049,7 +6050,7 @@ sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then - lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.* //'` + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.* //'` else lt_cv_sys_max_cmd_len=32768 fi @@ -6255,6 +6256,114 @@ if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}file", so it can be a program name with args. +set dummy ${ac_tool_prefix}file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_FILECMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$FILECMD"; then + ac_cv_prog_FILECMD="$FILECMD" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_FILECMD="${ac_tool_prefix}file" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +FILECMD=$ac_cv_prog_FILECMD +if test -n "$FILECMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $FILECMD" >&5 +printf "%s\n" "$FILECMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_FILECMD"; then + ac_ct_FILECMD=$FILECMD + # Extract the first word of "file", so it can be a program name with args. +set dummy file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_FILECMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_FILECMD"; then + ac_cv_prog_ac_ct_FILECMD="$ac_ct_FILECMD" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_FILECMD="file" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2
View file
_service:tar_scm:pcre2-10.39.tar.bz2/configure.ac -> _service:tar_scm:pcre2-10.40.tar.bz2/configure.ac
Changed
@@ -9,15 +9,15 @@ dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre2_major, 10) -m4_define(pcre2_minor, 39) +m4_define(pcre2_minor, 40) m4_define(pcre2_prerelease, ) -m4_define(pcre2_date, 2021-10-29) +m4_define(pcre2_date, 2022-04-14) # Libtool shared library interface versions (current:revision:age) -m4_define(libpcre2_8_version, 10:4:10) -m4_define(libpcre2_16_version, 10:4:10) -m4_define(libpcre2_32_version, 10:4:10) -m4_define(libpcre2_posix_version, 3:1:0) +m4_define(libpcre2_8_version, 11:0:11) +m4_define(libpcre2_16_version, 11:0:11) +m4_define(libpcre2_32_version, 11:0:11) +m4_define(libpcre2_posix_version, 3:2:0) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. @@ -512,7 +512,20 @@ # Checks for library functions. -AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp realpath secure_getenv strerror) +AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror) +AC_MSG_CHECKING(for realpath) +AC_LINK_IFELSE(AC_LANG_PROGRAM( +#include <stdlib.h> +#include <limits.h> +, +char bufferPATH_MAX; +realpath(".", buffer); +), +AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REALPATH, 1, + Define to 1 if you have the `realpath' function.) +, +AC_MSG_RESULT(no)) # Check for the availability of libz (aka zlib) @@ -584,14 +597,14 @@ fi fi - # Check for the availability of libedit. Different distributions put its # headers in different places. Try to cover the most common ones. 
if test "$enable_pcre2test_libedit" = "yes"; then - AC_CHECK_HEADERS(editline/readline.h, HAVE_EDITLINE_READLINE_H=1, - AC_CHECK_HEADERS(edit/readline/readline.h, HAVE_READLINE_READLINE_H=1, - AC_CHECK_HEADERS(readline/readline.h, HAVE_READLINE_READLINE_H=1))) + AC_CHECK_HEADERS(editline/readline.h edit/readline/readline.h readline.h, + HAVE_LIBEDIT_HEADER=1 + break + ) AC_CHECK_LIB(edit, readline, LIBEDIT="-ledit") fi @@ -927,10 +940,9 @@ echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline" exit 1 fi - if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \ - "$HAVE_READLINE_READLINE_H" != "1"; then - echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h" - echo "** nor readline/readline.h was found." + if test -z "$HAVE_LIBEDIT_HEADER"; then + echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h," + echo "** edit/readline/readline.h nor a compatible header was found." exit 1 fi if test -z "$LIBEDIT"; then
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/README.txt -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/README.txt
Changed
@@ -114,12 +114,18 @@ The following instructions assume the use of the widely used "configure; make; make install" (autotools) process. -To build PCRE2 on system that supports autotools, first run the "configure" -command from the PCRE2 distribution directory, with your current directory set +If you have downloaded and unpacked a PCRE2 release tarball, run the +"configure" command from the PCRE2 directory, with your current directory set to the directory where you want the files to be created. This command is a standard GNU "autoconf" configuration script, for which generic instructions are supplied in the file INSTALL. +The files in the GitHub repository do not contain "configure". If you have +downloaded the PCRE2 source files from GitHub, before you can run "configure" +you must run the shell script called autogen.sh. This runs a number of +autotools to create a "configure" script (you must of course have the autotools +commands installed in order to do this). + Most commonly, people build PCRE2 within its own distribution directory, and in this case, on many systems, just running "./configure" is sufficient. However, the usual methods of changing standard defaults are available. For example: @@ -188,10 +194,10 @@ As well as supporting UTF strings, Unicode support includes support for the \P, \p, and \X sequences that recognize Unicode character properties. - However, only the basic two-letter properties such as Lu are supported. - Escape sequences such as \d and \w in patterns do not by default make use of - Unicode properties, but can be made to do so by setting the PCRE2_UCP option - or starting a pattern with (*UCP). + However, only a subset of Unicode properties are supported; see the + pcre2pattern man page for details. Escape sequences such as \d and \w in + patterns do not by default make use of Unicode properties, but can be made to + do so by setting the PCRE2_UCP option or starting a pattern with (*UCP). . 
You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any of the preceding, or any of the Unicode newline sequences, or the NUL (zero) @@ -411,7 +417,7 @@ . Makefile the makefile that builds the library . src/config.h build-time configuration options for the library . src/pcre2.h the public PCRE2 header file -. pcre2-config script that shows the building settings such as CFLAGS +. pcre2-config script that shows the building settings such as CFLAGS that were set for "configure" . libpcre2-8.pc ) . libpcre2-16.pc ) data for the pkg-config command @@ -571,9 +577,9 @@ Making new tarballs ------------------- -The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats. -The command "make distcheck" does the same, but then does a trial build of the -new distribution to ensure that it works. +The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and +zip formats. The command "make distcheck" does the same, but then does a trial +build of the new distribution to ensure that it works. If you have modified any of the man page sources in the doc directory, you should first run the PrepareRelease script before making a distribution. This @@ -602,13 +608,13 @@ Many (but not all) of the tests that are not skipped are run twice if JIT support is available. On the second run, JIT compilation is forced. This -testing can be suppressed by putting "nojit" on the RunTest command line. +testing can be suppressed by putting "-nojit" on the RunTest command line. The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit libraries that are enabled. If you want to run just one set of tests, call RunTest with either the -8, -16 or -32 option. -If valgrind is installed, you can run the tests under it by putting "valgrind" +If valgrind is installed, you can run the tests under it by putting "-valgrind" on the RunTest command line. 
To run pcre2test on just one or more specific test files, give their numbers as arguments to RunTest, for example: @@ -905,4 +911,4 @@ Philip Hazel Email local part: Philip.Hazel Email domain: gmail.com -Last updated: 29 October 2021 +Last updated: 15 April 2022
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2_jit_stack_create.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2_jit_stack_create.html
Changed
@@ -34,7 +34,8 @@ <b>pcre2_jit_stack_assign()</b> to associate the stack with a compiled pattern, which can then be processed by <b>pcre2_match()</b> or <b>pcre2_jit_match()</b>. A maximum stack size of 512KiB to 1MiB should be more than enough for any -pattern. For more details, see the +pattern. If the stack couldn't be allocated or the values passed were not +reasonable, NULL will be returned. For more details, see the <a href="pcre2jit.html"><b>pcre2jit</b></a> page. </P>
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2_set_compile_extra_options.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2_set_compile_extra_options.html
Changed
@@ -30,8 +30,8 @@ housed in a compile context. It completely replaces all the bits. The extra options are: <pre> - PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK Allow \K in lookarounds PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \x{df800} to \x{dfff} - in UTF-8 and UTF-32 modes + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK Allow \K in lookarounds + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes PCRE2_EXTRA_ALT_BSUX Extended alternate \u, \U, and \x handling PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as a literal following character PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2_substitute.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2_substitute.html
Changed
@@ -68,29 +68,29 @@ The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for zero-terminated strings. The options are: <pre> - PCRE2_ANCHORED Match only at the first position - PCRE2_ENDANCHORED Pattern can match only at end of subject - PCRE2_NOTBOL Subject is not the beginning of a line - PCRE2_NOTEOL Subject is not the end of a line - PCRE2_NOTEMPTY An empty string is not a valid match - PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject is not a valid match - PCRE2_NO_JIT Do not use JIT matching - PCRE2_NO_UTF_CHECK Do not check the subject or replacement for UTF validity (only relevant if - PCRE2_UTF was set at compile time) - PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing - PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject - PCRE2_SUBSTITUTE_LITERAL The replacement string is literal - PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for 1st match - PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length + PCRE2_ANCHORED Match only at the first position + PCRE2_ENDANCHORED Match only at end of subject + PCRE2_NOTBOL Subject is not the beginning of a line + PCRE2_NOTEOL Subject is not the end of a line + PCRE2_NOTEMPTY An empty string is not a valid match + PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject is not a valid match + PCRE2_NO_JIT Do not use JIT matching + PCRE2_NO_UTF_CHECK Do not check for UTF validity in the subject or replacement + (only relevant if PCRE2_UTF was set at compile time) + PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing + PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject + PCRE2_SUBSTITUTE_LITERAL The replacement string is literal + PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for first match + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s) - PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset - 
PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string + PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset + PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string </pre> If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored. </P> <P> -If PCRE2_SUBSTITUTE_MATCHED is set, <i>match_data</i> must be non-zero; its +If PCRE2_SUBSTITUTE_MATCHED is set, <i>match_data</i> must be non-NULL; its contents must be the result of a call to <b>pcre2_match()</b> using the same pattern and subject. </P>
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2api.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2api.html
Changed
@@ -1845,7 +1845,7 @@ </P> <P> Note that this option can also be passed to <b>pcre2_match()</b> and -<b>pcre_dfa_match()</b>, to suppress UTF validity checking of the subject +<b>pcre2_dfa_match()</b>, to suppress UTF validity checking of the subject string. </P> <P> @@ -2055,8 +2055,8 @@ \d. </P> <P> -When PCRE2 is built with Unicode support (the default), the Unicode properties -of all characters can be tested with \p and \P, or, alternatively, the +When PCRE2 is built with Unicode support (the default), certain Unicode +character properties can be tested with \p and \P, or, alternatively, the PCRE2_UCP option can be set when a pattern is compiled; this causes \w and friends to use Unicode property support instead of the built-in tables. PCRE2_UCP also causes upper/lower casing operations on characters with code @@ -2316,7 +2316,7 @@ PCRE2_INFO_LASTCODETYPE </pre> Returns 1 if there is a rightmost literal code unit that must exist in any -matched string, other than at its start. The third argument should point to a +matched string, other than at its start. The third argument should point to a <b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is returned, the code unit value itself can be retrieved using PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is @@ -2640,7 +2640,9 @@ <i>startoffset</i>. The length and offset are in code units, not characters. That is, they are in bytes for the 8-bit library, 16-bit code units for the 16-bit library, and 32-bit code units for the 32-bit library, whether or not -UTF processing is enabled. +UTF processing is enabled. As a special case, if <i>subject</i> is NULL and +<i>length</i> is zero, the subject is assumed to be an empty string. If +<i>length</i> is non-zero, an error occurs if <i>subject</i> is NULL. 
</P> <P> If <i>startoffset</i> is greater than the length of the subject, @@ -3394,12 +3396,17 @@ <P> This function optionally calls <b>pcre2_match()</b> and then makes a copy of the subject string in <i>outputbuffer</i>, replacing parts that were matched with -the <i>replacement</i> string, whose length is supplied in <b>rlength</b>. This -can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an -option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the -replacement string(s). The default action is to perform just one replacement if -the pattern matches, but there is an option that requests multiple replacements -(see PCRE2_SUBSTITUTE_GLOBAL below). +the <i>replacement</i> string, whose length is supplied in <b>rlength</b>, which +can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a +special case, if <i>replacement</i> is NULL and <i>rlength</i> is zero, the +replacement is assumed to be an empty string. If <i>rlength</i> is non-zero, an +error occurs if <i>replacement</i> is NULL. +</P> +<P> +There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just +the replacement string(s). The default action is to perform just one +replacement if the pattern matches, but there is an option that requests +multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below). </P> <P> If successful, <b>pcre2_substitute()</b> returns the number of substitutions @@ -3433,12 +3440,12 @@ As well as the usual options for <b>pcre2_match()</b>, a number of additional options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>. One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external -<i>match_data</i> block must be provided, and it must have been used for an -external call to <b>pcre2_match()</b>. The data in the <i>match_data</i> block -(return code, offset vector) is used for the first substitution instead of -calling <b>pcre2_match()</b> from within <b>pcre2_substitute()</b>. 
This allows -an application to check for a match before choosing to substitute, without -having to repeat the match. +<i>match_data</i> block must be provided, and it must have already been used for +an external call to <b>pcre2_match()</b> with the same pattern and subject +arguments. The data in the <i>match_data</i> block (return code, offset vector) +is then used for the first substitution instead of calling <b>pcre2_match()</b> +from within <b>pcre2_substitute()</b>. This allows an application to check for a +match before choosing to substitute, without having to repeat the match. </P> <P> The contents of the externally supplied match data block are not changed when @@ -3583,7 +3590,7 @@ terminating a \Q quoted sequence) reverts to no case forcing. The sequences \u and \l force the next character (if it is a letter) to upper or lower case, respectively, and then the state automatically reverts to no case -forcing. Case forcing applies to all inserted characters, including those from +forcing. Case forcing applies to all inserted characters, including those from capture groups and letters within \Q...\E quoted sequences. If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode properties are used for case forcing characters whose code points are greater @@ -3655,7 +3662,9 @@ </P> <P> PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the -<i>match_data</i> argument is NULL. +<i>match_data</i> argument is NULL or if the <i>subject</i> or <i>replacement</i> +arguments are NULL. For backward compatibility reasons an exception is made for +the <i>replacement</i> argument if the <i>rlength</i> argument is also 0. 
</P> <P> PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the @@ -3810,12 +3819,13 @@ <P> The function <b>pcre2_dfa_match()</b> is called to match a subject string against a compiled pattern, using a matching algorithm that scans the subject -string just once (not counting lookaround assertions), and does not backtrack. -This has different characteristics to the normal algorithm, and is not -compatible with Perl. Some of the features of PCRE2 patterns are not supported. -Nevertheless, there are times when this kind of matching can be useful. For a -discussion of the two matching algorithms, and a list of features that -<b>pcre2_dfa_match()</b> does not support, see the +string just once (not counting lookaround assertions), and does not backtrack +(except when processing lookaround assertions). This has different +characteristics to the normal algorithm, and is not compatible with Perl. Some +of the features of PCRE2 patterns are not supported. Nevertheless, there are +times when this kind of matching can be useful. For a discussion of the two +matching algorithms, and a list of features that <b>pcre2_dfa_match()</b> does +not support, see the <a href="pcre2matching.html"><b>pcre2matching</b></a> documentation. </P> @@ -3850,7 +3860,7 @@ </PRE> </P> <br><b> -Option bits for <b>pcre_dfa_match()</b> +Option bits for <b>pcre2_dfa_match()</b> </b><br> <P> The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must @@ -4008,7 +4018,7 @@ </P> <br><a name="SEC42" href="#TOC1">REVISION</a><br> <P> -Last updated: 30 August 2021 +Last updated: 14 December 2021 <br> Copyright © 1997-2021 University of Cambridge. <br>
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2build.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2build.html
Changed
@@ -142,8 +142,9 @@ UTF support allows the libraries to process character code points up to 0x10ffff in the strings that they handle. Unicode support also gives access to the Unicode properties of characters, using pattern escapes such as \P, \p, -and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i> are -supported. Details are given in the +and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i>, +script names, and some bi-directional properties are supported. Details are +given in the <a href="pcre2pattern.html"><b>pcre2pattern</b></a> documentation. </P> @@ -307,7 +308,7 @@ for --with-match-limit. You can set a lower default limit by adding, for example, <pre> - --with-match-limit_depth=10000 + --with-match-limit-depth=10000 </pre> to the <b>configure</b> command. This value can be overridden at run time. This depth limit indirectly limits the amount of heap memory that is used, but @@ -615,9 +616,9 @@ </P> <br><a name="SEC26" href="#TOC1">REVISION</a><br> <P> -Last updated: 20 March 2020 +Last updated: 08 December 2021 <br> -Copyright © 1997-2020 University of Cambridge. +Copyright © 1997-2021 University of Cambridge. <br> <p> Return to the <a href="index.html">PCRE2 index page</a>.
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2compat.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2compat.html
Changed
@@ -18,33 +18,41 @@ <P> This document describes some of the differences in the ways that PCRE2 and Perl handle regular expressions. The differences described here are with respect to -Perl version 5.32.0, but as both Perl and PCRE2 are continually changing, the +Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the information may at times be out of date. </P> <P> -1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does +1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the +behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the +next character unless it is the start of a newline sequence. This means that, +if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF +(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using +EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline +indicator. +</P> +<P> +2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does have are given in the <a href="pcre2unicode.html"><b>pcre2unicode</b></a> page. </P> <P> -2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but +3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but they do not mean what you might think. For example, (?!a){3} does not assert that the next three characters are not "a". It just asserts that the next character is not "a" three times (in principle; PCRE2 optimizes this to run the assertion just once). Perl allows some repeat quantifiers on other assertions, -for example, \b* (but not \b{3}, though oddly it does allow ^{3}), but these -do not seem to have any use. PCRE2 does not allow any kind of quantifier on -non-lookaround assertions. +for example, \b* , but these do not seem to have any use. PCRE2 does not allow +any kind of quantifier on non-lookaround assertions. </P> <P> -3. 
Capture groups that occur inside negative lookaround assertions are counted, +4. Capture groups that occur inside negative lookaround assertions are counted, but their entries in the offsets vector are set only when a negative assertion is a condition that has a matching branch (that is, the condition is false). Perl may set such capture groups in other circumstances. </P> <P> -4. The following Perl escape sequences are not supported: \F, \l, \L, \u, +5. The following Perl escape sequences are not supported: \F, \l, \L, \u, \U, and \N when followed by a character name. \N on its own, matching a non-newline character, and \N{U+dd..}, matching a Unicode code point, are supported. The escapes that modify the case of following letters are @@ -55,26 +63,26 @@ interprets them. </P> <P> -5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is +6. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is built with Unicode support (the default). The properties that can be tested with \p and \P are limited to the general category properties such as Lu and -Nd, script names such as Greek or Han, and the derived properties Any and L&. -Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use -is limited. See the +Nd, script names such as Greek or Han, Bidi_Class, Bidi_Control, and the +derived properties Any and LC (synonym L&). Both PCRE2 and Perl support the Cs +(surrogate) property, but in PCRE2 its use is limited. See the <a href="pcre2pattern.html"><b>pcre2pattern</b></a> documentation for details. The long synonyms for property names that Perl supports (such as \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix any of these properties with "Is". </P> <P> -6. PCRE2 supports the \Q...\E escape for quoting substrings. Characters +7. PCRE2 supports the \Q...\E escape for quoting substrings. Characters in between are treated as literals. 
However, this is slightly different from Perl in that $ and @ are also handled as literals inside the quotes. In Perl, -they cause variable interpolation (but of course PCRE2 does not have -variables). Also, Perl does "double-quotish backslash interpolation" on any -backslashes between \Q and \E which, its documentation says, "may lead to -confusing results". PCRE2 treats a backslash between \Q and \E just like any -other character. Note the following examples: +they cause variable interpolation (PCRE2 does not have variables). Also, Perl +does "double-quotish backslash interpolation" on any backslashes between \Q +and \E which, its documentation says, "may lead to confusing results". PCRE2 +treats a backslash between \Q and \E just like any other character. Note the +following examples: <pre> Pattern PCRE2 matches Perl matches @@ -88,19 +96,19 @@ by both PCRE2 and Perl. </P> <P> -7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) +8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) constructions. However, PCRE2 does have a "callout" feature, which allows an external function to be called during pattern matching. See the <a href="pcre2callout.html"><b>pcre2callout</b></a> documentation for details. </P> <P> -8. Subroutine calls (whether recursive or not) were treated as atomic groups up +9. Subroutine calls (whether recursive or not) were treated as atomic groups up to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking into subroutine calls is now supported, as in Perl. </P> <P> -9. In PCRE2, if any of the backtracking control verbs are used in a group that +10. In PCRE2, if any of the backtracking control verbs are used in a group that is called as a subroutine (whether or not recursively), their effect is confined to that group; it does not extend to the surrounding pattern. This is not always the case in Perl. 
In particular, if (*THEN) is present in a group @@ -109,20 +117,20 @@ processed as anchored at the point where they are tested. </P> <P> -10. If a pattern contains more than one backtracking control verb, the first +11. If a pattern contains more than one backtracking control verb, the first one that is backtracked onto acts. For example, in the pattern A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the same as PCRE2, but there are cases where it differs. </P> <P> -11. There are some differences that are concerned with the settings of captured +12. There are some differences that are concerned with the settings of captured strings when part of a pattern is repeated. For example, matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to "b". </P> <P> -12. PCRE2's handling of duplicate capture group numbers and names is not as +13. PCRE2's handling of duplicate capture group numbers and names is not as general as Perl's. This is a consequence of the fact the PCRE2 works internally just with numbers, using an external table to translate between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two @@ -132,42 +140,43 @@ number 1. To avoid this confusing situation, an error is given at compile time. </P> <P> -13. Perl used to recognize comments in some places that PCRE2 does not, for +14. Perl used to recognize comments in some places that PCRE2 does not, for example, between the ( and ? at the start of a group. If the /x modifier is set, Perl allowed white space between ( and ? though the latest Perls give an error (for a while it was just deprecated). There may still be some cases where Perl behaves differently. </P> <P> -14. Perl, when in warning mode, gives warnings for character classes such as +15. Perl, when in warning mode, gives warnings for character classes such as A-\d or a-:digit:. 
It then treats the hyphens as literals. PCRE2 has no warning features, so it gives an error in these cases because they are almost certainly user mistakes. </P> <P> -15. In PCRE2, the upper/lower case character properties Lu and Ll are not +16. In PCRE2, the upper/lower case character properties Lu and Ll are not affected when case-independent matching is specified. For example, \p{Lu} always matches an upper case letter. I think Perl has changed in this respect; -in the release at the time of writing (5.32), \p{Lu} and \p{Ll} match all +in the release at the time of writing (5.34), \p{Lu} and \p{Ll} match all letters, regardless of case, when case independence is specified. </P> <P> -16. From release 5.32.0, Perl locks out the use of \K in lookaround +17. From release 5.32.0, Perl locks out the use of \K in lookaround assertions. From release 10.38 PCRE2 does the same by default. However, there is an option for re-enabling the previous behaviour. When this option is set, \K is acted on when it occurs in positive assertions, but is ignored in negative assertions. </P> <P> -17. PCRE2 provides some extensions to the Perl regular expression facilities. +18. PCRE2 provides some extensions to the Perl regular expression facilities. Perl 5.10 included new features that were not in earlier versions of Perl, some of which (such as named parentheses) were in PCRE2 for some time before. This -list is with respect to Perl 5.32: +list is with respect to Perl 5.34: <br> <br> (a) Although lookbehind assertions in PCRE2 must match fixed length strings, each alternative toplevel branch of a lookbehind assertion can match a -different length of string. Perl requires them all to have the same length. +different length of string. Perl used to require them all to have the same +length, but the latest version has some variable length support. <br> <br> (b) From PCRE2 10.23, backreferences to groups of fixed length are supported @@ -221,12 +230,12 @@ lookarounds are atomic. 
</P> <P> -18. The Perl /a modifier restricts \d numbers to pure ascii, and the /aa +19. The Perl /a modifier restricts \d numbers to pure ascii, and the /aa modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode rules. This separation cannot be represented with PCRE2_UCP. </P> <P> -19. Perl has different limits than PCRE2. See the +20. Perl has different limits than PCRE2. See the <a href="pcre2limit.html"><b>pcre2limit</b></a> documentation for details. Perl went with 5.10 from recursion to iteration
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2jit.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2jit.html
Changed
@@ -269,11 +269,11 @@ for currently suspended match(es). </P> <P> -In a multithread application, if you do not -specify a JIT stack, or if you assign or pass back NULL from a callback, that -is thread-safe, because each thread has its own machine stack. However, if you -assign or pass back a non-NULL JIT stack, this must be a different stack for -each thread so that the application is thread-safe. +In a multithread application, if you do not specify a JIT stack, or if you +assign or pass back NULL from a callback, that is thread-safe, because each +thread has its own machine stack. However, if you assign or pass back a +non-NULL JIT stack, this must be a different stack for each thread so that the +application is thread-safe. </P> <P> Strictly speaking, even more is allowed. You can assign the same non-NULL stack @@ -382,8 +382,8 @@ <b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b> </P> <P> -The JIT executable allocator does not free all memory when it is possible. -It expects new allocations, and keeps some free memory around to improve +The JIT executable allocator does not free all memory when it is possible. It +expects new allocations, and keeps some free memory around to improve allocation speed. However, in low memory conditions, it might be better to free all possible memory. You can cause this to happen by calling pcre2_jit_free_unused_memory(). Its argument is a general context, for custom @@ -442,10 +442,10 @@ <P> When you call <b>pcre2_match()</b>, as well as testing for invalid options, a number of other sanity checks are performed on the arguments. For example, if -the subject pointer is NULL, an immediate error is given. Also, unless -PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the -interests of speed, these checks do not happen on the JIT fast path, and if -invalid data is passed, the result is undefined. 
+the subject pointer is NULL but the length is non-zero, an immediate error is +given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested +for validity. In the interests of speed, these checks do not happen on the JIT +fast path, and if invalid data is passed, the result is undefined. </P> <P> Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give @@ -466,9 +466,9 @@ </P> <br><a name="SEC14" href="#TOC1">REVISION</a><br> <P> -Last updated: 23 May 2019 +Last updated: 30 November 2021 <br> -Copyright © 1997-2019 University of Cambridge. +Copyright © 1997-2021 University of Cambridge. <br> <p> Return to the <a href="index.html">PCRE2 index page</a>.
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2pattern.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2pattern.html
Changed
@@ -534,7 +534,7 @@ \0113 is a tab followed by the character "3" \113 might be a backreference, otherwise the character with octal code 113 \377 might be a backreference, otherwise the value 255 (decimal) - \81 is always a backreference .sp + \81 is always a backreference </pre> Note that octal values of 100 or greater that are specified using this syntax must not be introduced by a leading zero, because no more than three octal @@ -776,199 +776,62 @@ sequences are of course limited to testing characters whose code points are less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit) may be encountered. These are all -treated as being in the Unknown script and with an unassigned type. The extra -escape sequences are: +treated as being in the Unknown script and with an unassigned type. +</P> +<P> +Matching characters by Unicode property is not fast, because PCRE2 has to do a +multistage table lookup in order to find a character's property. That is why +the traditional escape sequences such as \d and \w do not use Unicode +properties in PCRE2 by default, though you can make them do so by setting the +PCRE2_UCP option or by starting the pattern with (*UCP). +</P> +<P> +The extra escape sequences that provide property support are: <pre> \p{<i>xx</i>} a character with the <i>xx</i> property \P{<i>xx</i>} a character without the <i>xx</i> property \X a Unicode extended grapheme cluster </pre> -The property names represented by <i>xx</i> above are case-sensitive. There is -support for Unicode script names, Unicode general category properties, "Any", -which matches any character (including newline), and some special PCRE2 -properties (described in the -<a href="#extraprops">next section).</a> -Other Perl properties such as "InMusicalSymbols" are not supported by PCRE2. -Note that \P{Any} does not match any characters, so always causes a match -failure. 
+The property names represented by <i>xx</i> above are not case-sensitive, and in +accordance with Unicode's "loose matching" rules, spaces, hyphens, and +underscores are ignored. There is support for Unicode script names, Unicode +general category properties, "Any", which matches any character (including +newline), Bidi_Class, a number of binary (yes/no) properties, and some special +PCRE2 properties (described +<a href="#extraprops">below).</a> +Certain other Perl properties such as "InMusicalSymbols" are not supported by +PCRE2. Note that \P{Any} does not match any characters, so always causes a +match failure. +</P> +<br><b> +Script properties for \p and \P +</b><br> +<P> +There are three different syntax forms for matching a script. Each Unicode +character has a basic script and, optionally, a list of other scripts ("Script +Extensions") with which it is commonly used. Using the Adlam script as an +example, \p{sc:Adlam} matches characters whose basic script is Adlam, whereas +\p{scx:Adlam} matches, in addition, characters that have Adlam in their +extensions list. The full names "script" and "script extensions" for the +property types are recognized, and an equals sign is an alternative to the +colon. If a script name is given without a property type, for example, +\p{Adlam}, it is treated as \p{scx:Adlam}. Perl changed to this +interpretation at release 5.26 and PCRE2 changed at release 10.40. </P> <P> -Sets of Unicode characters are defined as belonging to certain scripts. A -character from one of these sets can be matched using a script name. For -example: -<pre> - \p{Greek} - \P{Han} -</pre> Unassigned characters (and in non-UTF 32-bit mode, characters with code points greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not part of an identified script are lumped together as "Common". 
The current list -of scripts is: -</P> -<P> -Adlam, -Ahom, -Anatolian_Hieroglyphs, -Arabic, -Armenian, -Avestan, -Balinese, -Bamum, -Bassa_Vah, -Batak, -Bengali, -Bhaiksuki, -Bopomofo, -Brahmi, -Braille, -Buginese, -Buhid, -Canadian_Aboriginal, -Carian, -Caucasian_Albanian, -Chakma, -Cham, -Cherokee, -Chorasmian, -Common, -Coptic, -Cuneiform, -Cypriot, -Cypro_Minoan, -Cyrillic, -Deseret, -Devanagari, -Dives_Akuru, -Dogra, -Duployan, -Egyptian_Hieroglyphs, -Elbasan, -Elymaic, -Ethiopic, -Georgian, -Glagolitic, -Gothic, -Grantha, -Greek, -Gujarati, -Gunjala_Gondi, -Gurmukhi, -Han, -Hangul, -Hanifi_Rohingya, -Hanunoo, -Hatran, -Hebrew, -Hiragana, -Imperial_Aramaic, -Inherited, -Inscriptional_Pahlavi, -Inscriptional_Parthian, -Javanese, -Kaithi, -Kannada, -Katakana, -Kayah_Li, -Kharoshthi, -Khitan_Small_Script, -Khmer, -Khojki, -Khudawadi, -Lao, -Latin, -Lepcha, -Limbu, -Linear_A, -Linear_B, -Lisu, -Lycian, -Lydian, -Mahajani, -Makasar, -Malayalam, -Mandaic, -Manichaean, -Marchen, -Masaram_Gondi, -Medefaidrin, -Meetei_Mayek, -Mende_Kikakui, -Meroitic_Cursive, -Meroitic_Hieroglyphs, -Miao, -Modi, -Mongolian, -Mro, -Multani, -Myanmar, -Nabataean, -Nandinagari, -New_Tai_Lue, -Newa, -Nko, -Nushu, -Nyakeng_Puachue_Hmong, -Ogham, -Ol_Chiki, -Old_Hungarian, -Old_Italic, -Old_North_Arabian, -Old_Permic, -Old_Persian, -Old_Sogdian, -Old_South_Arabian, -Old_Turkic, -Old_Uyghur, -Oriya, -Osage, -Osmanya, -Pahawh_Hmong, -Palmyrene, -Pau_Cin_Hau, -Phags_Pa, -Phoenician,
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2serialize.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2serialize.html
Changed
@@ -23,12 +23,12 @@ <br><a name="SEC1" href="#TOC1">SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS</a><br> <P> <b>int32_t pcre2_serialize_decode(pcre2_code **<i>codes</i>,</b> -<b> int32_t <i>number_of_codes</i>, const uint32_t *<i>bytes</i>,</b> +<b> int32_t <i>number_of_codes</i>, const uint8_t *<i>bytes</i>,</b> <b> pcre2_general_context *<i>gcontext</i>);</b> <br> <br> -<b>int32_t pcre2_serialize_encode(pcre2_code **<i>codes</i>,</b> -<b> int32_t <i>number_of_codes</i>, uint32_t **<i>serialized_bytes</i>,</b> +<b>int32_t pcre2_serialize_encode(const pcre2_code **<i>codes</i>,</b> +<b> int32_t <i>number_of_codes</i>, uint8_t **<i>serialized_bytes</i>,</b> <b> PCRE2_SIZE *<i>serialized_size</i>, pcre2_general_context *<i>gcontext</i>);</b> <br> <br> @@ -154,7 +154,6 @@ <b>malloc()</b> and <b>free()</b> are used. After deserialization, the byte stream is no longer needed and can be discarded. <pre> - int32_t number_of_codes; pcre2_code *list_of_codes2; uint8_t *bytes = <serialized data>; int32_t number_of_codes =
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2syntax.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2syntax.html
Changed
@@ -19,29 +19,31 @@ <li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a> <li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTIES FOR \p and \P</a> <li><a name="TOC6" href="#SEC6">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a> -<li><a name="TOC7" href="#SEC7">SCRIPT NAMES FOR \p AND \P</a> -<li><a name="TOC8" href="#SEC8">CHARACTER CLASSES</a> -<li><a name="TOC9" href="#SEC9">QUANTIFIERS</a> -<li><a name="TOC10" href="#SEC10">ANCHORS AND SIMPLE ASSERTIONS</a> -<li><a name="TOC11" href="#SEC11">REPORTED MATCH POINT SETTING</a> -<li><a name="TOC12" href="#SEC12">ALTERNATION</a> -<li><a name="TOC13" href="#SEC13">CAPTURING</a> -<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a> -<li><a name="TOC15" href="#SEC15">COMMENT</a> -<li><a name="TOC16" href="#SEC16">OPTION SETTING</a> -<li><a name="TOC17" href="#SEC17">NEWLINE CONVENTION</a> -<li><a name="TOC18" href="#SEC18">WHAT \R MATCHES</a> -<li><a name="TOC19" href="#SEC19">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a> -<li><a name="TOC20" href="#SEC20">NON-ATOMIC LOOKAROUND ASSERTIONS</a> -<li><a name="TOC21" href="#SEC21">SCRIPT RUNS</a> -<li><a name="TOC22" href="#SEC22">BACKREFERENCES</a> -<li><a name="TOC23" href="#SEC23">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a> -<li><a name="TOC24" href="#SEC24">CONDITIONAL PATTERNS</a> -<li><a name="TOC25" href="#SEC25">BACKTRACKING CONTROL</a> -<li><a name="TOC26" href="#SEC26">CALLOUTS</a> -<li><a name="TOC27" href="#SEC27">SEE ALSO</a> -<li><a name="TOC28" href="#SEC28">AUTHOR</a> -<li><a name="TOC29" href="#SEC29">REVISION</a> +<li><a name="TOC7" href="#SEC7">BINARY PROPERTIES FOR \p AND \P</a> +<li><a name="TOC8" href="#SEC8">SCRIPT MATCHING WITH \p AND \P</a> +<li><a name="TOC9" href="#SEC9">THE BIDI_CLASS PROPERTY FOR \p AND \P</a> +<li><a name="TOC10" href="#SEC10">CHARACTER CLASSES</a> +<li><a name="TOC11" href="#SEC11">QUANTIFIERS</a> +<li><a name="TOC12" href="#SEC12">ANCHORS AND SIMPLE ASSERTIONS</a> +<li><a name="TOC13" href="#SEC13">REPORTED MATCH POINT 
SETTING</a> +<li><a name="TOC14" href="#SEC14">ALTERNATION</a> +<li><a name="TOC15" href="#SEC15">CAPTURING</a> +<li><a name="TOC16" href="#SEC16">ATOMIC GROUPS</a> +<li><a name="TOC17" href="#SEC17">COMMENT</a> +<li><a name="TOC18" href="#SEC18">OPTION SETTING</a> +<li><a name="TOC19" href="#SEC19">NEWLINE CONVENTION</a> +<li><a name="TOC20" href="#SEC20">WHAT \R MATCHES</a> +<li><a name="TOC21" href="#SEC21">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a> +<li><a name="TOC22" href="#SEC22">NON-ATOMIC LOOKAROUND ASSERTIONS</a> +<li><a name="TOC23" href="#SEC23">SCRIPT RUNS</a> +<li><a name="TOC24" href="#SEC24">BACKREFERENCES</a> +<li><a name="TOC25" href="#SEC25">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a> +<li><a name="TOC26" href="#SEC26">CONDITIONAL PATTERNS</a> +<li><a name="TOC27" href="#SEC27">BACKTRACKING CONTROL</a> +<li><a name="TOC28" href="#SEC28">CALLOUTS</a> +<li><a name="TOC29" href="#SEC29">SEE ALSO</a> +<li><a name="TOC30" href="#SEC30">AUTHOR</a> +<li><a name="TOC31" href="#SEC31">REVISION</a> </ul> <br><a name="SEC1" href="#TOC1">PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY</a><br> <P> @@ -136,6 +138,11 @@ sequences is changed to use Unicode properties and they match many more characters. </P> +<P> +Property descriptions in \p and \P are matched caselessly; hyphens, +underscores, and white space are ignored, in accordance with Unicode's "loose +matching" rules. +</P> <br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br> <P> <pre> @@ -152,6 +159,7 @@ Lo Other letter Lt Title case letter Lu Upper case letter + Lc Ll, Lu, or Lt L& Ll, Lu, or Lt M Mark @@ -198,171 +206,58 @@ Perl and POSIX space are now the same. Perl added VT to its space character set at release 5.18. 
</P> -<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br> -<P> -Adlam, -Ahom, -Anatolian_Hieroglyphs, -Arabic, -Armenian, -Avestan, -Balinese, -Bamum, -Bassa_Vah, -Batak, -Bengali, -Bhaiksuki, -Bopomofo, -Brahmi, -Braille, -Buginese, -Buhid, -Canadian_Aboriginal, -Carian, -Caucasian_Albanian, -Chakma, -Cham, -Cherokee, -Chorasmian, -Common, -Coptic, -Cuneiform, -Cypriot, -Cypro_Minoan, -Cyrillic, -Deseret, -Devanagari, -Dives_Akuru, -Dogra, -Duployan, -Egyptian_Hieroglyphs, -Elbasan, -Elymaic, -Ethiopic, -Georgian, -Glagolitic, -Gothic, -Grantha, -Greek, -Gujarati, -Gunjala_Gondi, -Gurmukhi, -Han, -Hangul, -Hanifi_Rohingya, -Hanunoo, -Hatran, -Hebrew, -Hiragana, -Imperial_Aramaic, -Inherited, -Inscriptional_Pahlavi, -Inscriptional_Parthian, -Javanese, -Kaithi, -Kannada, -Katakana, -Kayah_Li, -Kharoshthi, -Khitan_Small_Script, -Khmer, -Khojki, -Khudawadi, -Lao, -Latin, -Lepcha, -Limbu, -Linear_A, -Linear_B, -Lisu, -Lycian, -Lydian, -Mahajani, -Makasar, -Malayalam, -Mandaic, -Manichaean, -Marchen, -Masaram_Gondi, -Medefaidrin, -Meetei_Mayek, -Mende_Kikakui, -Meroitic_Cursive, -Meroitic_Hieroglyphs, -Miao, -Modi, -Mongolian, -Mro, -Multani, -Myanmar, -Nabataean, -Nandinagari, -New_Tai_Lue, -Newa, -Nko, -Nushu, -Nyakeng_Puachue_Hmong, -Ogham, -Ol_Chiki, -Old_Hungarian, -Old_Italic, -Old_North_Arabian, -Old_Permic, -Old_Persian, -Old_Sogdian, -Old_South_Arabian, -Old_Turkic, -Old_Uyghur, -Oriya, -Osage, -Osmanya, -Pahawh_Hmong, -Palmyrene, -Pau_Cin_Hau,
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2test.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2test.html
Changed
@@ -78,7 +78,7 @@ </P> <P> In the rest of this document, the names of library functions and structures -are given in generic form, for example, <b>pcre_compile()</b>. The actual +are given in generic form, for example, <b>pcre2_compile()</b>. The actual names used in the libraries have a suffix _8, _16, or _32, as appropriate. <a name="inputencoding"></a></P> <br><a name="SEC3" href="#TOC1">INPUT ENCODING</a><br> @@ -253,7 +253,19 @@ <b>-LM</b> List modifiers: write a list of available pattern and subject modifiers to the standard output, then exit with zero exit code. All other options are ignored. -If both -C and -LM are present, whichever is first is recognized. +If both -C and any -Lx options are present, whichever is first is recognized. +</P> +<P> +<b>-LP</b> +List properties: write a list of recognized Unicode properties to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +</P> +<P> +<b>-LS</b> +List scripts: write a list of recognized Unicode script names to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. </P> <P> <b>-pattern</b> <i>modifier-list</i> @@ -1239,6 +1251,8 @@ match_limit=<n> set a match limit memory show heap memory usage null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject offset=<n> set starting offset offset_limit=<n> set offset limit ovector=<n> set size of output vector @@ -1668,7 +1682,7 @@ passing the replacement string as zero-terminated. </P> <br><b> -Passing a NULL context +Passing a NULL context, subject, or replacement </b><br> <P> Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>, @@ -1678,6 +1692,11 @@ case (they use default values). 
This modifier cannot be used with the <b>find_limits</b> or <b>substitute_callout</b> modifiers. </P> +<P> +Similarly, for testing purposes, if the <b>null_subject</b> or +<b>null_replacement</b> modifier is set, the subject or replacement string +pointers are passed as NULL, respectively, to the relevant functions. +</P> <br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br> <P> By default, <b>pcre2test</b> uses the standard PCRE2 matching function, @@ -2122,9 +2141,9 @@ </P> <br><a name="SEC21" href="#TOC1">REVISION</a><br> <P> -Last updated: 30 August 2021 +Last updated: 12 January 2022 <br> -Copyright © 1997-2021 University of Cambridge. +Copyright © 1997-2022 University of Cambridge. <br> <p> Return to the <a href="index.html">PCRE2 index page</a>.
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/html/pcre2unicode.html -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/html/pcre2unicode.html
Changed
@@ -50,17 +50,18 @@ <P> When PCRE2 is built with Unicode support, the escape sequences \p{..}, \P{..}, and \X can be used. This is not dependent on the PCRE2_UTF setting. -The Unicode properties that can be tested are limited to the general category -properties such as Lu for an upper case letter or Nd for a decimal number, the -Unicode script names such as Arabic or Han, and the derived properties Any and -L&. Full lists are given in the +The Unicode properties that can be tested are a subset of those that Perl +supports. Currently they are limited to the general category properties such as +Lu for an upper case letter or Nd for a decimal number, the Unicode script +names such as Arabic or Han, Bidi_Class, Bidi_Control, and the derived +properties Any and LC (synonym L&). Full lists are given in the <a href="pcre2pattern.html"><b>pcre2pattern</b></a> and <a href="pcre2syntax.html"><b>pcre2syntax</b></a> -documentation. Only the short names for properties are supported. For example, -\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported. -Furthermore, in Perl, many properties may optionally be prefixed by "Is", for -compatibility with Perl 5.6. PCRE2 does not support this. +documentation. In general, only the short names for properties are supported. +For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is not +supported. Furthermore, in Perl, many properties may optionally be prefixed by +"Is", for compatibility with Perl 5.6. PCRE2 does not support this. </P> <br><b> WIDE CHARACTERS AND UTF MODES @@ -477,7 +478,7 @@ <P> Philip Hazel <br> -University Computing Service +Retired from University Computing Service <br> Cambridge, England. <br> @@ -486,9 +487,9 @@ REVISION </b><br> <P> -Last updated: 23 February 2020 +Last updated: 22 December 2021 <br> -Copyright © 1997-2020 University of Cambridge. +Copyright © 1997-2021 University of Cambridge. <br> <p> Return to the <a href="index.html">PCRE2 index page</a>.
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2.txt -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2.txt
Changed
@@ -1815,7 +1815,7 @@ to crash or loop. Note that this option can also be passed to pcre2_match() and - pcre_dfa_match(), to suppress UTF validity checking of the subject + pcre2_dfa_match(), to suppress UTF validity checking of the subject string. Note also that setting PCRE2_NO_UTF_CHECK at compile time does not dis- @@ -2012,13 +2012,13 @@ code points are less than 256. By default, higher-valued code points never match escapes such as \w or \d. - When PCRE2 is built with Unicode support (the default), the Unicode - properties of all characters can be tested with \p and \P, or, alterna- - tively, the PCRE2_UCP option can be set when a pattern is compiled; - this causes \w and friends to use Unicode property support instead of - the built-in tables. PCRE2_UCP also causes upper/lower casing opera- - tions on characters with code points greater than 127 to use Unicode - properties. These effects apply even when PCRE2_UTF is not set. + When PCRE2 is built with Unicode support (the default), certain Unicode + character properties can be tested with \p and \P, or, alternatively, + the PCRE2_UCP option can be set when a pattern is compiled; this causes + \w and friends to use Unicode property support instead of the built-in + tables. PCRE2_UCP also causes upper/lower casing operations on charac- + ters with code points greater than 127 to use Unicode properties. These + effects apply even when PCRE2_UTF is not set. The use of locales with Unicode is discouraged. If you are handling characters with code points greater than 127, you should either use @@ -2579,7 +2579,9 @@ and offset are in code units, not characters. That is, they are in bytes for the 8-bit library, 16-bit code units for the 16-bit library, and 32-bit code units for the 32-bit library, whether or not UTF pro- - cessing is enabled. + cessing is enabled. As a special case, if subject is NULL and length is + zero, the subject is assumed to be an empty string. 
If length is non- + zero, an error occurs if subject is NULL. If startoffset is greater than the length of the subject, pcre2_match() returns PCRE2_ERROR_BADOFFSET. When the starting offset is zero, the @@ -3280,8 +3282,12 @@ This function optionally calls pcre2_match() and then makes a copy of the subject string in outputbuffer, replacing parts that were matched - with the replacement string, whose length is supplied in rlength. This - can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. + with the replacement string, whose length is supplied in rlength, which + can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As + a special case, if replacement is NULL and rlength is zero, the re- + placement is assumed to be an empty string. If rlength is non-zero, an + error occurs if replacement is NULL. + There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to re- turn just the replacement string(s). The default action is to perform just one replacement if the pattern matches, but there is an option @@ -3315,89 +3321,90 @@ As well as the usual options for pcre2_match(), a number of additional options can be set in the options argument of pcre2_substitute(). One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external - match_data block must be provided, and it must have been used for an - external call to pcre2_match(). The data in the match_data block (re- - turn code, offset vector) is used for the first substitution instead of - calling pcre2_match() from within pcre2_substitute(). This allows an - application to check for a match before choosing to substitute, without - having to repeat the match. - - The contents of the externally supplied match data block are not - changed when PCRE2_SUBSTITUTE_MATCHED is set. 
If PCRE2_SUBSTI- - TUTE_GLOBAL is also set, pcre2_match() is called after the first sub- - stitution to check for further matches, but this is done using an in- - ternally obtained match data block, thus always leaving the external + match_data block must be provided, and it must have already been used + for an external call to pcre2_match() with the same pattern and subject + arguments. The data in the match_data block (return code, offset vec- + tor) is then used for the first substitution instead of calling + pcre2_match() from within pcre2_substitute(). This allows an applica- + tion to check for a match before choosing to substitute, without having + to repeat the match. + + The contents of the externally supplied match data block are not + changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTI- + TUTE_GLOBAL is also set, pcre2_match() is called after the first sub- + stitution to check for further matches, but this is done using an in- + ternally obtained match data block, thus always leaving the external block unchanged. - The code argument is not used for matching before the first substitu- - tion when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, - even when PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains in- + The code argument is not used for matching before the first substitu- + tion when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, + even when PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains in- formation such as the UTF setting and the number of capturing parenthe- ses in the pattern. - The default action of pcre2_substitute() is to return a copy of the + The default action of pcre2_substitute() is to return a copy of the subject string with matched substrings replaced. However, if PCRE2_SUB- - STITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are + STITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are returned. 
In the global case, multiple replacements are concatenated in - the output buffer. Substitution callouts (see below) can be used to + the output buffer. Substitution callouts (see below) can be used to separate them if necessary. - The outlengthptr argument of pcre2_substitute() must point to a vari- - able that contains the length, in code units, of the output buffer. If - the function is successful, the value is updated to contain the length - in code units of the new string, excluding the trailing zero that is + The outlengthptr argument of pcre2_substitute() must point to a vari- + able that contains the length, in code units, of the output buffer. If + the function is successful, the value is updated to contain the length + in code units of the new string, excluding the trailing zero that is automatically added. - If the function is not successful, the value set via outlengthptr de- - pends on the type of error. For syntax errors in the replacement + If the function is not successful, the value set via outlengthptr de- + pends on the type of error. For syntax errors in the replacement string, the value is the offset in the replacement string where the er- - ror was detected. For other errors, the value is PCRE2_UNSET by de- + ror was detected. For other errors, the value is PCRE2_UNSET by de- fault. This includes the case of the output buffer being too small, un- less PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set. - PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is too small. The default action is to return PCRE2_ERROR_NOMEM- - ORY immediately. If this option is set, however, pcre2_substitute() + ORY immediately. If this option is set, however, pcre2_substitute() continues to go through the motions of matching and substituting (with- - out, of course, writing anything) in order to compute the size of buf- - fer that is needed. 
This value is passed back via the outlengthptr - variable, with the result of the function still being PCRE2_ER- + out, of course, writing anything) in order to compute the size of buf- + fer that is needed. This value is passed back via the outlengthptr + variable, with the result of the function still being PCRE2_ER- ROR_NOMEMORY. - Passing a buffer size of zero is a permitted way of finding out how - much memory is needed for given substitution. However, this does mean + Passing a buffer size of zero is a permitted way of finding out how + much memory is needed for given substitution. However, this does mean that the entire operation is carried out twice. Depending on the appli- - cation, it may be more efficient to allocate a large buffer and free - the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER- + cation, it may be more efficient to allocate a large buffer and free + the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER- FLOW_LENGTH. - The replacement string, which is interpreted as a UTF string in UTF - mode, is checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An + The replacement string, which is interpreted as a UTF string in UTF + mode, is checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF replacement string causes an immediate return with the rel- evant UTF error code. - If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in- + If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in- terpreted in any way. By default, however, a dollar character is an es- - cape character that can specify the insertion of characters from cap- - ture groups and names from (*MARK) or other control verbs in the pat- + cape character that can specify the insertion of characters from cap- + ture groups and names from (*MARK) or other control verbs in the pat- tern. 
The following forms are always recognized: $$ insert a dollar character $<n> or ${<n>} insert the contents of group <n> $*MARK or ${*MARK} insert a control verb name - Either a group number or a group name can be given for <n>. Curly - brackets are required only if the following character would be inter- + Either a group number or a group name can be given for <n>. Curly + brackets are required only if the following character would be inter- preted as part of the number or name. The number may be zero to include - the entire matched string. For example, if the pattern a(b)c is - matched with "=abc=" and the replacement string "+$1$0$1+", the result + the entire matched string. For example, if the pattern a(b)c is + matched with "=abc=" and the replacement string "+$1$0$1+", the result is "=+babcb+=". - $*MARK inserts the name from the last encountered backtracking control - verb on the matching path that has a name. (*MARK) must always include - a name, but the other verbs need not. For example, in the case of + $*MARK inserts the name from the last encountered backtracking control + verb on the matching path that has a name. (*MARK) must always include + a name, but the other verbs need not. For example, in the case of (*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B) - the relevant name is "B". This facility can be used to perform simple + the relevant name is "B". This facility can be used to perform simple simultaneous substitutions, as this pcre2test example shows: /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} @@ -3405,15 +3412,15 @@ 2: pear orange PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject - string, replacing every matching substring. If this option is not set, - only the first matching substring is replaced. The search for matches
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2_jit_stack_create.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2_jit_stack_create.3
Changed
@@ -22,7 +22,8 @@ \fBpcre2_jit_stack_assign()\fP to associate the stack with a compiled pattern, which can then be processed by \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP. A maximum stack size of 512KiB to 1MiB should be more than enough for any -pattern. For more details, see the +pattern. If the stack couldn't be allocated or the values passed were not +reasonable, NULL will be returned. For more details, see the .\" HREF \fBpcre2jit\fP .\"
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2_set_compile_extra_options.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2_set_compile_extra_options.3
Changed
@@ -18,9 +18,9 @@ housed in a compile context. It completely replaces all the bits. The extra options are: .sp -.\" JOIN PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK Allow \eK in lookarounds - PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \ex{df800} to \ex{dfff} +.\" JOIN + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \ex{d800} to \ex{dfff} in UTF-8 and UTF-32 modes .\" JOIN PCRE2_EXTRA_ALT_BSUX Extended alternate \eu, \eU, and
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2_substitute.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2_substitute.3
Changed
@@ -55,32 +55,42 @@ The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for zero-terminated strings. The options are: .sp - PCRE2_ANCHORED Match only at the first position - PCRE2_ENDANCHORED Pattern can match only at end of subject - PCRE2_NOTBOL Subject is not the beginning of a line - PCRE2_NOTEOL Subject is not the end of a line - PCRE2_NOTEMPTY An empty string is not a valid match + PCRE2_ANCHORED Match only at the first position + PCRE2_ENDANCHORED Match only at end of subject .\" JOIN - PCRE2_NOTEMPTY_ATSTART An empty string at the start of the - subject is not a valid match - PCRE2_NO_JIT Do not use JIT matching + PCRE2_NOTBOL Subject is not the beginning of a + line + PCRE2_NOTEOL Subject is not the end of a line .\" JOIN - PCRE2_NO_UTF_CHECK Do not check the subject or replacement - for UTF validity (only relevant if - PCRE2_UTF was set at compile time) - PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing - PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject - PCRE2_SUBSTITUTE_LITERAL The replacement string is literal - PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for 1st match - PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length + PCRE2_NOTEMPTY An empty string is not a + valid match +.\" JOIN + PCRE2_NOTEMPTY_ATSTART An empty string at the start of + the subject is not a valid match + PCRE2_NO_JIT Do not use JIT matching +.\" JOIN + PCRE2_NO_UTF_CHECK Do not check for UTF validity in + the subject or replacement +.\" JOIN + (only relevant if PCRE2_UTF was + set at compile time) + PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing +.\" JOIN + PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the + subject + PCRE2_SUBSTITUTE_LITERAL The replacement string is literal +.\" JOIN + PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for + first match + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement 
string(s) - PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset - PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string + PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset + PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string .sp If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored. .P -If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-zero; its +If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-NULL; its contents must be the result of a call to \fBpcre2_match()\fP using the same pattern and subject. .P
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2api.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2api.3
Changed
@@ -1,4 +1,4 @@ -.TH PCRE2API 3 "30 August 2021" "PCRE2 10.38" +.TH PCRE2API 3 "14 December 2021" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -1794,7 +1794,7 @@ undefined. It may cause your program to crash or loop. .P Note that this option can also be passed to \fBpcre2_match()\fP and -\fBpcre_dfa_match()\fP, to suppress UTF validity checking of the subject +\fBpcre2_dfa_match()\fP, to suppress UTF validity checking of the subject string. .P Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the @@ -2015,8 +2015,8 @@ 256. By default, higher-valued code points never match escapes such as \ew or \ed. .P -When PCRE2 is built with Unicode support (the default), the Unicode properties -of all characters can be tested with \ep and \eP, or, alternatively, the +When PCRE2 is built with Unicode support (the default), certain Unicode +character properties can be tested with \ep and \eP, or, alternatively, the PCRE2_UCP option can be set when a pattern is compiled; this causes \ew and friends to use Unicode property support instead of the built-in tables. PCRE2_UCP also causes upper/lower casing operations on characters with code @@ -2279,7 +2279,7 @@ PCRE2_INFO_LASTCODETYPE .sp Returns 1 if there is a rightmost literal code unit that must exist in any -matched string, other than at its start. The third argument should point to a +matched string, other than at its start. The third argument should point to a \fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is returned, the code unit value itself can be retrieved using PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is @@ -2624,7 +2624,9 @@ \fIstartoffset\fP. The length and offset are in code units, not characters. That is, they are in bytes for the 8-bit library, 16-bit code units for the 16-bit library, and 32-bit code units for the 32-bit library, whether or not -UTF processing is enabled. 
+UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and +\fIlength\fP is zero, the subject is assumed to be an empty string. If +\fIlength\fP is non-zero, an error occurs if \fIsubject\fP is NULL. .P If \fIstartoffset\fP is greater than the length of the subject, \fBpcre2_match()\fP returns PCRE2_ERROR_BADOFFSET. When the starting offset is @@ -3413,12 +3415,16 @@ .P This function optionally calls \fBpcre2_match()\fP and then makes a copy of the subject string in \fIoutputbuffer\fP, replacing parts that were matched with -the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This -can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an -option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the -replacement string(s). The default action is to perform just one replacement if -the pattern matches, but there is an option that requests multiple replacements -(see PCRE2_SUBSTITUTE_GLOBAL below). +the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP, which +can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a +special case, if \fIreplacement\fP is NULL and \fIrlength\fP is zero, the +replacement is assumed to be an empty string. If \fIrlength\fP is non-zero, an +error occurs if \fIreplacement\fP is NULL. +.P +There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just +the replacement string(s). The default action is to perform just one +replacement if the pattern matches, but there is an option that requests +multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below). .P If successful, \fBpcre2_substitute()\fP returns the number of substitutions that were carried out. This may be zero if no match was found, and is never @@ -3447,12 +3453,12 @@ As well as the usual options for \fBpcre2_match()\fP, a number of additional options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP. 
One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external -\fImatch_data\fP block must be provided, and it must have been used for an -external call to \fBpcre2_match()\fP. The data in the \fImatch_data\fP block -(return code, offset vector) is used for the first substitution instead of -calling \fBpcre2_match()\fP from within \fBpcre2_substitute()\fP. This allows -an application to check for a match before choosing to substitute, without -having to repeat the match. +\fImatch_data\fP block must be provided, and it must have already been used for +an external call to \fBpcre2_match()\fP with the same pattern and subject +arguments. The data in the \fImatch_data\fP block (return code, offset vector) +is then used for the first substitution instead of calling \fBpcre2_match()\fP +from within \fBpcre2_substitute()\fP. This allows an application to check for a +match before choosing to substitute, without having to repeat the match. .P The contents of the externally supplied match data block are not changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set, @@ -3584,7 +3590,7 @@ terminating a \eQ quoted sequence) reverts to no case forcing. The sequences \eu and \el force the next character (if it is a letter) to upper or lower case, respectively, and then the state automatically reverts to no case -forcing. Case forcing applies to all inserted characters, including those from +forcing. Case forcing applies to all inserted characters, including those from capture groups and letters within \eQ...\eE quoted sequences. If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode properties are used for case forcing characters whose code points are greater @@ -3649,7 +3655,9 @@ default. .P PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the -\fImatch_data\fP argument is NULL. +\fImatch_data\fP argument is NULL or if the \fIsubject\fP or \fIreplacement\fP +arguments are NULL. 
For backward compatibility reasons an exception is made for +the \fIreplacement\fP argument if the \fIrlength\fP argument is also 0. .P PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE @@ -3811,12 +3819,13 @@ .P The function \fBpcre2_dfa_match()\fP is called to match a subject string against a compiled pattern, using a matching algorithm that scans the subject -string just once (not counting lookaround assertions), and does not backtrack. -This has different characteristics to the normal algorithm, and is not -compatible with Perl. Some of the features of PCRE2 patterns are not supported. -Nevertheless, there are times when this kind of matching can be useful. For a -discussion of the two matching algorithms, and a list of features that -\fBpcre2_dfa_match()\fP does not support, see the +string just once (not counting lookaround assertions), and does not backtrack +(except when processing lookaround assertions). This has different +characteristics to the normal algorithm, and is not compatible with Perl. Some +of the features of PCRE2 patterns are not supported. Nevertheless, there are +times when this kind of matching can be useful. For a discussion of the two +matching algorithms, and a list of features that \fBpcre2_dfa_match()\fP does +not support, see the .\" HREF \fBpcre2matching\fP .\" @@ -3848,7 +3857,7 @@ wspace, /* working space vector */ 20); /* number of elements (NOT size in bytes) */ . -.SS "Option bits for \fBpcre_dfa_match()\fP" +.SS "Option bits for \fBpcre2_dfa_match()\fP" .rs .sp The unused bits of the \fIoptions\fP argument for \fBpcre2_dfa_match()\fP must @@ -4016,6 +4025,6 @@ .rs .sp .nf -Last updated: 30 August 2021 +Last updated: 14 December 2021 Copyright (c) 1997-2021 University of Cambridge. .fi
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2build.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2build.3
Changed
@@ -1,4 +1,4 @@ -.TH PCRE2BUILD 3 "20 March 2020" "PCRE2 10.35" +.TH PCRE2BUILD 3 "08 December 2021" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) . @@ -122,8 +122,9 @@ UTF support allows the libraries to process character code points up to 0x10ffff in the strings that they handle. Unicode support also gives access to the Unicode properties of characters, using pattern escapes such as \eP, \ep, -and \eX. Only the general category properties such as \fILu\fP and \fINd\fP are -supported. Details are given in the +and \eX. Only the general category properties such as \fILu\fP and \fINd\fP, +script names, and some bi-directional properties are supported. Details are +given in the .\" HREF \fBpcre2pattern\fP .\" @@ -302,7 +303,7 @@ for --with-match-limit. You can set a lower default limit by adding, for example, .sp - --with-match-limit_depth=10000 + --with-match-limit-depth=10000 .sp to the \fBconfigure\fP command. This value can be overridden at run time. This depth limit indirectly limits the amount of heap memory that is used, but @@ -633,6 +634,6 @@ .rs .sp .nf -Last updated: 20 March 2020 -Copyright (c) 1997-2020 University of Cambridge. +Last updated: 08 December 2021 +Copyright (c) 1997-2021 University of Cambridge. .fi
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2compat.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2compat.3
Changed
@@ -1,4 +1,4 @@ -.TH PCRE2COMPAT 3 "30 August 2021" "PCRE2 10.38" +.TH PCRE2COMPAT 3 "08 December 2021" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "DIFFERENCES BETWEEN PCRE2 AND PERL" @@ -6,31 +6,38 @@ .sp This document describes some of the differences in the ways that PCRE2 and Perl handle regular expressions. The differences described here are with respect to -Perl version 5.32.0, but as both Perl and PCRE2 are continually changing, the +Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the information may at times be out of date. .P -1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does +1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the +behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the +next character unless it is the start of a newline sequence. This means that, +if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF +(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using +EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline +indicator. +.P +2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does have are given in the .\" HREF \fBpcre2unicode\fP .\" page. .P -2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but +3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but they do not mean what you might think. For example, (?!a){3} does not assert that the next three characters are not "a". It just asserts that the next character is not "a" three times (in principle; PCRE2 optimizes this to run the assertion just once). Perl allows some repeat quantifiers on other assertions, -for example, \eb* (but not \eb{3}, though oddly it does allow ^{3}), but these -do not seem to have any use. PCRE2 does not allow any kind of quantifier on -non-lookaround assertions. 
+for example, \eb* , but these do not seem to have any use. PCRE2 does not allow +any kind of quantifier on non-lookaround assertions. .P -3. Capture groups that occur inside negative lookaround assertions are counted, +4. Capture groups that occur inside negative lookaround assertions are counted, but their entries in the offsets vector are set only when a negative assertion is a condition that has a matching branch (that is, the condition is false). Perl may set such capture groups in other circumstances. .P -4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu, +5. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu, \eU, and \eN when followed by a character name. \eN on its own, matching a non-newline character, and \eN{U+dd..}, matching a Unicode code point, are supported. The escapes that modify the case of following letters are @@ -40,12 +47,12 @@ PCRE2_EXTRA_ALT_BSUX options is set, \eU and \eu are interpreted as ECMAScript interprets them. .P -5. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is +6. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is built with Unicode support (the default). The properties that can be tested with \ep and \eP are limited to the general category properties such as Lu and -Nd, script names such as Greek or Han, and the derived properties Any and L&. -Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use -is limited. See the +Nd, script names such as Greek or Han, Bidi_Class, Bidi_Control, and the +derived properties Any and LC (synonym L&). Both PCRE2 and Perl support the Cs +(surrogate) property, but in PCRE2 its use is limited. See the .\" HREF \fBpcre2pattern\fP .\" @@ -53,14 +60,14 @@ supports (such as \ep{Letter}) are not supported by PCRE2, nor is it permitted to prefix any of these properties with "Is". .P -6. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters +7. 
PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters in between are treated as literals. However, this is slightly different from Perl in that $ and @ are also handled as literals inside the quotes. In Perl, -they cause variable interpolation (but of course PCRE2 does not have -variables). Also, Perl does "double-quotish backslash interpolation" on any -backslashes between \eQ and \eE which, its documentation says, "may lead to -confusing results". PCRE2 treats a backslash between \eQ and \eE just like any -other character. Note the following examples: +they cause variable interpolation (PCRE2 does not have variables). Also, Perl +does "double-quotish backslash interpolation" on any backslashes between \eQ +and \eE which, its documentation says, "may lead to confusing results". PCRE2 +treats a backslash between \eQ and \eE just like any other character. Note the +following examples: .sp Pattern PCRE2 matches Perl matches .sp @@ -75,7 +82,7 @@ The \eQ...\eE sequence is recognized both inside and outside character classes by both PCRE2 and Perl. .P -7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) +8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) constructions. However, PCRE2 does have a "callout" feature, which allows an external function to be called during pattern matching. See the .\" HREF @@ -83,11 +90,11 @@ .\" documentation for details. .P -8. Subroutine calls (whether recursive or not) were treated as atomic groups up +9. Subroutine calls (whether recursive or not) were treated as atomic groups up to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking into subroutine calls is now supported, as in Perl. .P -9. In PCRE2, if any of the backtracking control verbs are used in a group that +10. 
In PCRE2, if any of the backtracking control verbs are used in a group that is called as a subroutine (whether or not recursively), their effect is confined to that group; it does not extend to the surrounding pattern. This is not always the case in Perl. In particular, if (*THEN) is present in a group @@ -95,18 +102,18 @@ the group does not contain any | characters. Note that such groups are processed as anchored at the point where they are tested. .P -10. If a pattern contains more than one backtracking control verb, the first +11. If a pattern contains more than one backtracking control verb, the first one that is backtracked onto acts. For example, in the pattern A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the same as PCRE2, but there are cases where it differs. .P -11. There are some differences that are concerned with the settings of captured +12. There are some differences that are concerned with the settings of captured strings when part of a pattern is repeated. For example, matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to "b". .P -12. PCRE2's handling of duplicate capture group numbers and names is not as +13. PCRE2's handling of duplicate capture group numbers and names is not as general as Perl's. This is a consequence of the fact that PCRE2 works internally just with numbers, using an external table to translate between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two @@ -115,37 +122,38 @@ to distinguish which group matched, because both names map to capture group number 1. To avoid this confusing situation, an error is given at compile time. .P -13. Perl used to recognize comments in some places that PCRE2 does not, for +14. Perl used to recognize comments in some places that PCRE2 does not, for example, between the ( and ? at the start of a group.
If the /x modifier is set, Perl allowed white space between ( and ? though the latest Perls give an error (for a while it was just deprecated). There may still be some cases where Perl behaves differently. .P -14. Perl, when in warning mode, gives warnings for character classes such as +15. Perl, when in warning mode, gives warnings for character classes such as A-\ed or a-:digit:. It then treats the hyphens as literals. PCRE2 has no warning features, so it gives an error in these cases because they are almost certainly user mistakes. .P -15. In PCRE2, the upper/lower case character properties Lu and Ll are not +16. In PCRE2, the upper/lower case character properties Lu and Ll are not affected when case-independent matching is specified. For example, \ep{Lu} always matches an upper case letter. I think Perl has changed in this respect; -in the release at the time of writing (5.32), \ep{Lu} and \ep{Ll} match all +in the release at the time of writing (5.34), \ep{Lu} and \ep{Ll} match all letters, regardless of case, when case independence is specified. .P -16. From release 5.32.0, Perl locks out the use of \eK in lookaround +17. From release 5.32.0, Perl locks out the use of \eK in lookaround assertions. From release 10.38 PCRE2 does the same by default. However, there is an option for re-enabling the previous behaviour. When this option is set, \eK is acted on when it occurs in positive assertions, but is ignored in negative assertions. .P -17. PCRE2 provides some extensions to the Perl regular expression facilities. +18. PCRE2 provides some extensions to the Perl regular expression facilities. Perl 5.10 included new features that were not in earlier versions of Perl, some of which (such as named parentheses) were in PCRE2 for some time before. 
This -list is with respect to Perl 5.32: +list is with respect to Perl 5.34: .sp (a) Although lookbehind assertions in PCRE2 must match fixed length strings, each alternative toplevel branch of a lookbehind assertion can match a -different length of string. Perl requires them all to have the same length. +different length of string. Perl used to require them all to have the same +length, but the latest version has some variable length support. .sp (b) From PCRE2 10.23, backreferences to groups of fixed length are supported in lookbehinds, provided that there is no possibility of referencing a @@ -186,11 +194,11 @@ extension to the lookaround facilities. The default, Perl-compatible lookarounds are atomic. .P -18. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa +19. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode rules. This separation cannot be represented with PCRE2_UCP. .P -19. Perl has different limits than PCRE2. See the +20. Perl has different limits than PCRE2. See the .\" HREF \fBpcre2limit\fP .\" @@ -214,6 +222,6 @@ .rs .sp
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2jit.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2jit.3
Changed
@@ -1,4 +1,4 @@ -.TH PCRE2JIT 3 "23 May 2019" "PCRE2 10.34" +.TH PCRE2JIT 3 "30 November 2021" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT" @@ -251,11 +251,11 @@ starts another match, that match must use a different JIT stack to the one used for currently suspended match(es). .P -In a multithread application, if you do not -specify a JIT stack, or if you assign or pass back NULL from a callback, that -is thread-safe, because each thread has its own machine stack. However, if you -assign or pass back a non-NULL JIT stack, this must be a different stack for -each thread so that the application is thread-safe. +In a multithread application, if you do not specify a JIT stack, or if you +assign or pass back NULL from a callback, that is thread-safe, because each +thread has its own machine stack. However, if you assign or pass back a +non-NULL JIT stack, this must be a different stack for each thread so that the +application is thread-safe. .P Strictly speaking, even more is allowed. You can assign the same non-NULL stack to a match context that is used by any number of patterns, as long as they are @@ -355,8 +355,8 @@ .B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); .fi .P -The JIT executable allocator does not free all memory when it is possible. -It expects new allocations, and keeps some free memory around to improve +The JIT executable allocator does not free all memory when it is possible. It +expects new allocations, and keeps some free memory around to improve allocation speed. However, in low memory conditions, it might be better to free all possible memory. You can cause this to happen by calling pcre2_jit_free_unused_memory(). Its argument is a general context, for custom @@ -416,10 +416,10 @@ .P When you call \fBpcre2_match()\fP, as well as testing for invalid options, a number of other sanity checks are performed on the arguments. 
For example, if -the subject pointer is NULL, an immediate error is given. Also, unless -PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the -interests of speed, these checks do not happen on the JIT fast path, and if -invalid data is passed, the result is undefined. +the subject pointer is NULL but the length is non-zero, an immediate error is +given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested +for validity. In the interests of speed, these checks do not happen on the JIT +fast path, and if invalid data is passed, the result is undefined. .P Bypassing the sanity checks and the \fBpcre2_match()\fP wrapping can give speedups of more than 10%. @@ -445,6 +445,6 @@ .rs .sp .nf -Last updated: 23 May 2019 -Copyright (c) 1997-2019 University of Cambridge. +Last updated: 30 November 2021 +Copyright (c) 1997-2021 University of Cambridge. .fi
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2pattern.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2pattern.3
Changed
@@ -1,4 +1,4 @@ -.TH PCRE2PATTERN 3 "3o0 August 2021" "PCRE2 10.38" +.TH PCRE2PATTERN 3 "12 January 2022" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION DETAILS" @@ -509,7 +509,6 @@ .\" JOIN \e377 might be a backreference, otherwise the value 255 (decimal) -.\" JOIN \e81 is always a backreference .sp Note that octal values of 100 or greater that are specified using this syntax @@ -773,200 +772,64 @@ sequences are of course limited to testing characters whose code points are less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit) may be encountered. These are all -treated as being in the Unknown script and with an unassigned type. The extra -escape sequences are: +treated as being in the Unknown script and with an unassigned type. +.P +Matching characters by Unicode property is not fast, because PCRE2 has to do a +multistage table lookup in order to find a character's property. That is why +the traditional escape sequences such as \ed and \ew do not use Unicode +properties in PCRE2 by default, though you can make them do so by setting the +PCRE2_UCP option or by starting the pattern with (*UCP). +.P +The extra escape sequences that provide property support are: .sp \ep{\fIxx\fP} a character with the \fIxx\fP property \eP{\fIxx\fP} a character without the \fIxx\fP property \eX a Unicode extended grapheme cluster .sp -The property names represented by \fIxx\fP above are case-sensitive. There is -support for Unicode script names, Unicode general category properties, "Any", -which matches any character (including newline), and some special PCRE2 -properties (described in the +The property names represented by \fIxx\fP above are not case-sensitive, and in +accordance with Unicode's "loose matching" rules, spaces, hyphens, and +underscores are ignored. 
There is support for Unicode script names, Unicode +general category properties, "Any", which matches any character (including +newline), Bidi_Class, a number of binary (yes/no) properties, and some special +PCRE2 properties (described .\" HTML <a href="#extraprops"> .\" </a> -next section). +below). .\" -Other Perl properties such as "InMusicalSymbols" are not supported by PCRE2. -Note that \eP{Any} does not match any characters, so always causes a match -failure. -.P -Sets of Unicode characters are defined as belonging to certain scripts. A -character from one of these sets can be matched using a script name. For -example: -.sp - \ep{Greek} - \eP{Han} +Certain other Perl properties such as "InMusicalSymbols" are not supported by +PCRE2. Note that \eP{Any} does not match any characters, so always causes a +match failure. +. +. +. +.SS "Script properties for \ep and \eP" +.rs .sp +There are three different syntax forms for matching a script. Each Unicode +character has a basic script and, optionally, a list of other scripts ("Script +Extensions") with which it is commonly used. Using the Adlam script as an +example, \ep{sc:Adlam} matches characters whose basic script is Adlam, whereas +\ep{scx:Adlam} matches, in addition, characters that have Adlam in their +extensions list. The full names "script" and "script extensions" for the +property types are recognized, and an equals sign is an alternative to the +colon. If a script name is given without a property type, for example, +\ep{Adlam}, it is treated as \ep{scx:Adlam}. Perl changed to this +interpretation at release 5.26 and PCRE2 changed at release 10.40. +.P Unassigned characters (and in non-UTF 32-bit mode, characters with code points greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not part of an identified script are lumped together as "Common".
The current list -of scripts is: -.P -Adlam, -Ahom, -Anatolian_Hieroglyphs, -Arabic, -Armenian, -Avestan, -Balinese, -Bamum, -Bassa_Vah, -Batak, -Bengali, -Bhaiksuki, -Bopomofo, -Brahmi, -Braille, -Buginese, -Buhid, -Canadian_Aboriginal, -Carian, -Caucasian_Albanian, -Chakma, -Cham, -Cherokee, -Chorasmian, -Common, -Coptic, -Cuneiform, -Cypriot, -Cypro_Minoan, -Cyrillic, -Deseret, -Devanagari, -Dives_Akuru, -Dogra, -Duployan, -Egyptian_Hieroglyphs, -Elbasan, -Elymaic, -Ethiopic, -Georgian, -Glagolitic, -Gothic, -Grantha, -Greek, -Gujarati, -Gunjala_Gondi, -Gurmukhi, -Han, -Hangul, -Hanifi_Rohingya, -Hanunoo, -Hatran, -Hebrew, -Hiragana, -Imperial_Aramaic, -Inherited, -Inscriptional_Pahlavi, -Inscriptional_Parthian, -Javanese, -Kaithi, -Kannada, -Katakana, -Kayah_Li, -Kharoshthi, -Khitan_Small_Script, -Khmer, -Khojki, -Khudawadi, -Lao, -Latin, -Lepcha, -Limbu, -Linear_A, -Linear_B, -Lisu, -Lycian, -Lydian, -Mahajani, -Makasar, -Malayalam, -Mandaic, -Manichaean, -Marchen, -Masaram_Gondi, -Medefaidrin, -Meetei_Mayek, -Mende_Kikakui, -Meroitic_Cursive, -Meroitic_Hieroglyphs, -Miao, -Modi, -Mongolian, -Mro, -Multani, -Myanmar, -Nabataean, -Nandinagari, -New_Tai_Lue, -Newa, -Nko, -Nushu, -Nyakeng_Puachue_Hmong, -Ogham, -Ol_Chiki, -Old_Hungarian, -Old_Italic, -Old_North_Arabian, -Old_Permic, -Old_Persian, -Old_Sogdian, -Old_South_Arabian, -Old_Turkic, -Old_Uyghur, -Oriya, -Osage, -Osmanya,
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2serialize.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2serialize.3
Changed
@@ -6,11 +6,11 @@ .sp .nf .B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, -.B " int32_t \fInumber_of_codes\fP, const uint32_t *\fIbytes\fP," +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," .B " pcre2_general_context *\fIgcontext\fP);" .sp -.B int32_t pcre2_serialize_encode(pcre2_code **\fIcodes\fP, -.B " int32_t \fInumber_of_codes\fP, uint32_t **\fIserialized_bytes\fP," +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," .B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" .sp .B void pcre2_serialize_free(uint8_t *\fIbytes\fP); @@ -141,7 +141,6 @@ \fBmalloc()\fP and \fBfree()\fP are used. After deserialization, the byte stream is no longer needed and can be discarded. .sp - int32_t number_of_codes; pcre2_code *list_of_codes2; uint8_t *bytes = <serialized data>; int32_t number_of_codes =
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2syntax.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2syntax.3
Changed
@@ -1,4 +1,4 @@ -.TH PCRE2SYNTAX 3 "30 August 2021" "PCRE2 10.38" +.TH PCRE2SYNTAX 3 "12 January 2022" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY" @@ -102,6 +102,10 @@ 128-255. If the PCRE2_UCP option is set, the behaviour of these escape sequences is changed to use Unicode properties and they match many more characters. +.P +Property descriptions in \ep and \eP are matched caselessly; hyphens, +underscores, and white space are ignored, in accordance with Unicode's "loose +matching" rules. . . .SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP" @@ -120,6 +124,7 @@ Lo Other letter Lt Title case letter Lu Upper case letter + Lc Ll, Lu, or Lt L& Ll, Lu, or Lt .sp M Mark @@ -167,170 +172,59 @@ at release 5.18. . . -.SH "SCRIPT NAMES FOR \ep AND \eP" +.SH "BINARY PROPERTIES FOR \ep AND \eP" +.rs +.sp +Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\ep and \eP, along with their abbreviations, by running this command: +.sp + pcre2test -LP +. +. +. +.SH "SCRIPT MATCHING WITH \ep AND \eP" +.rs +.sp +Many script names and their 4-letter abbreviations are recognized in +\ep{sc:...} or \ep{scx:...} items, or on their own with \ep (and also \eP of +course). You can obtain a list of these scripts by running this command: +.sp + pcre2test -LS +. +. +. 
+.SH "THE BIDI_CLASS PROPERTY FOR \ep AND \eP" .rs .sp -Adlam, -Ahom, -Anatolian_Hieroglyphs, -Arabic, -Armenian, -Avestan, -Balinese, -Bamum, -Bassa_Vah, -Batak, -Bengali, -Bhaiksuki, -Bopomofo, -Brahmi, -Braille, -Buginese, -Buhid, -Canadian_Aboriginal, -Carian, -Caucasian_Albanian, -Chakma, -Cham, -Cherokee, -Chorasmian, -Common, -Coptic, -Cuneiform, -Cypriot, -Cypro_Minoan, -Cyrillic, -Deseret, -Devanagari, -Dives_Akuru, -Dogra, -Duployan, -Egyptian_Hieroglyphs, -Elbasan, -Elymaic, -Ethiopic, -Georgian, -Glagolitic, -Gothic, -Grantha, -Greek, -Gujarati, -Gunjala_Gondi, -Gurmukhi, -Han, -Hangul, -Hanifi_Rohingya, -Hanunoo, -Hatran, -Hebrew, -Hiragana, -Imperial_Aramaic, -Inherited, -Inscriptional_Pahlavi, -Inscriptional_Parthian, -Javanese, -Kaithi, -Kannada, -Katakana, -Kayah_Li, -Kharoshthi, -Khitan_Small_Script, -Khmer, -Khojki, -Khudawadi, -Lao, -Latin, -Lepcha, -Limbu, -Linear_A, -Linear_B, -Lisu, -Lycian, -Lydian, -Mahajani, -Makasar, -Malayalam, -Mandaic, -Manichaean, -Marchen, -Masaram_Gondi, -Medefaidrin, -Meetei_Mayek, -Mende_Kikakui, -Meroitic_Cursive, -Meroitic_Hieroglyphs, -Miao, -Modi, -Mongolian, -Mro, -Multani, -Myanmar, -Nabataean, -Nandinagari, -New_Tai_Lue, -Newa, -Nko, -Nushu, -Nyakeng_Puachue_Hmong, -Ogham, -Ol_Chiki, -Old_Hungarian, -Old_Italic, -Old_North_Arabian, -Old_Permic, -Old_Persian, -Old_Sogdian, -Old_South_Arabian, -Old_Turkic, -Old_Uyghur, -Oriya, -Osage, -Osmanya, -Pahawh_Hmong, -Palmyrene, -Pau_Cin_Hau, -Phags_Pa, -Phoenician, -Psalter_Pahlavi, -Rejang, -Runic, -Samaritan, -Saurashtra, -Sharada, -Shavian, -Siddham, -SignWriting, -Sinhala, -Sogdian, -Sora_Sompeng, -Soyombo, -Sundanese, -Syloti_Nagri, -Syriac, -Tagalog, -Tagbanwa, -Tai_Le, -Tai_Tham, -Tai_Viet, -Takri, -Tamil, -Tangsa,
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2test.1 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2test.1
Changed
@@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "30 August 2021" "PCRE 10.38" +.TH PCRE2TEST 1 "12 January 2022" "PCRE 10.40" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -47,7 +47,7 @@ to 8-bit code units for output. .P In the rest of this document, the names of library functions and structures -are given in generic form, for example, \fBpcre_compile()\fP. The actual +are given in generic form, for example, \fBpcre2_compile()\fP. The actual names used in the libraries have a suffix _8, _16, or _32, as appropriate. . . @@ -211,7 +211,17 @@ \fB-LM\fP List modifiers: write a list of available pattern and subject modifiers to the standard output, then exit with zero exit code. All other options are ignored. -If both -C and -LM are present, whichever is first is recognized. +If both -C and any -Lx options are present, whichever is first is recognized. +.TP 10 +\fB-LP\fP +List properties: write a list of recognized Unicode properties to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +.TP 10 +\fB-LS\fP +List scripts: write a list of recognized Unicode script names to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. .TP 10 \fB-pattern\fP \fImodifier-list\fP Behave as if each pattern line contains the given modifiers. @@ -1206,6 +1216,8 @@ match_limit=<n> set a match limit memory show heap memory usage null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject offset=<n> set starting offset offset_limit=<n> set offset limit ovector=<n> set size of output vector @@ -1629,7 +1641,7 @@ passing the replacement string as zero-terminated. . . 
-.SS "Passing a NULL context" +.SS "Passing a NULL context, subject, or replacement" .rs .sp Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP, @@ -1638,6 +1650,10 @@ testing that the matching and substitution functions behave correctly in this case (they use default values). This modifier cannot be used with the \fBfind_limits\fP or \fBsubstitute_callout\fP modifiers. +.P +Similarly, for testing purposes, if the \fBnull_subject\fP or +\fBnull_replacement\fP modifier is set, the subject or replacement string +pointers are passed as NULL, respectively, to the relevant functions. . . .SH "THE ALTERNATIVE MATCHING FUNCTION" @@ -2103,6 +2119,6 @@ .rs .sp .nf -Last updated: 30 August 2021 -Copyright (c) 1997-2021 University of Cambridge. +Last updated: 12 January 2022 +Copyright (c) 1997-2022 University of Cambridge. .fi
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2test.txt -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2test.txt
Changed
@@ -44,7 +44,7 @@ output. In the rest of this document, the names of library functions and struc- - tures are given in generic form, for example, pcre_compile(). The ac- + tures are given in generic form, for example, pcre2_compile(). The ac- tual names used in the libraries have a suffix _8, _16, or _32, as ap- propriate. @@ -197,7 +197,17 @@ -LM List modifiers: write a list of available pattern and subject modifiers to the standard output, then exit with zero exit - code. All other options are ignored. If both -C and -LM are + code. All other options are ignored. If both -C and any -Lx + options are present, whichever is first is recognized. + + -LP List properties: write a list of recognized Unicode proper- + ties to the standard output, then exit with zero exit code. + All other options are ignored. If both -C and any -Lx options + are present, whichever is first is recognized. + + -LS List scripts: write a list of recognized Unicode script names + to the standard output, then exit with zero exit code. All + other options are ignored. If both -C and any -Lx options are present, whichever is first is recognized. -pattern modifier-list @@ -1111,6 +1121,8 @@ match_limit=<n> set a match limit memory show heap memory usage null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject offset=<n> set starting offset offset_limit=<n> set offset limit ovector=<n> set size of output vector @@ -1499,7 +1511,7 @@ When testing pcre2_substitute(), this modifier also has the effect of passing the replacement string as zero-terminated. - Passing a NULL context + Passing a NULL context, subject, or replacement Normally, pcre2test passes a context block to pcre2_match(), pcre2_dfa_match(), pcre2_jit_match() or pcre2_substitute(). If the @@ -1508,6 +1520,10 @@ in this case (they use default values). This modifier cannot be used with the find_limits or substitute_callout modifiers. 
+ Similarly, for testing purposes, if the null_subject or null_replace- + ment modifier is set, the subject or replacement string pointers are + passed as NULL, respectively, to the relevant functions. + THE ALTERNATIVE MATCHING FUNCTION @@ -1933,5 +1949,5 @@ REVISION - Last updated: 30 August 2021 - Copyright (c) 1997-2021 University of Cambridge. + Last updated: 12 January 2022 + Copyright (c) 1997-2022 University of Cambridge.
View file
_service:tar_scm:pcre2-10.39.tar.bz2/doc/pcre2unicode.3 -> _service:tar_scm:pcre2-10.40.tar.bz2/doc/pcre2unicode.3
Changed
@@ -1,4 +1,4 @@ -.TH PCRE2UNICODE 3 "23 February 2020" "PCRE2 10.35" +.TH PCRE2UNICODE 3 "22 December 2021" "PCRE2 10.40" .SH NAME PCRE - Perl-compatible regular expressions (revised API) .SH "UNICODE AND UTF SUPPORT" @@ -40,10 +40,11 @@ .sp When PCRE2 is built with Unicode support, the escape sequences \ep{..}, \eP{..}, and \eX can be used. This is not dependent on the PCRE2_UTF setting. -The Unicode properties that can be tested are limited to the general category -properties such as Lu for an upper case letter or Nd for a decimal number, the -Unicode script names such as Arabic or Han, and the derived properties Any and -L&. Full lists are given in the +The Unicode properties that can be tested are a subset of those that Perl +supports. Currently they are limited to the general category properties such as +Lu for an upper case letter or Nd for a decimal number, the Unicode script +names such as Arabic or Han, Bidi_Class, Bidi_Control, and the derived +properties Any and LC (synonym L&). Full lists are given in the .\" HREF \fBpcre2pattern\fP .\" @@ -51,10 +52,10 @@ .\" HREF \fBpcre2syntax\fP .\" -documentation. Only the short names for properties are supported. For example, -\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported. -Furthermore, in Perl, many properties may optionally be prefixed by "Is", for -compatibility with Perl 5.6. PCRE2 does not support this. +documentation. In general, only the short names for properties are supported. +For example, \ep{L} matches a letter. Its longer synonym, \ep{Letter}, is not +supported. Furthermore, in Perl, many properties may optionally be prefixed by +"Is", for compatibility with Perl 5.6. PCRE2 does not support this. . . .SH "WIDE CHARACTERS AND UTF MODES" @@ -448,7 +449,7 @@ .sp .nf Philip Hazel -University Computing Service +Retired from University Computing Service Cambridge, England. .fi . 
@@ -457,6 +458,6 @@ .rs .sp .nf -Last updated: 23 February 2020 -Copyright (c) 1997-2020 University of Cambridge. +Last updated: 22 December 2021 +Copyright (c) 1997-2021 University of Cambridge. .fi
View file
_service:tar_scm:pcre2-10.39.tar.bz2/ltmain.sh -> _service:tar_scm:pcre2-10.40.tar.bz2/ltmain.sh
Changed
@@ -1,12 +1,12 @@ -#! /bin/sh +#! /usr/bin/env sh ## DO NOT EDIT - This file generated from ./build-aux/ltmain.in -## by inline-source v2018-07-24.06 +## by inline-source v2019-02-19.15 -# libtool (GNU libtool) 2.4.6.42-b88ce-dirty +# libtool (GNU libtool) 2.4.6.59-b55b-dirty # Provide generalized library-building support services. # Written by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996 -# Copyright (C) 1996-2018 Free Software Foundation, Inc. +# Copyright (C) 1996-2019, 2021 Free Software Foundation, Inc. # This is free software; see the source for copying conditions. There is NO # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. @@ -31,8 +31,8 @@ PROGRAM=libtool PACKAGE=libtool -VERSION=2.4.6.42-b88ce-dirty -package_revision=2.4.6.42 +VERSION=2.4.6.59-b55b-dirty +package_revision=2.4.6.59 ## ------ ## @@ -64,7 +64,7 @@ # libraries, which are installed to $pkgauxdir. # Set a version string for this script. -scriptversion=2018-07-24.06; # UTC +scriptversion=2019-02-19.15; # UTC # General shell script boiler plate, and helper functions. # Written by Gary V. Vaughan, 2004 @@ -72,7 +72,7 @@ # This is free software. There is NO warranty; not even for # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -# Copyright (C) 2004-2018 Bootstrap Authors +# Copyright (C) 2004-2019, 2021 Bootstrap Authors # # This file is dual licensed under the terms of the MIT license # <https://opensource.org/license/MIT>, and GPL version 3 or later @@ -130,6 +130,12 @@ _G_safe_locale=\"$_G_var=C; \$_G_safe_locale\" fi" done +# These NLS vars are set unconditionally (bootstrap issue #24). Unset those +# in case the environment reset is needed later and the $save_* variant is not +# defined (see the code above). 
+LC_ALL=C +LANGUAGE=C +export LANGUAGE LC_ALL # Make sure IFS has a sensible default sp=' ' @@ -368,6 +374,35 @@ s/\\(^$_G_bs\\)$_G_bs2$_G_dollar/\\1$_G_bs2$_G_bs$_G_dollar/g s/\n//g" +# require_check_ifs_backslash +# --------------------------- +# Check if we can use backslash as IFS='\' separator, and set +# $check_ifs_backshlash_broken to ':' or 'false'. +require_check_ifs_backslash=func_require_check_ifs_backslash +func_require_check_ifs_backslash () +{ + _G_save_IFS=$IFS + IFS='\' + _G_check_ifs_backshlash='a\\b' + for _G_i in $_G_check_ifs_backshlash + do + case $_G_i in + a) + check_ifs_backshlash_broken=false + ;; + '') + break + ;; + *) + check_ifs_backshlash_broken=: + break + ;; + esac + done + IFS=$_G_save_IFS + require_check_ifs_backslash=: +} + ## ----------------- ## ## Global variables. ## @@ -1108,6 +1143,8 @@ { $debug_cmd + $require_check_ifs_backslash + func_quote_portable_result=$2 # one-time-loop (easy break) @@ -1122,8 +1159,10 @@ # Quote for eval. case $func_quote_portable_result in *\\\`\"\$*) - case $func_quote_portable_result in - *\\*\?*) + # Fallback to sed for $func_check_bs_ifs_broken=:, or when the string + # contains the shell wildcard characters. + case $check_ifs_backshlash_broken$func_quote_portable_result in + :*|*\\*\?*) func_quote_portable_result=`$ECHO "$func_quote_portable_result" \ | $SED "$sed_quote_subst"` break @@ -1497,7 +1536,7 @@ # This is free software. There is NO warranty; not even for # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -# Copyright (C) 2010-2018 Bootstrap Authors +# Copyright (C) 2010-2019, 2021 Bootstrap Authors # # This file is dual licensed under the terms of the MIT license # <https://opensource.org/license/MIT>, and GPL version 3 or later @@ -1510,7 +1549,7 @@ # <https://github.com/gnulib-modules/bootstrap/issues> # Set a version string for this script. 
-scriptversion=2018-07-24.06; # UTC +scriptversion=2019-02-19.15; # UTC ## ------ ## @@ -2056,7 +2095,7 @@ func_split_equals_lhs=`expr "x$1" : 'x\(^=*\)'` func_split_equals_rhs= - test "x$func_split_equals_lhs" = "x$1" \ + test "x$func_split_equals_lhs=" = "x$1" \ || func_split_equals_rhs=`expr "x$1" : 'x^=*=\(.*\)$'` } fi #func_split_equals @@ -2082,7 +2121,7 @@ { $debug_cmd - func_split_short_opt_name=`expr "x$1" : 'x-\(.\)'` + func_split_short_opt_name=`expr "x$1" : 'x\(-.\)'` func_split_short_opt_arg=`expr "x$1" : 'x-.\(.*\)$'` } fi #func_split_short_opt @@ -2176,7 +2215,7 @@ # End: # Set a version string. -scriptversion='(GNU libtool) 2.4.6.42-b88ce-dirty' +scriptversion='(GNU libtool) 2.4.6.59-b55b-dirty' # func_echo ARG... @@ -2267,7 +2306,7 @@ compiler: $LTCC compiler flags: $LTCFLAGS linker: $LD (gnu? $with_gnu_ld) - version: $progname (GNU libtool) 2.4.6.42-b88ce-dirty + version: $progname (GNU libtool) 2.4.6.59-b55b-dirty automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q` autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q` @@ -3862,7 +3901,8 @@ -prefer-non-pic try to build non-PIC objects only -shared do not build a '.o' file suitable for static linking -static only build a '.o' file suitable for static linking - -Wc,FLAG pass FLAG directly to the compiler + -Wc,FLAG + -Xcompiler FLAG pass FLAG directly to the compiler COMPILE-COMMAND is a command to be used in creating a 'standard' object file from the given SOURCEFILE. 
@@ -3968,6 +4008,8 @@ -weak LIBNAME declare that the target provides the LIBNAME interface -Wc,FLAG -Xcompiler FLAG pass linker-specific FLAG directly to the compiler + -Wa,FLAG + -Xassembler FLAG pass linker-specific FLAG directly to the assembler -Wl,FLAG -Xlinker FLAG pass linker-specific FLAG directly to the linker -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) @@ -7064,6 +7106,13 @@ prev= continue ;; + xassembler) + func_append compiler_flags " -Xassembler $qarg" + prev= + func_append compile_command " -Xassembler $qarg" + func_append finalize_command " -Xassembler $qarg" + continue
View file
_service:tar_scm:pcre2-10.39.tar.bz2/m4/libtool.m4 -> _service:tar_scm:pcre2-10.40.tar.bz2/m4/libtool.m4
Changed
@@ -1,6 +1,7 @@ # libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- # -# Copyright (C) 1996-2001, 2003-2018 Free Software Foundation, Inc. +# Copyright (C) 1996-2001, 2003-2019, 2021 Free Software Foundation, +# Inc. # Written by Gordon Matzigkeit, 1996 # # This file is free software; the Free Software Foundation gives @@ -31,7 +32,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. ) -# serial 58 LT_INIT +# serial 59 LT_INIT # LT_PREREQ(VERSION) @@ -181,6 +182,7 @@ m4_require(_LT_CHECK_SHELL_FEATURES)dnl m4_require(_LT_PATH_CONVERSION_FUNCTIONS)dnl m4_require(_LT_CMD_RELOAD)dnl +m4_require(_LT_DECL_FILECMD)dnl m4_require(_LT_CHECK_MAGIC_METHOD)dnl m4_require(_LT_CHECK_SHAREDLIB_FROM_LINKLIB)dnl m4_require(_LT_CMD_OLD_ARCHIVE)dnl @@ -777,7 +779,7 @@ # if finds mixed CR/LF and LF-only lines. Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? - sed '$q' "$ltmain" >> "$cfgfile" \ + $SED '$q' "$ltmain" >> "$cfgfile" \ || (rm -f "$cfgfile"; exit 1) mv -f "$cfgfile" "$ofile" || @@ -1066,17 +1068,12 @@ _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; darwin1.*) _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; - darwin*) # darwin 5.x on - # if running on 10.5 or later, the deployment target defaults - # to the OS version, if on x86, and 10.4, the deployment - # target defaults to 10.4. Don't you love it? 
- case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in - 10.0,*86*-darwin8*|10.0,*-darwin91*) - _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; - 10.012,.*) - _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; - 10.*) - _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + darwin*) + case ${MACOSX_DEPLOYMENT_TARGET},$host in + 10.012,*|,*powerpc*-darwin5-8*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; esac ;; esac @@ -1125,12 +1122,12 @@ output_verbose_link_cmd=func_echo_all _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" - _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" - _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(module_expsym_cmds, $1)="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs 
\$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" m4_if($1, CXX, if test yes != "$lt_cv_apple_cc_single_mod"; then _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" - _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" fi ,) else @@ -1244,7 +1241,8 @@ # _LT_WITH_SYSROOT # ---------------- AC_DEFUN(_LT_WITH_SYSROOT, -AC_MSG_CHECKING(for sysroot) +m4_require(_LT_DECL_SED)dnl +AC_MSG_CHECKING(for sysroot) AC_ARG_WITH(sysroot, AS_HELP_STRING(--with-sysroot@<:@=DIR@:>@, Search for dependent libraries within DIR (or the compiler's sysroot @@ -1261,7 +1259,7 @@ fi ;; #( /*) - lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` ;; #( no|'') ;; #( @@ -1291,7 +1289,7 @@ # options accordingly. 
echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE=32 ;; @@ -1308,7 +1306,7 @@ echo '#line '$LINENO' "configure"' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then if test yes = "$lt_cv_prog_gnu_ld"; then - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; @@ -1320,7 +1318,7 @@ ;; esac else - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; @@ -1342,7 +1340,7 @@ echo '#line '$LINENO' "configure"' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then emul=elf - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *32-bit*) emul="${emul}32" ;; @@ -1350,7 +1348,7 @@ emul="${emul}64" ;; esac - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *MSB*) emul="${emul}btsmip" ;; @@ -1358,7 +1356,7 @@ emul="${emul}ltsmip" ;; esac - case `/usr/bin/file conftest.$ac_objext` in + case `$FILECMD conftest.$ac_objext` in *N32*) emul="${emul}n32" ;; @@ -1378,14 +1376,14 @@ # not appear in the list. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.o` in + case `$FILECMD conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) - case `/usr/bin/file conftest.o` in + case `$FILECMD conftest.o` in *x86-64*) LD="${LD-ld} -m elf32_x86_64" ;; @@ -1453,7 +1451,7 @@ # options accordingly. 
echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.o` in + case `$FILECMD conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) @@ -1726,7 +1724,7 @@ lt_cv_sys_max_cmd_len=8192; ;; - bitrig* | darwin* | dragonfly* | freebsd* | netbsd* | openbsd*) + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` @@ -1769,7 +1767,7 @@ sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then - lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.* //'` + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.* //'` else lt_cv_sys_max_cmd_len=32768 fi @@ -2570,7 +2568,7 @@ case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' - soname_spec='`echo $libname | sed -e 's/^lib/cyg/'``echo $release | $SED -e 's/./-/g'`$versuffix$shared_ext' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/./-/g'`$versuffix$shared_ext' m4_if($1, , sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api")
View file
_service:tar_scm:pcre2-10.39.tar.bz2/m4/ltoptions.m4 -> _service:tar_scm:pcre2-10.40.tar.bz2/m4/ltoptions.m4
Changed
@@ -1,6 +1,6 @@ # Helper functions for option handling. -*- Autoconf -*- # -# Copyright (C) 2004-2005, 2007-2009, 2011-2018 Free Software +# Copyright (C) 2004-2005, 2007-2009, 2011-2019, 2021 Free Software # Foundation, Inc. # Written by Gary V. Vaughan, 2004 #
View file
_service:tar_scm:pcre2-10.39.tar.bz2/m4/ltsugar.m4 -> _service:tar_scm:pcre2-10.40.tar.bz2/m4/ltsugar.m4
Changed
@@ -1,6 +1,6 @@ # ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- # -# Copyright (C) 2004-2005, 2007-2008, 2011-2018 Free Software +# Copyright (C) 2004-2005, 2007-2008, 2011-2019, 2021 Free Software # Foundation, Inc. # Written by Gary V. Vaughan, 2004 #
View file
_service:tar_scm:pcre2-10.39.tar.bz2/m4/ltversion.m4 -> _service:tar_scm:pcre2-10.40.tar.bz2/m4/ltversion.m4
Changed
@@ -1,6 +1,6 @@ # ltversion.m4 -- version numbers -*- Autoconf -*- # -# Copyright (C) 2004, 2011-2018 Free Software Foundation, Inc. +# Copyright (C) 2004, 2011-2019, 2021 Free Software Foundation, Inc. # Written by Scott James Remnant, 2004 # # This file is free software; the Free Software Foundation gives @@ -9,15 +9,15 @@ # @configure_input@ -# serial 4221 ltversion.m4 +# serial 4238 ltversion.m4 # This file is part of GNU Libtool -m4_define(LT_PACKAGE_VERSION, 2.4.6.42-b88ce-dirty) -m4_define(LT_PACKAGE_REVISION, 2.4.6.42) +m4_define(LT_PACKAGE_VERSION, 2.4.6.59-b55b-dirty) +m4_define(LT_PACKAGE_REVISION, 2.4.6.59) AC_DEFUN(LTVERSION_VERSION, -macro_version='2.4.6.42-b88ce-dirty' -macro_revision='2.4.6.42' +macro_version='2.4.6.59-b55b-dirty' +macro_revision='2.4.6.59' _LT_DECL(, macro_version, 0, Which release of libtool.m4 was used?) _LT_DECL(, macro_revision, 0) )
View file
_service:tar_scm:pcre2-10.39.tar.bz2/m4/lt~obsolete.m4 -> _service:tar_scm:pcre2-10.40.tar.bz2/m4/lt~obsolete.m4
Changed
@@ -1,6 +1,6 @@ # lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- # -# Copyright (C) 2004-2005, 2007, 2009, 2011-2018 Free Software +# Copyright (C) 2004-2005, 2007, 2009, 2011-2019, 2021 Free Software # Foundation, Inc. # Written by Scott James Remnant, 2004. #
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/config.h.generic -> _service:tar_scm:pcre2-10.40.tar.bz2/src/config.h.generic
Changed
@@ -97,6 +97,9 @@ /* Have PTHREAD_PRIO_INHERIT. */ /* #undef HAVE_PTHREAD_PRIO_INHERIT */ +/* Define to 1 if you have the <readline.h> header file. */ +/* #undef HAVE_READLINE_H */ + /* Define to 1 if you have the <readline/history.h> header file. */ /* #undef HAVE_READLINE_HISTORY_H */ @@ -233,7 +236,7 @@ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.39" +#define PACKAGE_STRING "PCRE2 10.40" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -242,7 +245,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "10.39" +#define PACKAGE_VERSION "10.40" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -435,7 +438,7 @@ #endif /* Version number of package */ -#define VERSION "10.39" +#define VERSION "10.40" /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/config.h.in -> _service:tar_scm:pcre2-10.40.tar.bz2/src/config.h.in
Changed
@@ -97,6 +97,9 @@ /* Have PTHREAD_PRIO_INHERIT. */ #undef HAVE_PTHREAD_PRIO_INHERIT +/* Define to 1 if you have the <readline.h> header file. */ +#undef HAVE_READLINE_H + /* Define to 1 if you have the <readline/history.h> header file. */ #undef HAVE_READLINE_HISTORY_H
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2.h.generic -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2.h.generic
Changed
@@ -42,9 +42,9 @@ /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 39 +#define PCRE2_MINOR 40 #define PCRE2_PRERELEASE -#define PCRE2_DATE 2021-10-29 +#define PCRE2_DATE 2022-04-14 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_auto_possess.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_auto_possess.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -123,18 +123,21 @@ */ static const uint8_t propposstabPT_TABSIZEPT_TABSIZE = { -/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ - { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */ - { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */ - { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */ - { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ - { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */ - { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */ - { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */ - { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */ +/* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ + { 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */ + { 0, 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ + { 0, 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ + { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ + { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */ + { 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */ + { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */ + { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */ + { 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* 
PT_BIDICL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */ }; /* This table is used to check whether auto-possessification is possible @@ -196,6 +199,7 @@ check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata, BOOL negated) { +BOOL ok; const uint32_t *p; const ucd_record *prop = GET_UCD(c); @@ -215,6 +219,11 @@ case PT_SC: return (pdata == prop->script) == negated; + case PT_SCX: + ok = (pdata == prop->script + || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0); + return ok == negated; + /* These are specials */ case PT_ALNUM: @@ -251,6 +260,14 @@ if (c == *p++) return negated; } break; /* Control never reaches here */ + + /* Haven't yet thought these through. */ + + case PT_BIDICL: + return FALSE; + + case PT_BOOL: + return FALSE; } return FALSE;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_compile.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_compile.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -124,7 +124,7 @@ static int compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t, - uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *, + uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, compile_block *, PCRE2_SIZE *); static int @@ -385,13 +385,15 @@ #define SETBIT(a,b) a(b)/8 = (uint8_t)(a(b)/8 | (1u << ((b)&7))) -/* Private flags added to firstcu and reqcu. */ - -#define REQ_CASELESS (1u << 0) /* Indicates caselessness */ -#define REQ_VARY (1u << 1) /* reqcu followed non-literal item */ -/* Negative values for the firstcu and reqcu flags */ -#define REQ_UNSET (-2) /* Not yet found anything */ -#define REQ_NONE (-1) /* Found not fixed char */ +/* Values and flags for the unsigned xxcuflags variables that accompany xxcu +variables, which are concerned with first and required code units. A value +greater than or equal to REQ_NONE means "no code unit set"; otherwise the +matching xxcu variable is set, and the low valued bits are relevant. */ + +#define REQ_UNSET 0xffffffffu /* Not yet found anything */ +#define REQ_NONE 0xfffffffeu /* Found not fixed character */ +#define REQ_CASELESS 0x00000001u /* Code unit in xxcu is caseless */ +#define REQ_VARY 0x00000002u /* Code unit is followed by non-literal */ /* These flags are used in the groupinfo vector. 
*/ @@ -2088,7 +2090,9 @@ PCRE2_UCHAR c; PCRE2_SIZE i, bot, top; PCRE2_SPTR ptr = *ptrptr; -PCRE2_UCHAR name32; +PCRE2_UCHAR name50; +PCRE2_UCHAR *vptr = NULL; +uint16_t ptscript = PT_NOTSCRIPT; if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; @@ -2100,36 +2104,95 @@ if (c == CHAR_LEFT_CURLY_BRACKET) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; + if (*ptr == CHAR_CIRCUMFLEX_ACCENT) { *negptr = TRUE; ptr++; } + for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; + while (c == '_' || c == '-' || isspace(c)) + { + if (ptr >= cb->end_pattern) goto ERROR_RETURN; + c = *ptr++; + } if (c == CHAR_NUL) goto ERROR_RETURN; if (c == CHAR_RIGHT_CURLY_BRACKET) break; - namei = c; + namei = tolower(c); + if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i; } + if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; namei = 0; } -/* Otherwise there is just one following character, which must be an ASCII -letter. */ +/* If { doesn't follow \p or \P there is just one following character, which +must be an ASCII letter. */ else if (MAX_255(c) && (cb->ctypesc & ctype_letter) != 0) { - name0 = c; + name0 = tolower(c); name1 = 0; } else goto ERROR_RETURN; *ptrptr = ptr; -/* Search for a recognized property name using binary chop. */ +/* If the property contains ':' or '=' we have class name and value separately +specified. The following are supported: + + . Bidi_Class (synonym bc), for which the property names are "bidi<name>". + . Script (synonym sc) for which the property name is the script name + . Script_Extensions (synonym scx), ditto + +As this is a small number, we currently just check the names directly. If this +grows, a sorted table and a switch will be neater. + +For both the script properties, set a PT_xxx value so that (1) they can be +distinguished and (2) invalid script names that happen to be the name of +another property can be diagnosed. 
*/ + +if (vptr != NULL) + { + int offset = 0; + PCRE2_UCHAR sname8; + + *vptr = 0; /* Terminate property name */ + if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 || + PRIV(strcmp_c8)(name, STRING_bc) == 0) + { + offset = 4; + sname0 = CHAR_b; + sname1 = CHAR_i; /* There is no strcpy_c8 function */ + sname2 = CHAR_d; + sname3 = CHAR_i; + } + + else if (PRIV(strcmp_c8)(name, STRING_script) == 0 || + PRIV(strcmp_c8)(name, STRING_sc) == 0) + ptscript = PT_SC; + + else if (PRIV(strcmp_c8)(name, STRING_scriptextensions) == 0 || + PRIV(strcmp_c8)(name, STRING_scx) == 0) + ptscript = PT_SCX; + + else + { + *errorcodeptr = ERR47; + return FALSE; + } + + /* Adjust the string in name as needed */ + + memmove(name + offset, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR)); + if (offset != 0) memmove(name, sname, offset*sizeof(PCRE2_UCHAR)); + } + +/* Search for a recognized property using binary chop. */ bot = 0; top = PRIV(utt_size); @@ -2139,15 +2202,37 @@ int r; i = (bot + top) >> 1; r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)i.name_offset); + + /* When a matching property is found, some extra checking is needed when the + \p{xx:yy} syntax is used and xx is either sc or scx. 
*/ + if (r == 0) { - *ptypeptr = PRIV(utt)i.type; *pdataptr = PRIV(utt)i.value; - return TRUE; + if (vptr == NULL || ptscript == PT_NOTSCRIPT) + { + *ptypeptr = PRIV(utt)i.type; + return TRUE; + } + + switch (PRIV(utt)i.type) + { + case PT_SC: + *ptypeptr = PT_SC; + return TRUE; + + case PT_SCX: + *ptypeptr = ptscript; + return TRUE; + } + + break; /* Non-script found */ } + if (r > 0) bot = i + 1; else top = i; } -*errorcodeptr = ERR47; /* Unrecognized name */ + +*errorcodeptr = ERR47; /* Unrecognized property */ return FALSE; ERROR_RETURN: /* Malformed \P or \p */ @@ -5285,9 +5370,9 @@ pptrptr points to the current parsed pattern pointer errorcodeptr points to error code variable firstcuptr place to put the first required code unit - firstcuflagsptr place to put the first code unit flags, or a negative number + firstcuflagsptr place to put the first code unit flags
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_dfa_match.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_dfa_match.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -1193,6 +1193,11 @@ OK = prop->script == code2; break; + case PT_SCX: + OK = (prop->script == code2 || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code2) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -1240,6 +1245,15 @@ c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code2; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code2) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1451,6 +1465,11 @@ OK = prop->script == code3; break; + case PT_SCX: + OK = (prop->script == code3 || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code3) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -1498,6 +1517,15 @@ c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code3; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code3) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1692,6 +1720,11 @@ OK = prop->script == code3; break; + case PT_SCX: + OK = (prop->script == code3 || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code3) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -1739,6 +1772,15 @@ c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code3; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code3) != 0; + break; + /* Should never occur, but keep compilers from grumbling. 
*/ default: @@ -1958,6 +2000,12 @@ OK = prop->script == code1 + IMM2_SIZE + 2; break; + case PT_SCX: + OK = (prop->script == code1 + IMM2_SIZE + 2 || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), + code1 + IMM2_SIZE + 2) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -2005,6 +2053,15 @@ c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code1 + IMM2_SIZE + 2; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code1 + IMM2_SIZE + 2) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -3285,20 +3342,22 @@ rws->size = RWS_BASE_SIZE; rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE; -/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated -subject string. */ +/* Recognize NULL, length 0 as an empty string. */ -if (length == PCRE2_ZERO_TERMINATED) - { - length = PRIV(strlen)(subject); - was_zero_terminated = 1; - } +if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; /* Plausibility checks */ if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) return PCRE2_ERROR_NULL; + +if (length == PCRE2_ZERO_TERMINATED) + { + length = PRIV(strlen)(subject); + was_zero_terminated = 1; + } + if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_error.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_error.c
Changed
@@ -119,7 +119,7 @@ /* 45 */ "this version of PCRE2 does not have support for \\P, \\p, or \\X\0" "malformed \\P or \\p sequence\0" - "unknown property name after \\P or \\p\0" + "unknown property after \\P or \\p\0" "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0" "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" /* 50 */ @@ -253,7 +253,7 @@ "unknown substring\0" /* 50 */ "non-unique substring name\0" - "NULL argument passed\0" + "NULL argument passed with non-zero length\0" "nested recursion at the same subject position\0" "matching depth limit exceeded\0" "requested value is not available\0"
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_extuni.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_extuni.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -105,7 +105,7 @@ /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { int ricount = 0; PCRE2_SPTR bptr = eptr - 1; @@ -123,7 +123,7 @@ } else c = *bptr; - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break; + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; } if ((ricount & 1) != 0) break; /* Grapheme break required */
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_fuzzsupport.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_fuzzsupport.c
Changed
@@ -151,6 +151,10 @@ int j; uint32_t save_match_options = match_options; +#ifdef SUPPORT_JIT + pcre2_jit_compile(code, PCRE2_JIT_COMPLETE); +#endif + /* Create match data and context blocks only when we first need them. Set low match and depth limits to avoid wasting too much searching large pattern trees. Almost all matches are going to fail. */
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_internal.h -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_internal.h
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -954,6 +954,13 @@ #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #define STRING_MARK "MARK" +#define STRING_bc "bc" +#define STRING_bidiclass "bidiclass" +#define STRING_sc "sc" +#define STRING_script "script" +#define STRING_scriptextensions "scriptextensions" +#define STRING_scx "scx" + #else /* SUPPORT_UNICODE */ /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This @@ -1248,26 +1255,39 @@ #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #define STRING_MARK STR_M STR_A STR_R STR_K +#define STRING_bc STR_b STR_c +#define STRING_bidiclass STR_b STR_i STR_d STR_i STR_c STR_l STR_a STR_s STR_s +#define STRING_sc STR_s STR_c +#define STRING_script STR_s STR_c STR_r STR_i STR_p STR_t +#define STRING_scriptextensions STR_s STR_c STR_r STR_i STR_p STR_t STR_e STR_x STR_t STR_e STR_n STR_s STR_i STR_o STR_n STR_s +#define STRING_scx STR_s STR_c STR_x + + #endif /* SUPPORT_UNICODE */ /* -------------------- End of character and string names -------------------*/ /* -------------------- Definitions for compiled patterns -------------------*/ -/* Codes for different types of Unicode property */ +/* Codes for different types of Unicode property. If these definitions are +changed, the autopossessifying table in pcre2_auto_possess.c must be updated to +match. */ #define PT_ANY 0 /* Any property - matches all chars */ #define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ #define PT_GC 2 /* Specified general characteristic (e.g. 
L) */ #define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ -#define PT_SC 4 /* Script (e.g. Han) */ -#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */ -#define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */ -#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ -#define PT_WORD 8 /* Word - L plus N plus underscore */ -#define PT_CLIST 9 /* Pseudo-property: match character list */ -#define PT_UCNC 10 /* Universal Character nameable character */ -#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */ +#define PT_SC 4 /* Script only (e.g. Han) */ +#define PT_SCX 5 /* Script extensions (includes SC) */ +#define PT_ALNUM 6 /* Alphanumeric - the union of L and N */ +#define PT_SPACE 7 /* Perl space - general category Z plus 9,10,12,13 */ +#define PT_PXSPACE 8 /* POSIX space - Z plus 9,10,11,12,13 */ +#define PT_WORD 9 /* Word - L plus N plus underscore */ +#define PT_CLIST 10 /* Pseudo-property: match character list */ +#define PT_UCNC 11 /* Universal Character nameable character */ +#define PT_BIDICL 12 /* Specified bidi class */ +#define PT_BOOL 13 /* Boolean property */ +#define PT_TABSIZE 14 /* Size of square table for autopossessify tests */ /* The following special properties are used only in XCLASS items, when POSIX classes are specified and PCRE2_UCP is set - in other words, for Unicode @@ -1275,22 +1295,27 @@ those in the above list, and so they do not take part in the autopossessifying table. 
*/ -#define PT_PXGRAPH 11 /* :graph: - characters that mark the paper */ -#define PT_PXPRINT 12 /* :print: - :graph: plus non-control spaces */ -#define PT_PXPUNCT 13 /* :punct: - punctuation characters */ +#define PT_PXGRAPH 14 /* :graph: - characters that mark the paper */ +#define PT_PXPRINT 15 /* :print: - :graph: plus non-control spaces */ +#define PT_PXPUNCT 16 /* :punct: - punctuation characters */ + +/* This value is used when parsing \p and \P escapes to indicate that neither +\p{script:...} nor \p{scx:...} has been encountered. */ + +#define PT_NOTSCRIPT 255 /* Flag bits and data types for the extended class (OP_XCLASS) for classes that contain characters with values greater than 255. */ -#define XCL_NOT 0x01 /* Flag: this is a negative class */ -#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ -#define XCL_HASPROP 0x04 /* Flag: property checks are present. */ +#define XCL_NOT 0x01 /* Flag: this is a negative class */ +#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ +#define XCL_HASPROP 0x04 /* Flag: property checks are present. */ -#define XCL_END 0 /* Marks end of individual items */ -#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ -#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ -#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ -#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ +#define XCL_END 0 /* Marks end of individual items */ +#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ +#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ +#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ +#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ /* These are escaped items that aren't just an encoding of a particular data value such as \n. They must have non-zero values, as check_escape() returns 0 @@ -1797,8 +1822,8 @@ uint8_t gbprop; /* ucp_gbControl, etc. 
(grapheme break property) */ uint8_t caseset; /* offset to multichar other cases or zero */ int32_t other_case; /* offset to other case, or zero if none */ - int16_t scriptx; /* script extension value */ - int16_t dummy; /* spare - to round to multiple of 4 bytes */ + uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */ + uint16_t bprops; /* binary properties offset */ } ucd_record; /* UCD access macros */ @@ -1815,13 +1840,30 @@ #define GET_UCD(ch) REAL_GET_UCD(ch) #endif +#define UCD_SCRIPTX_MASK 0x3ff +#define UCD_BIDICLASS_SHIFT 11 +#define UCD_BPROPS_MASK 0xfff + +#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK) +#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT) +#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK) + #define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype #define UCD_SCRIPT(ch) GET_UCD(ch)->script #define UCD_CATEGORY(ch) PRIV(ucp_gentype)UCD_CHARTYPE(ch) #define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop #define UCD_CASESET(ch) GET_UCD(ch)->caseset #define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case))) -#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx +#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch)) +#define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch)) +#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch)) + +/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words +that form a bitmap representing a list of scripts or boolean properties. These +macros test or set a bit in the map by number. */ + +#define MAPBIT(map,n) ((map)(n)/32&(1u<<((n)%32))) +#define MAPSET(map,n) ((map)(n)/32|=(1u<<((n)%32))) /* Header for serialized pcre2 codes. 
*/ @@ -1878,6 +1920,7 @@ #endif #define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_) #define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_) +#define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_) #define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_) #define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_) #define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_) @@ -1901,9 +1944,10 @@ extern const uint8_t PRIV(default_tables); extern const uint32_t PRIV(hspace_list); extern const uint32_t PRIV(vspace_list); +extern const uint32_t PRIV(ucd_boolprop_sets); extern const uint32_t PRIV(ucd_caseless_sets); extern const uint32_t PRIV(ucd_digit_sets); -extern const uint8_t PRIV(ucd_script_sets); +extern const uint32_t PRIV(ucd_script_sets); extern const ucd_record PRIV(ucd_records); #if PCRE2_CODE_UNIT_WIDTH == 32 extern const ucd_record PRIV(dummy_ucd_record);
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_intmodedep.h -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_intmodedep.h
Changed
@@ -519,7 +519,7 @@ macro because almost all calls are already within a block of UTF-32 only code. -These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */ +These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */ #define BACKCHAR(eptr) do { } while (0) @@ -747,8 +747,8 @@ uint32_t class_range_start; /* Overall class range start */ uint32_t class_range_end; /* Overall class range end */ PCRE2_UCHAR nl4; /* Newline string when fixed length */ + uint32_t req_varyopt; /* "After variable item" flag for reqbyte */ int max_lookbehind; /* Maximum lookbehind (characters) */ - int req_varyopt; /* "After variable item" flag for reqbyte */ BOOL had_accept; /* (*ACCEPT) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ BOOL had_recurse; /* Had a recursion or subroutine call */ @@ -764,7 +764,7 @@ } pcre2_real_jit_stack; /* Structure for items in a linked list that represents an explicit recursive -call within the pattern when running pcre_dfa_match(). */ +call within the pattern when running pcre2_dfa_match(). */ typedef struct dfa_recursion_info { struct dfa_recursion_info *prevrec; @@ -838,6 +838,17 @@ typedef char check_heapframe_size ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1); +/* Structure for computing the alignment of heapframe. */ + +typedef struct heapframe_align { + char unalign; /* Completely unalign the current offset */ + heapframe frame; /* Offset is its alignment */ +} heapframe_align; + +/* This define is the minimum alignment required for a heapframe, in bytes. */ + +#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame) + /* Structure for passing "static" information around between the functions doing traditional NFA matching (pcre2_match() and friends). */
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_jit_compile.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_jit_compile.c
Changed
@@ -8,7 +8,7 @@ Written by Philip Hazel This module by Zoltan Herczeg Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -413,6 +413,9 @@ /* Locals used by fast fail optimization. */ sljit_s32 early_fail_start_ptr; sljit_s32 early_fail_end_ptr; + /* Variables used by recursive call generator. */ + sljit_s32 recurse_bitset_size; + uint8_t *recurse_bitset; /* Flipped and lower case tables. */ const sljit_u8 *fcc; @@ -613,6 +616,8 @@ sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) +#define OP2U(op, src1, src1w, src2, src2w) \ + sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w)) #define OP_SRC(op, src, srcw) \ sljit_emit_op_src(compiler, (op), (src), (srcw)) #define LABEL() \ @@ -1621,7 +1626,7 @@ /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/ * Skip the check of the second part. 
*/ -if (PRIVATE_DATA(end - LINK_SIZE) == 0) +if (PRIVATE_DATA(end - LINK_SIZE) != 0) return TRUE; next = end; @@ -2315,22 +2320,47 @@ #undef RECURSE_TMP_REG_COUNT -static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, - BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept) +static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index) +{ +uint8_t *byte; +uint8_t mask; + +SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0); + +bit_index >>= SLJIT_WORD_SHIFT; + +SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size); + +mask = 1 << (bit_index & 0x7); +byte = common->recurse_bitset + (bit_index >> 3); + +if (*byte & mask) + return FALSE; + +*byte |= mask; +return TRUE; +} + +enum get_recurse_flags { + recurse_flag_quit_found = (1 << 0), + recurse_flag_accept_found = (1 << 1), + recurse_flag_setsom_found = (1 << 2), + recurse_flag_setmark_found = (1 << 3), + recurse_flag_control_head_found = (1 << 4), +}; + +static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags) { int length = 1; -int size; +int size, offset; PCRE2_SPTR alternative; -BOOL quit_found = FALSE; -BOOL accept_found = FALSE; -BOOL setsom_found = FALSE; -BOOL setmark_found = FALSE; -BOOL capture_last_found = FALSE; -BOOL control_head_found = FALSE; +uint32_t recurse_flags = 0; + +memset(common->recurse_bitset, 0, common->recurse_bitset_size); #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD SLJIT_ASSERT(common->control_head_ptr != 0); -control_head_found = TRUE; +recurse_flags |= recurse_flag_control_head_found; #endif /* Calculate the sum of the private machine words. 
*/ @@ -2341,24 +2371,26 @@ { case OP_SET_SOM: SLJIT_ASSERT(common->has_set_som); - setsom_found = TRUE; + recurse_flags |= recurse_flag_setsom_found; cc += 1; break; case OP_RECURSE: if (common->has_set_som) - setsom_found = TRUE; + recurse_flags |= recurse_flag_setsom_found; if (common->mark_ptr != 0) - setmark_found = TRUE; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; + recurse_flags |= recurse_flag_setmark_found; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; cc += 1 + LINK_SIZE; break; case OP_KET: - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0) { - length++; + if (recurse_check_bit(common, offset)) + length++; SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); cc += PRIVATE_DATA(cc + 1); } @@ -2377,39 +2409,55 @@ case OP_SBRA: case OP_SBRAPOS: case OP_SCOND: - length++; SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; cc += 1 + LINK_SIZE; break; case OP_CBRA: case OP_SCBRA: - length += 2; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; - if (common->optimized_cbracketGET2(cc, 1 + LINK_SIZE) == 0) + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (common->optimized_cbracketoffset == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) length++; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: - length += 2 + 2; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if 
(recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: /* Might be a hidden SCOND. */ alternative = cc + GET(cc, 1); - if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) + if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc))) length++; cc += 1 + LINK_SIZE; break; CASE_ITERATOR_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length++; cc += 2; #ifdef SUPPORT_UNICODE @@ -2418,8 +2466,12 @@ break; CASE_ITERATOR_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc) != 0)
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_jit_match.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_jit_match.c
Changed
@@ -120,7 +120,7 @@ if (functions == NULL || functions->executable_funcsindex == NULL) return PCRE2_ERROR_JIT_BADOPTION; -/* Sanity checks should be handled by pcre_exec. */ +/* Sanity checks should be handled by pcre2_match. */ arguments.str = subject + start_offset; arguments.begin = subject; arguments.end = subject + length;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_jit_misc.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_jit_misc.c
Changed
@@ -135,7 +135,7 @@ pcre2_jit_stack *jit_stack; -if (startsize < 1 || maxsize < 1) +if (startsize == 0 || maxsize == 0 || maxsize > SIZE_MAX - STACK_GROWTH_RATE) return NULL; if (startsize > maxsize) startsize = maxsize;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_jit_simd_inc.h -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_jit_simd_inc.h
Changed
@@ -339,7 +339,7 @@ { JUMPHERE(partial_quit0); JUMPHERE(partial_quit1); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); } else @@ -537,7 +537,7 @@ OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); CMOV(SLJIT_LESS, STR_END, TMP1, 0); } @@ -883,14 +883,14 @@ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf)); else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); #endif } else @@ -904,14 +904,14 @@ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf)); else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) 
| SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); #endif } else @@ -922,14 +922,14 @@ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf)); else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); #endif } } @@ -1067,7 +1067,7 @@ OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0); CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0); } @@ -1084,31 +1084,31 @@ if (char1a == char1b && char2a == char2b) { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) 
| SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0_utf)); else #endif - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0)); } else { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1_utf)); else #endif - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1)); } } else { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default_utf)); else #endif - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default)); } /* Restore STR_PTR register. 
*/ @@ -1418,7 +1418,7 @@ { JUMPHERE(partial_quit0); JUMPHERE(partial_quit1); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); } else @@ -1673,7 +1673,7 @@ OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); CMOV(SLJIT_LESS, STR_END, TMP1, 0); }
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_jit_test.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_jit_test.c
Changed
@@ -108,7 +108,7 @@ pcre2_config_32(PCRE2_CONFIG_JIT, &jit); #endif if (!jit) { - printf("JIT must be enabled to run pcre_jit_test\n"); + printf("JIT must be enabled to run pcre2_jit_test\n"); return 1; } return regression_tests() @@ -291,6 +291,7 @@ { MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, { MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, { MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" }, + { MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" }, /* Greedy and non-greedy * operators */ { CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" }, @@ -412,6 +413,9 @@ { MUP, A, 0, 0 | F_PROPERTY, "\\P{L&}{2}^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" }, { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "a-b\\s{2,5}^a", "AB baaa" }, { MUP, 0, 0, 0 | F_NOMATCH, "^\\p{Hangul}\\p{Z}", " " }, + { MUP, 0, 0, 0, "\\p{Lu}\\P{Latin}+", "c\xEA\xA4\xAE,A,b" }, + { MUP, 0, 0, 0, "\\x{a92e}\\p{Lu}\\P{Latin}+", "c\xEA\xA4\xAE,A,b" }, + { CMUP, 0, 0, 0, "^S\\B", "\xe2\x80\x8a" }, /* Possible empty brackets. */ { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" }, @@ -747,6 +751,7 @@ { MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" }, { MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" }, { MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" }, + { MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" }, /* 16 bit specific tests. */ { CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" }, @@ -1199,8 +1204,8 @@ #endif /* This test compares the behaviour of interpreter and JIT. Although disabling - utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is - still considered successful from pcre_jit_test point of view. */ + utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is + still considered successful from pcre2_jit_test point of view. 
*/ #if defined SUPPORT_PCRE2_8 pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_match.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_match.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2015-2021 University of Cambridge + New API code Copyright (c) 2015-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -49,7 +49,7 @@ /* #define DEBUG_SHOW_OPS */ /* #define DEBUG_SHOW_RMATCH */ -#ifdef DEBUG_FRAME_DISPLAY +#ifdef DEBUG_FRAMES_DISPLAY #include <stdarg.h> #endif @@ -159,7 +159,8 @@ #ifdef SUPPORT_UNICODE enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207, RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215, - RM216, RM217, RM218, RM219, RM220, RM221, RM222 }; + RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223, + RM224, RM225 }; #endif /* Define short names for general fields in the current backtrack frame, which @@ -2421,40 +2422,49 @@ { const uint32_t *cp; const ucd_record *prop = GET_UCD(fc); + BOOL notmatch = Fop == OP_NOTPROP; switch(Fecode1) { case PT_ANY: - if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; case PT_LAMP: if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP)) + prop->chartype == ucp_Lt) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_GC: - if ((Fecode2 != PRIV(ucp_gentype)prop->chartype) == (Fop == OP_PROP)) + if ((Fecode2 == PRIV(ucp_gentype)prop->chartype) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_PC: - if ((Fecode2 != prop->chartype) == (Fop == OP_PROP)) + if ((Fecode2 == prop->chartype) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_SC: - if ((Fecode2 != prop->script) == (Fop == OP_PROP)) + if ((Fecode2 == prop->script) == notmatch) RRETURN(MATCH_NOMATCH); break; + case PT_SCX: + { + BOOL ok = (Fecode2 == prop->script || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode2) != 0); + if (ok == notmatch) 
RRETURN(MATCH_NOMATCH); + } + break; + /* These are specials */ case PT_ALNUM: if ((PRIV(ucp_gentype)prop->chartype == ucp_L || - PRIV(ucp_gentype)prop->chartype == ucp_N) == (Fop == OP_NOTPROP)) + PRIV(ucp_gentype)prop->chartype == ucp_N) == notmatch) RRETURN(MATCH_NOMATCH); break; @@ -2468,12 +2478,12 @@ { HSPACE_CASES: VSPACE_CASES: - if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; default: - if ((PRIV(ucp_gentype)prop->chartype == ucp_Z) == - (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); + if ((PRIV(ucp_gentype)prop->chartype == ucp_Z) == notmatch) + RRETURN(MATCH_NOMATCH); break; } break; @@ -2481,7 +2491,7 @@ case PT_WORD: if ((PRIV(ucp_gentype)prop->chartype == ucp_L || PRIV(ucp_gentype)prop->chartype == ucp_N || - fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP)) + fc == CHAR_UNDERSCORE) == notmatch) RRETURN(MATCH_NOMATCH); break; @@ -2490,19 +2500,32 @@ for (;;) { if (fc < *cp) - { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; } + { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } } if (fc == *cp++) - { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } } + { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; } } break; case PT_UCNC: if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || - fc >= 0xe000) == (Fop == OP_NOTPROP)) + fc >= 0xe000) == notmatch) RRETURN(MATCH_NOMATCH); break; + case PT_BIDICL: + if ((UCD_BIDICLASS_PROP(prop) == Fecode2) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_BOOL: + { + BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Fecode2) != 0; + if (ok == notmatch) RRETURN(MATCH_NOMATCH); + } + break; + /* This should never occur */ default: @@ -2616,18 +2639,20 @@ /* First, ensure the minimum number of matches are present. Use inline code for maximizing the speed, and do the type test once at the start - (i.e. keep it out of the loop). 
The code for UTF mode is separated out for - tidiness, except for Unicode property tests. */ + (i.e. keep it out of the loops). As there are no calls to RMATCH in the + loops, we can use an ordinary variable for "notmatch". The code for UTF + mode is separated out for tidiness, except for Unicode property tests. */ if (Lmin > 0) { #ifdef SUPPORT_UNICODE if (proptype >= 0) /* Property tests in all modes */ { + BOOL notmatch = Lctype == OP_NOTPROP; switch(proptype) { case PT_ANY: - if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); for (i = 1; i <= Lmin; i++) { if (Feptr >= mb->end_subject) @@ -2652,7 +2677,7 @@ chartype = UCD_CHARTYPE(fc); if ((chartype == ucp_Lu || chartype == ucp_Ll || - chartype == ucp_Lt) == (Lctype == OP_NOTPROP)) + chartype == ucp_Lt) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2666,7 +2691,7 @@ RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2680,7 +2705,7 @@ RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2694,7 +2719,26 @@ RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr);
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_printint.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_printint.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -230,18 +230,48 @@ /* When there is no UTF/UCP support, the table of names does not exist. This function should not be called in such configurations, because a pattern that tries to use Unicode properties won't compile. Rather than put lots of #ifdefs -into the main code, however, we just put one into this function. */ +into the main code, however, we just put one into this function. + +Now that the table contains both full names and their abbreviations, we do some +fiddling to try to get the full name, which is either the longer of two found +names, or a 3-character script name. */ static const char * get_ucpname(unsigned int ptype, unsigned int pvalue) { #ifdef SUPPORT_UNICODE -int i; -for (i = PRIV(utt_size) - 1; i >= 0; i--) +int count = 0; +const char *yield = "??"; +size_t len = 0; +unsigned int ptypex = (ptype == PT_SC)? PT_SCX : ptype; + +for (int i = PRIV(utt_size) - 1; i >= 0; i--) { - if (ptype == PRIV(utt)i.type && pvalue == PRIV(utt)i.value) break; + const ucp_type_table *u = PRIV(utt) + i; + + if ((ptype == u->type || ptypex == u->type) && pvalue == u->value) + { + const char *s = PRIV(utt_names) + u->name_offset; + size_t sl = strlen(s); + + if (sl == 3 && (u->type == PT_SC || u->type == PT_SCX)) + { + yield = s; + break; + } + + if (sl > len) + { + yield = s; + len = sl; + } + + if (++count >= 2) break; + } } -return (i >= 0)? 
PRIV(utt_names) + PRIV(utt)i.name_offset : "??"; + +return yield; + #else /* No UTF support */ (void)ptype; (void)pvalue; @@ -273,8 +303,9 @@ { if (code1 != PT_CLIST) { - fprintf(f, "%s%s %s%s", before, OP_names*code, get_ucpname(code1, - code2), after); + const char *sc = (code1 == PT_SC)? "script:" : ""; + const char *s = get_ucpname(code1, code2); + fprintf(f, "%s%s %s%c%s%s", before, OP_names*code, sc, toupper(s0), s+1, after); } else { @@ -724,6 +755,7 @@ { unsigned int ptype = *ccode++; unsigned int pvalue = *ccode++; + const char *s; switch(ptype) { @@ -740,8 +772,8 @@ break; default: - fprintf(f, "\\%c{%s}", (not? 'P':'p'), - get_ucpname(ptype, pvalue)); + s = get_ucpname(ptype, pvalue); + fprintf(f, "\\%c{%c%s}", (not? 'P':'p'), toupper(s0), s+1); break; } }
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_script_run.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_script_run.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -68,26 +68,26 @@ Returns: TRUE if this is a valid script run */ -/* These dummy values must be less than the negation of the largest offset in -the PRIV(ucd_script_sets) vector, which is held in a 16-bit field in UCD -records (and is only likely to be a few hundred). */ +/* These are states in the checking process. */ -#define SCRIPT_UNSET (-99999) -#define SCRIPT_HANPENDING (-99998) -#define SCRIPT_HANHIRAKATA (-99997) -#define SCRIPT_HANBOPOMOFO (-99996) -#define SCRIPT_HANHANGUL (-99995) -#define SCRIPT_LIST (-99994) +enum { SCRIPT_UNSET, /* Requirement as yet unknown */ + SCRIPT_MAP, /* Bitmap contains acceptable scripts */ + SCRIPT_HANPENDING, /* Have had only Han characters */ + SCRIPT_HANHIRAKATA, /* Expect Han or Hirikata */ + SCRIPT_HANBOPOMOFO, /* Expect Han or Bopomofo */ + SCRIPT_HANHANGUL /* Expect Han or Hangul */ + }; -#define INTERSECTION_LIST_SIZE 50 +#define UCD_MAPSIZE (ucp_Unknown/32 + 1) +#define FULL_MAPSIZE (ucp_Script_Count/32 + 1) BOOL PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf) { #ifdef SUPPORT_UNICODE -int require_script = SCRIPT_UNSET; -uint8_t intersection_listINTERSECTION_LIST_SIZE; -const uint8_t *require_list = NULL; +uint32_t require_state = SCRIPT_UNSET; +uint32_t require_mapFULL_MAPSIZE; +uint32_t mapFULL_MAPSIZE; uint32_t require_digitset = 0; uint32_t c; @@ -101,11 +101,17 @@ GETCHARINCTEST(c, ptr); if (ptr >= endptr) return TRUE; +/* Initialize the require map. 
This is a full-size bitmap that has a bit for +every script, as opposed to the maps in ucd_script_sets, which only have bits +for scripts less than ucp_Unknown - those that appear in script extension +lists. */ + +for (int i = 0; i < FULL_MAPSIZE; i++) require_mapi = 0; + /* Scan strings of two or more characters, checking the Unicode characteristics -of each code point. We make use of the Script Extensions property. There is -special code for scripts that can be combined with characters from the Han -Chinese script. This may be used in conjunction with four other scripts in -these combinations: +of each code point. There is special code for scripts that can be combined with +characters from the Han Chinese script. This may be used in conjunction with +four other scripts in these combinations: . Han with Hiragana and Katakana is allowed (for Japanese). . Han with Bopomofo is allowed (for Taiwanese Mandarin). @@ -119,310 +125,207 @@ for (;;) { const ucd_record *ucd = GET_UCD(c); - int32_t scriptx = ucd->scriptx; + uint32_t script = ucd->script; - /* If the script extension is Unknown, the string is not a valid script run. - Such characters can only form script runs of length one. */ + /* If the script is Unknown, the string is not a valid script run. Such + characters can only form script runs of length one (see test above). */ - if (scriptx == ucp_Unknown) return FALSE; + if (script == ucp_Unknown) return FALSE; - /* A character whose script extension is Inherited is always accepted with - any script, and plays no further part in this testing. A character whose - script is Common is always accepted, but must still be tested for a digit - below. The scriptx value at this point is non-zero, because zero is - ucp_Unknown, tested for above. */ + /* A character without any script extensions whose script is Inherited or + Common is always accepted with any script. If there are extensions, the + following processing happens for all scripts. 
*/ - if (scriptx != ucp_Inherited) + if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common)) { - if (scriptx != ucp_Common) + BOOL OK; + + /* Set up a full-sized map for this character that can include bits for all + scripts. Copy the scriptx map for this character (which covers those + scripts that appear in script extension lists), set the remaining values to + zero, and then, except for Common or Inherited, add this script's bit to + the map. */ + + memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t)); + memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t)); + if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script); + + /* Handle the different checking states */ + + switch(require_state) { - /* If the script extension value is positive, the character is not a mark - that can be used with many scripts. In the simple case we either set or - compare with the required script. However, handling the scripts that can - combine with Han are more complicated, as is the case when the previous - characters have been man-script marks. */ + /* First significant character - it might follow Common or Inherited + characters that do not have any script extensions. */ - if (scriptx > 0) + case SCRIPT_UNSET: + switch(script) { - switch(require_script) - { - /* Either the first significant character (require_script unset) or - after only Han characters. */ - - case SCRIPT_UNSET: - case SCRIPT_HANPENDING: - switch(scriptx) - { - case ucp_Han: - require_script = SCRIPT_HANPENDING; - break; - - case ucp_Hiragana: - case ucp_Katakana: - require_script = SCRIPT_HANHIRAKATA; - break; - - case ucp_Bopomofo: - require_script = SCRIPT_HANBOPOMOFO; - break; - - case ucp_Hangul: - require_script = SCRIPT_HANHANGUL; - break; - - /* Not a Han-related script. If expecting one, fail. Otherise set - the requirement to this script. 
*/ - - default: - if (require_script == SCRIPT_HANPENDING) return FALSE; - require_script = scriptx; - break; - } - break; + case ucp_Han: + require_state = SCRIPT_HANPENDING; + break; + + case ucp_Hiragana: + case ucp_Katakana: + require_state = SCRIPT_HANHIRAKATA; + break; + + case ucp_Bopomofo: + require_state = SCRIPT_HANBOPOMOFO; + break; + + case ucp_Hangul: + require_state = SCRIPT_HANHANGUL; + break; + + default: + memcpy(require_map, map, FULL_MAPSIZE * sizeof(uint32_t)); + require_state = SCRIPT_MAP; + break; + } + break; - /* Previously encountered one of the "with Han" scripts. Check that - this character is appropriate. */ + /* The first significant character was Han. An inspection of the Unicode + 11.0.0 files shows that there are the following types of Script Extension + list that involve the Han, Bopomofo, Hiragana, Katakana, and Hangul + scripts: - case SCRIPT_HANHIRAKATA: - if (scriptx != ucp_Han && scriptx != ucp_Hiragana && - scriptx != ucp_Katakana) - return FALSE; - break; + . Bopomofo + Han + . Han + Hiragana + Katakana
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_string_utils.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_string_utils.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2018 University of Cambridge + New API code Copyright (c) 2018-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_study.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_study.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -908,7 +908,7 @@ { uint32_t c; for (c = 0; c < table_limit; c++) - re->start_bitmapc |= ~(re->tablesc+cbits_offset+cbit_type); + re->start_bitmapc |= (uint8_t)(~(re->tablesc+cbits_offset+cbit_type)); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmapc = 0xff; #endif
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_substitute.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_substitute.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -259,6 +259,18 @@ if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0) return PCRE2_ERROR_BADOPTION; + +/* Validate length and find the end of the replacement. A NULL replacement of +zero length is interpreted as an empty string. */ + +if (replacement == NULL) + { + if (rlength != 0) return PCRE2_ERROR_NULL; + replacement = (PCRE2_SPTR)""; + } + +if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); +repend = replacement + rlength; /* Check for using a match that has already happened. Note that the subject pointer in the match data may be NULL after a no-match. */ @@ -312,11 +324,18 @@ scb.output = (PCRE2_SPTR)buffer; scb.ovector = ovector; -/* Find lengths of zero-terminated strings and the end of the replacement. */ +/* A NULL subject of zero length is treated as an empty string. */ -if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); -if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); -repend = replacement + rlength; +if (subject == NULL) + { + if (length != 0) return PCRE2_ERROR_NULL; + subject = (PCRE2_SPTR)""; + } + +/* Find length of zero-terminated subject */ + +if (length == PCRE2_ZERO_TERMINATED) + length = subject? PRIV(strlen)(subject) : 0; /* Check UTF replacement string if necessary. */
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_tables.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_tables.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -51,10 +51,10 @@ #include "pcre2_internal.h" #endif /* PCRE2_PCRE2TEST */ - /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that the definition is next to the definition of the opcodes in pcre2_internal.h. -This is mode-dependent, so is skipped when this file is included by pcre2test. */ +This is mode-dependent, so it is skipped when this file is included by +pcre2test. */ #ifndef PCRE2_PCRE2TEST const uint8_t PRIV(OP_lengths) = { OP_LENGTHS }; @@ -119,6 +119,9 @@ #endif /* UTF-8 support needed */ +/* Tables concerned with Unicode properties are relevant only when Unicode +support is enabled. See also the pcre2_ucptables.c file, which is generated by +a Python script from Unicode data files. */ #ifdef SUPPORT_UNICODE @@ -190,7 +193,7 @@ ESZ|(1u<<ucp_gbPrepend)| /* 4 Prepend */ (1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)| (1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)| - (1u<<ucp_gbRegionalIndicator), + (1u<<ucp_gbRegional_Indicator), ESZ, /* 5 SpacingMark */ ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)| /* 6 L */ (1u<<ucp_gbLVT), @@ -198,7 +201,7 @@ ESZ|(1u<<ucp_gbT), /* 8 T */ ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT), /* 9 LV */ ESZ|(1u<<ucp_gbT), /* 10 LVT */ - (1u<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */ + (1u<<ucp_gbRegional_Indicator), /* 11 Regional Indicator */ ESZ, /* 12 Other */ ESZ, /* 13 ZWJ */ ESZ|(1u<<ucp_gbExtended_Pictographic) /* 14 Extended Pictographic */ @@ -221,648 +224,10 @@ }; #endif /* SUPPORT_JIT */ -/* The PRIV(utt) table below translates Unicode property names into type and -code values. 
It is searched by binary chop, so must be in collating sequence of -name. Originally, the table contained pointers to the name strings in the first -field of each entry. However, that leads to a large number of relocations when -a shared library is dynamically loaded. A significant reduction is made by -putting all the names into a single, large string and then using offsets in the -table itself. Maintenance is more error-prone, but frequent changes to this -data are unlikely. - -July 2008: There is now a script called maint/GenerateUtt.py that can be used -to generate this data automatically instead of maintaining it by hand. - -The script was updated in March 2009 to generate a new EBCDIC-compliant -version. Like all other character and string literals that are compared against -the regular expression pattern, we must use STR_ macros instead of literal -strings to make sure that UTF-8 support works on EBCDIC platforms. */ - -#define STRING_Adlam0 STR_A STR_d STR_l STR_a STR_m "\0" -#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0" -#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" -#define STRING_Any0 STR_A STR_n STR_y "\0" -#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0" -#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0" -#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0" -#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0" -#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0" -#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0" -#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0" -#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0" -#define STRING_Bhaiksuki0 STR_B STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0" -#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o 
STR_f STR_o "\0" -#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0" -#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0" -#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0" -#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0" -#define STRING_C0 STR_C "\0" -#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0" -#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0" -#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0" -#define STRING_Cc0 STR_C STR_c "\0" -#define STRING_Cf0 STR_C STR_f "\0" -#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0" -#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0" -#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0" -#define STRING_Chorasmian0 STR_C STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0" -#define STRING_Cn0 STR_C STR_n "\0" -#define STRING_Co0 STR_C STR_o "\0" -#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0" -#define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0" -#define STRING_Cs0 STR_C STR_s "\0" -#define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0" -#define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0" -#define STRING_Cypro_Minoan0 STR_C STR_y STR_p STR_r STR_o STR_UNDERSCORE STR_M STR_i STR_n STR_o STR_a STR_n "\0" -#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" -#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0" -#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" -#define STRING_Dives_Akuru0 STR_D STR_i STR_v STR_e STR_s STR_UNDERSCORE STR_A STR_k STR_u STR_r STR_u "\0" -#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0" 
-#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0" -#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" -#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0" -#define STRING_Elymaic0 STR_E STR_l STR_y STR_m STR_a STR_i STR_c "\0" -#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0" -#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0" -#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0" -#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0" -#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0" -#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0" -#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0" -#define STRING_Gunjala_Gondi0 STR_G STR_u STR_n STR_j STR_a STR_l STR_a STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0" -#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0" -#define STRING_Han0 STR_H STR_a STR_n "\0" -#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0" -#define STRING_Hanifi_Rohingya0 STR_H STR_a STR_n STR_i STR_f STR_i STR_UNDERSCORE STR_R STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0" -#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0" -#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0" -#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0" -#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0" -#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0" -#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0" -#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l 
STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0" -#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0" -#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0" -#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0" -#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0" -#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0" -#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0" -#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0" -#define STRING_Khitan_Small_Script0 STR_K STR_h STR_i STR_t STR_a STR_n STR_UNDERSCORE STR_S STR_m STR_a STR_l STR_l STR_UNDERSCORE STR_S STR_c STR_r STR_i STR_p STR_t "\0" -#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0" -#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0" -#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0" -#define STRING_L0 STR_L "\0" -#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0" -#define STRING_Lao0 STR_L STR_a STR_o "\0" -#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0" -#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0" -#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0" -#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0" -#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0" -#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0" -#define STRING_Ll0 STR_L STR_l "\0" -#define STRING_Lm0 STR_L STR_m "\0" -#define STRING_Lo0 STR_L STR_o "\0" -#define STRING_Lt0 STR_L STR_t "\0" -#define STRING_Lu0 STR_L STR_u "\0" -#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0" -#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0" -#define STRING_M0 STR_M "\0" -#define 
STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0" -#define STRING_Makasar0 STR_M STR_a STR_k STR_a STR_s STR_a STR_r "\0" -#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0" -#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0" -#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0" -#define STRING_Marchen0 STR_M STR_a STR_r STR_c STR_h STR_e STR_n "\0" -#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0" -#define STRING_Mc0 STR_M STR_c "\0" -#define STRING_Me0 STR_M STR_e "\0" -#define STRING_Medefaidrin0 STR_M STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0" -#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0" -#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0" -#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0" -#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" -#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0" -#define STRING_Mn0 STR_M STR_n "\0" -#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0" -#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0" -#define STRING_Mro0 STR_M STR_r STR_o "\0" -#define STRING_Multani0 STR_M STR_u STR_l STR_t STR_a STR_n STR_i "\0" -#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0" -#define STRING_N0 STR_N "\0" -#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0" -#define STRING_Nandinagari0 STR_N STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0" -#define STRING_Nd0 STR_N STR_d "\0" -#define 
STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0" -#define STRING_Newa0 STR_N STR_e STR_w STR_a "\0" -#define STRING_Nko0 STR_N STR_k STR_o "\0" -#define STRING_Nl0 STR_N STR_l "\0" -#define STRING_No0 STR_N STR_o "\0" -#define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0" -#define STRING_Nyiakeng_Puachue_Hmong0 STR_N STR_y STR_i STR_a STR_k STR_e STR_n STR_g STR_UNDERSCORE STR_P STR_u STR_a STR_c STR_h STR_u STR_e STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0" -#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0" -#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0" -#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0" -#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0" -#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_ucd.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_ucd.c
Changed
@@ -1,36 +1,71 @@ -/* This module is generated by the maint/MultiStage2.py script. -Do not modify it by hand. Instead modify the script and run it -to regenerate this code. - -As well as being part of the PCRE2 library, this module is #included -by the pcre2test program, which redefines the PRIV macro to change -table names from _pcre2_xxx to xxxx, thereby avoiding name clashes -with the library. At present, just one of these tables is actually -needed. */ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY! +Instead, modify the maint/GenerateUcd.py script and run it to generate +a new version of this code. + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ -#ifndef PCRE2_PCRE2TEST +/* This file contains tables of Unicode properties that are extracted from +Unicode data files. See the comments at the start of maint/GenerateUcd.py for +details. + +As well as being part of the PCRE2 library, this file is #included by the +pcre2test program, which redefines the PRIV macro to change table names from +_pcre2_xxx to xxxx, thereby avoiding name clashes with the library. At present, +just one of these tables is actually needed. When compiling the library, some +headers are needed. */ +#ifndef PCRE2_PCRE2TEST #ifdef HAVE_CONFIG_H #include "config.h" #endif - #include "pcre2_internal.h" - #endif /* PCRE2_PCRE2TEST */ -/* Unicode character database. */ -/* This file was autogenerated by the MultiStage2.py script. */ -/* Total size: 102844 bytes, block size: 128. */ - -/* The tables herein are needed only when UCP support is built, -and in PCRE2 that happens automatically with UTF support. -This module should not be referenced otherwise, so -it should not matter whether it is compiled or not. 
However -a comment was received about space saving - maybe the guy linked -all the modules rather than using a library - so we include a -condition to cut out the tables when not needed. But don't leave -a totally empty module because some compilers barf at that. -Instead, just supply some small dummy tables. */ +/* The tables herein are needed only when UCP support is built, and in PCRE2 +that happens automatically with UTF support. This module should not be +referenced otherwise, so it should not matter whether it is compiled or not. +However a comment was received about space saving - maybe the guy linked all +the modules rather than using a library - so we include a condition to cut out +the tables when not needed. But don't leave a totally empty module because some +compilers barf at that. Instead, just supply some small dummy tables. */ #ifndef SUPPORT_UNICODE const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0 }}; @@ -39,11 +74,27 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {0}; #else +/* Total size: 111116 bytes, block size: 128. */ + const char *PRIV(unicode_version) = "14.0.0"; -/* If the 32-bit library is run in non-32-bit mode, character values -greater than 0x10ffff may be encountered. For these we set up a -special record. */ +/* When recompiling tables with a new Unicode version, please check the types +in this structure definition with those in pcre2_internal.h (the actual field +names will be different). + +typedef struct { +uint8_t property_0; +uint8_t property_1; +uint8_t property_2; +uint8_t property_3; +int32_t property_4; +uint16_t property_5; +uint16_t property_6; +} ucd_record; +*/ + +/* If the 32-bit library is run in non-32-bit mode, character values greater +than 0x10ffff may be encountered. For these we set up a special record. 
*/ #if PCRE2_CODE_UNIT_WIDTH == 32 const ucd_record PRIV(dummy_ucd_record) = {{ @@ -52,68 +103,53 @@ ucp_gbOther, /* grapheme break property */ 0, /* case set */ 0, /* other case */ - ucp_Unknown, /* script extension */ - 0, /* dummy filler */ + 0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */ + 0, /* bool properties offset */ }}; #endif -/* When recompiling tables with a new Unicode version, please check the -types in this structure definition from pcre2_internal.h (the actual -field names will be different): - -typedef struct { -uint8_t property_0; -uint8_t property_1; -uint8_t property_2; -uint8_t property_3; -pcre_int32 property_4; -pcre_int16 property_5; -uint16_t property_6; -} ucd_record; -*/ - /* This table contains lists of characters that are caseless sets of more than one character. Each list is terminated by NOTACHAR. */ const uint32_t PRIV(ucd_caseless_sets) = { NOTACHAR, - 0x0053, 0x0073, 0x017f, NOTACHAR, - 0x01c4, 0x01c5, 0x01c6, NOTACHAR, - 0x01c7, 0x01c8, 0x01c9, NOTACHAR, - 0x01ca, 0x01cb, 0x01cc, NOTACHAR, - 0x01f1, 0x01f2, 0x01f3, NOTACHAR, - 0x0345, 0x0399, 0x03b9, 0x1fbe, NOTACHAR, - 0x00b5, 0x039c, 0x03bc, NOTACHAR, - 0x03a3, 0x03c2, 0x03c3, NOTACHAR, - 0x0392, 0x03b2, 0x03d0, NOTACHAR, - 0x0398, 0x03b8, 0x03d1, 0x03f4, NOTACHAR, - 0x03a6, 0x03c6, 0x03d5, NOTACHAR, - 0x03a0, 0x03c0, 0x03d6, NOTACHAR, - 0x039a, 0x03ba, 0x03f0, NOTACHAR, - 0x03a1, 0x03c1, 0x03f1, NOTACHAR, - 0x0395, 0x03b5, 0x03f5, NOTACHAR, - 0x0412, 0x0432, 0x1c80, NOTACHAR, - 0x0414, 0x0434, 0x1c81, NOTACHAR, - 0x041e, 0x043e, 0x1c82, NOTACHAR, - 0x0421, 0x0441, 0x1c83, NOTACHAR, - 0x0422, 0x0442, 0x1c84, 0x1c85, NOTACHAR, - 0x042a, 0x044a, 0x1c86, NOTACHAR, - 0x0462, 0x0463, 0x1c87, NOTACHAR, - 0x1e60, 0x1e61, 0x1e9b, NOTACHAR, - 0x03a9, 0x03c9, 0x2126, NOTACHAR, - 0x004b, 0x006b, 0x212a, NOTACHAR, - 0x00c5, 0x00e5, 0x212b, NOTACHAR, - 0x1c88, 0xa64a, 0xa64b, NOTACHAR, + 0x0053, 0x0073, 0x017f, NOTACHAR, + 0x01c4, 0x01c5, 0x01c6, NOTACHAR, + 
0x01c7, 0x01c8, 0x01c9, NOTACHAR, + 0x01ca, 0x01cb, 0x01cc, NOTACHAR, + 0x01f1, 0x01f2, 0x01f3, NOTACHAR, + 0x0345, 0x0399, 0x03b9, 0x1fbe, NOTACHAR, + 0x00b5, 0x039c, 0x03bc, NOTACHAR, + 0x03a3, 0x03c2, 0x03c3, NOTACHAR, + 0x0392, 0x03b2, 0x03d0, NOTACHAR, + 0x0398, 0x03b8, 0x03d1, 0x03f4, NOTACHAR, + 0x03a6, 0x03c6, 0x03d5, NOTACHAR, + 0x03a0, 0x03c0, 0x03d6, NOTACHAR, + 0x039a, 0x03ba, 0x03f0, NOTACHAR, + 0x03a1, 0x03c1, 0x03f1, NOTACHAR,
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_ucp.h -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_ucp.h
Changed
@@ -7,7 +7,11 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY! +Instead, modify the maint/GenerateUcpHeader.py script and run it to generate +a new version of this code. ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -38,31 +42,27 @@ ----------------------------------------------------------------------------- */ - #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD #define PCRE2_UCP_H_IDEMPOTENT_GUARD -/* This file contains definitions of the property values that are returned by -the UCD access macros. New values that are added for new releases of Unicode -should always be at the end of each enum, for backwards compatibility. +/* This file contains definitions of the Unicode property values that are +returned by the UCD access macros and used throughout PCRE2. -IMPORTANT: Note also that the specific numeric values of the enums have to be -the same as the values that are generated by the maint/MultiStage2.py script, -where the equivalent property descriptive names are listed in vectors. - -ALSO: The specific values of the first two enums are assumed for the table -called catposstab in pcre2_compile.c. */ +IMPORTANT: The specific values of the first two enums (general and particular +character categories) are assumed by the table called catposstab in the file +pcre2_auto_possess.c. They are unlikely to change, but should be checked after +an update. */ /* These are the general character categories. 
*/ enum { - ucp_C, /* Other */ - ucp_L, /* Letter */ - ucp_M, /* Mark */ - ucp_N, /* Number */ - ucp_P, /* Punctuation */ - ucp_S, /* Symbol */ - ucp_Z /* Separator */ + ucp_C, + ucp_L, + ucp_M, + ucp_N, + ucp_P, + ucp_S, + ucp_Z, }; /* These are the particular character categories. */ @@ -97,7 +97,98 @@ ucp_So, /* Other symbol */ ucp_Zl, /* Line separator */ ucp_Zp, /* Paragraph separator */ - ucp_Zs /* Space separator */ + ucp_Zs, /* Space separator */ +}; + +/* These are Boolean properties. */ + +enum { + ucp_ASCII, + ucp_ASCII_Hex_Digit, + ucp_Alphabetic, + ucp_Bidi_Control, + ucp_Bidi_Mirrored, + ucp_Case_Ignorable, + ucp_Cased, + ucp_Changes_When_Casefolded, + ucp_Changes_When_Casemapped, + ucp_Changes_When_Lowercased, + ucp_Changes_When_Titlecased, + ucp_Changes_When_Uppercased, + ucp_Dash, + ucp_Default_Ignorable_Code_Point, + ucp_Deprecated, + ucp_Diacritic, + ucp_Emoji, + ucp_Emoji_Component, + ucp_Emoji_Modifier, + ucp_Emoji_Modifier_Base, + ucp_Emoji_Presentation, + ucp_Extended_Pictographic, + ucp_Extender, + ucp_Grapheme_Base, + ucp_Grapheme_Extend, + ucp_Grapheme_Link, + ucp_Hex_Digit, + ucp_IDS_Binary_Operator, + ucp_IDS_Trinary_Operator, + ucp_ID_Continue, + ucp_ID_Start, + ucp_Ideographic, + ucp_Join_Control, + ucp_Logical_Order_Exception, + ucp_Lowercase, + ucp_Math, + ucp_Noncharacter_Code_Point, + ucp_Pattern_Syntax, + ucp_Pattern_White_Space, + ucp_Prepended_Concatenation_Mark, + ucp_Quotation_Mark, + ucp_Radical, + ucp_Regional_Indicator, + ucp_Sentence_Terminal, + ucp_Soft_Dotted, + ucp_Terminal_Punctuation, + ucp_Unified_Ideograph, + ucp_Uppercase, + ucp_Variation_Selector, + ucp_White_Space, + ucp_XID_Continue, + ucp_XID_Start, + /* This must be last */ + ucp_Bprop_Count +}; + +/* Size of entries in ucd_boolprop_sets */ + +#define ucd_boolprop_sets_item_size 2 + +/* These are the bidi class values. 
*/ + +enum { + ucp_bidiAL, /* Arabic letter */ + ucp_bidiAN, /* Arabic number */ + ucp_bidiB, /* Paragraph separator */ + ucp_bidiBN, /* Boundary neutral */ + ucp_bidiCS, /* Common separator */ + ucp_bidiEN, /* European number */ + ucp_bidiES, /* European separator */ + ucp_bidiET, /* European terminator */ + ucp_bidiFSI, /* First strong isolate */ + ucp_bidiL, /* Left to right */ + ucp_bidiLRE, /* Left to right embedding */ + ucp_bidiLRI, /* Left to right isolate */ + ucp_bidiLRO, /* Left to right override */ + ucp_bidiNSM, /* Non-spacing mark */ + ucp_bidiON, /* Other neutral */ + ucp_bidiPDF, /* Pop directional format */ + ucp_bidiPDI, /* Pop directional isolate */ + ucp_bidiR, /* Right to left */ + ucp_bidiRLE, /* Right to left embedding */ + ucp_bidiRLI, /* Right to left isolate */ + ucp_bidiRLO, /* Right to left override */ + ucp_bidiS, /* Segment separator */ + ucp_bidiWS, /* White space */ }; /* These are grapheme break properties. The Extended Pictographic property @@ -115,191 +206,189 @@ ucp_gbT, /* 8 Hangul syllable type T */ ucp_gbLV, /* 9 Hangul syllable type LV */ ucp_gbLVT, /* 10 Hangul syllable type LVT */ - ucp_gbRegionalIndicator, /* 11 */ + ucp_gbRegional_Indicator, /* 11 */ ucp_gbOther, /* 12 */ ucp_gbZWJ, /* 13 */ - ucp_gbExtended_Pictographic /* 14 */ + ucp_gbExtended_Pictographic, /* 14 */ }; /* These are the script identifications. */ enum { - ucp_Unknown, - ucp_Arabic, - ucp_Armenian, - ucp_Bengali, - ucp_Bopomofo, - ucp_Braille, - ucp_Buginese, - ucp_Buhid, - ucp_Canadian_Aboriginal, - ucp_Cherokee, - ucp_Common, - ucp_Coptic, - ucp_Cypriot, + /* Scripts which has characters in other scripts. */ + ucp_Latin, + ucp_Greek, ucp_Cyrillic, - ucp_Deseret, + ucp_Arabic, + ucp_Syriac, + ucp_Thaana, ucp_Devanagari, - ucp_Ethiopic, - ucp_Georgian, - ucp_Glagolitic, - ucp_Gothic, - ucp_Greek,
View file
_service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_ucptables.c
Added
@@ -0,0 +1,1524 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY! +Instead, modify the maint/GenerateUcpTables.py script and run it to generate +a new version of this code. + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifdef SUPPORT_UNICODE + +/* The PRIV(utt) table below translates Unicode property names into type and +code values. It is searched by binary chop, so must be in collating sequence of +name. Originally, the table contained pointers to the name strings in the first +field of each entry. However, that leads to a large number of relocations when +a shared library is dynamically loaded. A significant reduction is made by +putting all the names into a single, large string and using offsets instead. +All letters are lower cased, and underscores are removed, in accordance with +the "loose matching" rules that Unicode advises and Perl uses. 
*/ + +#define STRING_adlam0 STR_a STR_d STR_l STR_a STR_m "\0" +#define STRING_adlm0 STR_a STR_d STR_l STR_m "\0" +#define STRING_aghb0 STR_a STR_g STR_h STR_b "\0" +#define STRING_ahex0 STR_a STR_h STR_e STR_x "\0" +#define STRING_ahom0 STR_a STR_h STR_o STR_m "\0" +#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0" +#define STRING_alphabetic0 STR_a STR_l STR_p STR_h STR_a STR_b STR_e STR_t STR_i STR_c "\0" +#define STRING_anatolianhieroglyphs0 STR_a STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" +#define STRING_any0 STR_a STR_n STR_y "\0" +#define STRING_arab0 STR_a STR_r STR_a STR_b "\0" +#define STRING_arabic0 STR_a STR_r STR_a STR_b STR_i STR_c "\0" +#define STRING_armenian0 STR_a STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0" +#define STRING_armi0 STR_a STR_r STR_m STR_i "\0" +#define STRING_armn0 STR_a STR_r STR_m STR_n "\0" +#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0" +#define STRING_asciihexdigit0 STR_a STR_s STR_c STR_i STR_i STR_h STR_e STR_x STR_d STR_i STR_g STR_i STR_t "\0" +#define STRING_avestan0 STR_a STR_v STR_e STR_s STR_t STR_a STR_n "\0" +#define STRING_avst0 STR_a STR_v STR_s STR_t "\0" +#define STRING_bali0 STR_b STR_a STR_l STR_i "\0" +#define STRING_balinese0 STR_b STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0" +#define STRING_bamu0 STR_b STR_a STR_m STR_u "\0" +#define STRING_bamum0 STR_b STR_a STR_m STR_u STR_m "\0" +#define STRING_bass0 STR_b STR_a STR_s STR_s "\0" +#define STRING_bassavah0 STR_b STR_a STR_s STR_s STR_a STR_v STR_a STR_h "\0" +#define STRING_batak0 STR_b STR_a STR_t STR_a STR_k "\0" +#define STRING_batk0 STR_b STR_a STR_t STR_k "\0" +#define STRING_beng0 STR_b STR_e STR_n STR_g "\0" +#define STRING_bengali0 STR_b STR_e STR_n STR_g STR_a STR_l STR_i "\0" +#define STRING_bhaiksuki0 STR_b STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0" +#define STRING_bhks0 STR_b STR_h STR_k STR_s "\0" +#define STRING_bidial0 STR_b STR_i STR_d 
STR_i STR_a STR_l "\0" +#define STRING_bidian0 STR_b STR_i STR_d STR_i STR_a STR_n "\0" +#define STRING_bidib0 STR_b STR_i STR_d STR_i STR_b "\0" +#define STRING_bidibn0 STR_b STR_i STR_d STR_i STR_b STR_n "\0" +#define STRING_bidic0 STR_b STR_i STR_d STR_i STR_c "\0" +#define STRING_bidicontrol0 STR_b STR_i STR_d STR_i STR_c STR_o STR_n STR_t STR_r STR_o STR_l "\0" +#define STRING_bidics0 STR_b STR_i STR_d STR_i STR_c STR_s "\0" +#define STRING_bidien0 STR_b STR_i STR_d STR_i STR_e STR_n "\0" +#define STRING_bidies0 STR_b STR_i STR_d STR_i STR_e STR_s "\0" +#define STRING_bidiet0 STR_b STR_i STR_d STR_i STR_e STR_t "\0" +#define STRING_bidifsi0 STR_b STR_i STR_d STR_i STR_f STR_s STR_i "\0" +#define STRING_bidil0 STR_b STR_i STR_d STR_i STR_l "\0" +#define STRING_bidilre0 STR_b STR_i STR_d STR_i STR_l STR_r STR_e "\0" +#define STRING_bidilri0 STR_b STR_i STR_d STR_i STR_l STR_r STR_i "\0" +#define STRING_bidilro0 STR_b STR_i STR_d STR_i STR_l STR_r STR_o "\0" +#define STRING_bidim0 STR_b STR_i STR_d STR_i STR_m "\0" +#define STRING_bidimirrored0 STR_b STR_i STR_d STR_i STR_m STR_i STR_r STR_r STR_o STR_r STR_e STR_d "\0" +#define STRING_bidinsm0 STR_b STR_i STR_d STR_i STR_n STR_s STR_m "\0" +#define STRING_bidion0 STR_b STR_i STR_d STR_i STR_o STR_n "\0" +#define STRING_bidipdf0 STR_b STR_i STR_d STR_i STR_p STR_d STR_f "\0" +#define STRING_bidipdi0 STR_b STR_i STR_d STR_i STR_p STR_d STR_i "\0" +#define STRING_bidir0 STR_b STR_i STR_d STR_i STR_r "\0" +#define STRING_bidirle0 STR_b STR_i STR_d STR_i STR_r STR_l STR_e "\0" +#define STRING_bidirli0 STR_b STR_i STR_d STR_i STR_r STR_l STR_i "\0" +#define STRING_bidirlo0 STR_b STR_i STR_d STR_i STR_r STR_l STR_o "\0" +#define STRING_bidis0 STR_b STR_i STR_d STR_i STR_s "\0" +#define STRING_bidiws0 STR_b STR_i STR_d STR_i STR_w STR_s "\0" +#define STRING_bopo0 STR_b STR_o STR_p STR_o "\0" +#define STRING_bopomofo0 STR_b STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0" +#define STRING_brah0 STR_b STR_r STR_a STR_h "\0" 
+#define STRING_brahmi0 STR_b STR_r STR_a STR_h STR_m STR_i "\0" +#define STRING_brai0 STR_b STR_r STR_a STR_i "\0" +#define STRING_braille0 STR_b STR_r STR_a STR_i STR_l STR_l STR_e "\0" +#define STRING_bugi0 STR_b STR_u STR_g STR_i "\0" +#define STRING_buginese0 STR_b STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0" +#define STRING_buhd0 STR_b STR_u STR_h STR_d "\0" +#define STRING_buhid0 STR_b STR_u STR_h STR_i STR_d "\0" +#define STRING_c0 STR_c "\0" +#define STRING_cakm0 STR_c STR_a STR_k STR_m "\0" +#define STRING_canadianaboriginal0 STR_c STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_a STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0" +#define STRING_cans0 STR_c STR_a STR_n STR_s "\0" +#define STRING_cari0 STR_c STR_a STR_r STR_i "\0" +#define STRING_carian0 STR_c STR_a STR_r STR_i STR_a STR_n "\0" +#define STRING_cased0 STR_c STR_a STR_s STR_e STR_d "\0" +#define STRING_caseignorable0 STR_c STR_a STR_s STR_e STR_i STR_g STR_n STR_o STR_r STR_a STR_b STR_l STR_e "\0" +#define STRING_caucasianalbanian0 STR_c STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_a STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0" +#define STRING_cc0 STR_c STR_c "\0" +#define STRING_cf0 STR_c STR_f "\0" +#define STRING_chakma0 STR_c STR_h STR_a STR_k STR_m STR_a "\0" +#define STRING_cham0 STR_c STR_h STR_a STR_m "\0" +#define STRING_changeswhencasefolded0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_c STR_a STR_s STR_e STR_f STR_o STR_l STR_d STR_e STR_d "\0" +#define STRING_changeswhencasemapped0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_c STR_a STR_s STR_e STR_m STR_a STR_p STR_p STR_e STR_d "\0" +#define STRING_changeswhenlowercased0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_l STR_o STR_w STR_e STR_r STR_c STR_a STR_s STR_e STR_d "\0" +#define STRING_changeswhentitlecased0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_t STR_i STR_t STR_l STR_e STR_c STR_a STR_s STR_e 
STR_d "\0" +#define STRING_changeswhenuppercased0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_u STR_p STR_p STR_e STR_r STR_c STR_a STR_s STR_e STR_d "\0" +#define STRING_cher0 STR_c STR_h STR_e STR_r "\0" +#define STRING_cherokee0 STR_c STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0" +#define STRING_chorasmian0 STR_c STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0" +#define STRING_chrs0 STR_c STR_h STR_r STR_s "\0" +#define STRING_ci0 STR_c STR_i "\0" +#define STRING_cn0 STR_c STR_n "\0" +#define STRING_co0 STR_c STR_o "\0" +#define STRING_common0 STR_c STR_o STR_m STR_m STR_o STR_n "\0" +#define STRING_copt0 STR_c STR_o STR_p STR_t "\0" +#define STRING_coptic0 STR_c STR_o STR_p STR_t STR_i STR_c "\0" +#define STRING_cpmn0 STR_c STR_p STR_m STR_n "\0" +#define STRING_cprt0 STR_c STR_p STR_r STR_t "\0" +#define STRING_cs0 STR_c STR_s "\0" +#define STRING_cuneiform0 STR_c STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0" +#define STRING_cwcf0 STR_c STR_w STR_c STR_f "\0" +#define STRING_cwcm0 STR_c STR_w STR_c STR_m "\0" +#define STRING_cwl0 STR_c STR_w STR_l "\0" +#define STRING_cwt0 STR_c STR_w STR_t "\0" +#define STRING_cwu0 STR_c STR_w STR_u "\0" +#define STRING_cypriot0 STR_c STR_y STR_p STR_r STR_i STR_o STR_t "\0" +#define STRING_cyprominoan0 STR_c STR_y STR_p STR_r STR_o STR_m STR_i STR_n STR_o STR_a STR_n "\0" +#define STRING_cyrillic0 STR_c STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" +#define STRING_cyrl0 STR_c STR_y STR_r STR_l "\0" +#define STRING_dash0 STR_d STR_a STR_s STR_h "\0" +#define STRING_defaultignorablecodepoint0 STR_d STR_e STR_f STR_a STR_u STR_l STR_t STR_i STR_g STR_n STR_o STR_r STR_a STR_b STR_l STR_e STR_c STR_o STR_d STR_e STR_p STR_o STR_i STR_n STR_t "\0" +#define STRING_dep0 STR_d STR_e STR_p "\0" +#define STRING_deprecated0 STR_d STR_e STR_p STR_r STR_e STR_c STR_a STR_t STR_e STR_d "\0" +#define STRING_deseret0 STR_d STR_e STR_s STR_e STR_r STR_e STR_t "\0" +#define STRING_deva0 STR_d STR_e 
STR_v STR_a "\0" +#define STRING_devanagari0 STR_d STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" +#define STRING_di0 STR_d STR_i "\0" +#define STRING_dia0 STR_d STR_i STR_a "\0" +#define STRING_diacritic0 STR_d STR_i STR_a STR_c STR_r STR_i STR_t STR_i STR_c "\0" +#define STRING_diak0 STR_d STR_i STR_a STR_k "\0" +#define STRING_divesakuru0 STR_d STR_i STR_v STR_e STR_s STR_a STR_k STR_u STR_r STR_u "\0" +#define STRING_dogr0 STR_d STR_o STR_g STR_r "\0" +#define STRING_dogra0 STR_d STR_o STR_g STR_r STR_a "\0" +#define STRING_dsrt0 STR_d STR_s STR_r STR_t "\0" +#define STRING_dupl0 STR_d STR_u STR_p STR_l "\0" +#define STRING_duployan0 STR_d STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0" +#define STRING_ebase0 STR_e STR_b STR_a STR_s STR_e "\0" +#define STRING_ecomp0 STR_e STR_c STR_o STR_m STR_p "\0" +#define STRING_egyp0 STR_e STR_g STR_y STR_p "\0" +#define STRING_egyptianhieroglyphs0 STR_e STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" +#define STRING_elba0 STR_e STR_l STR_b STR_a "\0" +#define STRING_elbasan0 STR_e STR_l STR_b STR_a STR_s STR_a STR_n "\0" +#define STRING_elym0 STR_e STR_l STR_y STR_m "\0" +#define STRING_elymaic0 STR_e STR_l STR_y STR_m STR_a STR_i STR_c "\0" +#define STRING_emod0 STR_e STR_m STR_o STR_d "\0" +#define STRING_emoji0 STR_e STR_m STR_o STR_j STR_i "\0" +#define STRING_emojicomponent0 STR_e STR_m STR_o STR_j STR_i STR_c STR_o STR_m STR_p STR_o STR_n STR_e STR_n STR_t "\0" +#define STRING_emojimodifier0 STR_e STR_m STR_o STR_j STR_i STR_m STR_o STR_d STR_i STR_f STR_i STR_e STR_r "\0" +#define STRING_emojimodifierbase0 STR_e STR_m STR_o STR_j STR_i STR_m STR_o STR_d STR_i STR_f STR_i STR_e STR_r STR_b STR_a STR_s STR_e "\0" +#define STRING_emojipresentation0 STR_e STR_m STR_o STR_j STR_i STR_p STR_r STR_e STR_s STR_e STR_n STR_t STR_a STR_t STR_i STR_o STR_n "\0" +#define STRING_epres0 STR_e STR_p STR_r STR_e STR_s "\0" +#define STRING_ethi0 STR_e STR_t 
STR_h STR_i "\0" +#define STRING_ethiopic0 STR_e STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0" +#define STRING_ext0 STR_e STR_x STR_t "\0" +#define STRING_extendedpictographic0 STR_e STR_x STR_t STR_e STR_n STR_d STR_e STR_d STR_p STR_i STR_c STR_t STR_o STR_g STR_r STR_a STR_p STR_h STR_i STR_c "\0"
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2_xclass.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2_xclass.c
Changed
@@ -7,7 +7,7 @@ Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -135,6 +135,7 @@ { const ucd_record *prop = GET_UCD(c); BOOL isprop = t == XCL_PROP; + BOOL ok; switch(*data) { @@ -160,6 +161,12 @@ if ((data1 == prop->script) == isprop) return !negated; break; + case PT_SCX: + ok = (data1 == prop->script || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data1) != 0); + if (ok == isprop) return !negated; + break; + case PT_ALNUM: if ((PRIV(ucp_gentype)prop->chartype == ucp_L || PRIV(ucp_gentype)prop->chartype == ucp_N) == isprop) @@ -207,6 +214,17 @@ } break; + case PT_BIDICL: + if ((UCD_BIDICLASS_PROP(prop) == data1) == isprop) + return !negated; + break; + + case PT_BOOL: + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), data1) != 0; + if (ok == isprop) return !negated; + break; + /* The following three properties can occur only in an XCLASS, as there is no \p or \P coding for them. */
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2grep.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2grep.c
Changed
@@ -208,14 +208,6 @@ /* Jeffrey Friedl has some debugging requirements that are not part of the regular code. */ -#ifdef JFRIEDL_DEBUG -static int S_arg = -1; -static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */ -static unsigned int jfriedl_XT = 0; /* replicate text this many times */ -static const char *jfriedl_prefix = ""; -static const char *jfriedl_postfix = ""; -#endif - static const char *colour_string = "1;31"; static const char *colour_option = NULL; static const char *dee_option = NULL; @@ -481,9 +473,6 @@ { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" }, { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" }, { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" }, -#ifdef JFRIEDL_DEBUG - { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, -#endif { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" }, { OP_NODATA, 'u', NULL, "utf", "use UTF mode" }, @@ -1816,7 +1805,7 @@ Arguments: matchptr the start of the subject length the length of the subject to match - options options for pcre_exec + options options for pcre2_match startoffset where to start matching mrc address of where to put the result of pcre2_match() @@ -2538,6 +2527,7 @@ BOOL lines_printed = FALSE; BOOL input_line_buffered = line_buffered; FILE *in = NULL; /* Ensure initialized */ +long stream_start = -1; /* Only non-negative if relevant */ /* Do the first read into the start of the buffer and set up the pointer to end of what we have. 
In the case of libz, a non-zipped .gz file will be read as a @@ -2547,7 +2537,15 @@ if (frtype != FR_LIBZ && frtype != FR_LIBBZ2) { in = (FILE *)handle; - if (is_file_tty(in)) input_line_buffered = TRUE; + if (feof(in)) + return 1; + if (is_file_tty(in)) + input_line_buffered = TRUE; + else + { + if (count_limit >= 0 && filename == stdin_name) + stream_start = ftell(in); + } } else input_line_buffered = FALSE; @@ -2594,8 +2592,8 @@ if (count_limit >= 0 && count_matched_lines >= count_limit) { - if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle)) - (void)fseek(handle, (long int)filepos, SEEK_SET); + if (stream_start >= 0) + (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET); rc = (count_limit == 0)? 1 : 0; break; } @@ -2671,56 +2669,6 @@ } } - /* Extra processing for Jeffrey Friedl's debugging. */ - -#ifdef JFRIEDL_DEBUG - if (jfriedl_XT || jfriedl_XR) - { -# include <sys/time.h> -# include <time.h> - struct timeval start_time, end_time; - struct timezone dummy; - int i; - - if (jfriedl_XT) - { - unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix); - const char *orig = ptr; - ptr = malloc(newlen + 1); - if (!ptr) { - printf("out of memory"); - pcre2grep_exit(2); - } - endptr = ptr; - strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix); - for (i = 0; i < jfriedl_XT; i++) { - strncpy(endptr, orig, length); - endptr += length; - } - strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix); - length = newlen; - } - - if (gettimeofday(&start_time, &dummy) != 0) - perror("bad gettimeofday"); - - - for (i = 0; i < jfriedl_XR; i++) - match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0, - PCRE2_NOTEMPTY, offsets, offset_size) >= 0); - - if (gettimeofday(&end_time, &dummy) != 0) - perror("bad gettimeofday"); - - double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0)) - - - (start_time.tv_sec + (start_time.tv_usec / 1000000.0))); - - printf("%s 
TIMER%.4f\n", match ? "MATCH" : "FAIL", delta); - return 0; - } -#endif - /* We come back here after a match when only_matching_count is non-zero, in order to find any further matches in the same line. This applies to --only-matching, --file-offsets, and --line-offsets. */ @@ -2975,22 +2923,6 @@ if (printname != NULL) fprintf(stdout, "%s:", printname); if (number) fprintf(stdout, "%lu:", linenumber); - /* This extra option, for Jeffrey Friedl's debugging requirements, - replaces the matched string, or a specific captured string if it exists, - with X. When this happens, colouring is ignored. */ - -#ifdef JFRIEDL_DEBUG - if (S_arg >= 0 && S_arg < mrc) - { - int first = S_arg * 2; - int last = first + 1; - FWRITE_IGNORE(ptr, 1, offsetsfirst, stdout); - fprintf(stdout, "X"); - FWRITE_IGNORE(ptr + offsetslast, 1, linelength - offsetslast, stdout); - } - else -#endif - /* In multiline mode, or if colouring, we have to split the line(s) up and search for further matches, but not of course if the line is a non-match. In multiline mode this is necessary in case there is another @@ -3266,6 +3198,7 @@ if (strcmp(pathname, "-") == 0) { + if (count_limit >= 0) setbuf(stdin, NULL); return pcre2grep(stdin, FR_PLAIN, stdin_name, (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? stdin_name : NULL); @@ -3964,29 +3897,6 @@ } } - /* Jeffrey Friedl's debugging harness uses these additional options which - are not in the right form for putting in the option table because they use - only one hyphen, yet are more than one character long. By putting them - separately here, they will not get displayed as part of the help() output, - but I don't think Jeffrey will care about that. 
*/ - -#ifdef JFRIEDL_DEBUG - else if (strcmp(argvi, "-pre") == 0) { - jfriedl_prefix = argv++i; - continue; - } else if (strcmp(argvi, "-post") == 0) { - jfriedl_postfix = argv++i; - continue; - } else if (strcmp(argvi, "-XT") == 0) { - sscanf(argv++i, "%d", &jfriedl_XT); - continue; - } else if (strcmp(argvi, "-XR") == 0) { - sscanf(argv++i, "%d", &jfriedl_XR); - continue; - } -#endif - - /* One-char options; many that have no data may be in a single argument; we continue till we hit the last one or one that needs data. */ @@ -4049,7 +3959,7 @@ /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that either has a value or defaults to something. It cannot have data in a separate item. At the moment, the only such options are "colo(u)r", - "only-matching", and Jeffrey Friedl's special -S debugging option. */ + and "only-matching". */ if (*option_data == 0 && (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER || @@ -4065,12 +3975,6 @@ only_matching_last = add_number(0, only_matching_last); if (only_matching == NULL) only_matching = only_matching_last;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2posix.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2posix.c
Changed
@@ -368,6 +368,8 @@ int options = 0; pcre2_match_data *md = (pcre2_match_data *)preg->re_match_data; +if (string == NULL) return REG_INVARG; + if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL; if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL; if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/pcre2test.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/pcre2test.c
Changed
@@ -11,7 +11,7 @@ Written by Philip Hazel Original code Copyright (c) 1997-2012 University of Cambridge - Rewritten code Copyright (c) 2016-2021 University of Cambridge + Rewritten code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -82,11 +82,7 @@ /* #define DEBUG_SHOW_MALLOC_ADDRESSES */ -/* Both libreadline and libedit are optionally supported. The user-supplied -original patch uses readline/readline.h for libedit, but in at least one system -it is installed as editline/readline.h, so the configuration code now looks for -that first, falling back to readline/readline.h. */ - +/* Both libreadline and libedit are optionally supported */ #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) #if defined(SUPPORT_LIBREADLINE) #include <readline/readline.h> @@ -94,8 +90,15 @@ #else #if defined(HAVE_EDITLINE_READLINE_H) #include <editline/readline.h> +#elif defined(HAVE_EDIT_READLINE_READLINE_H) +#include <edit/readline/readline.h> #else -#include <readline/readline.h> +#include <readline.h> +/* GNU readline defines this macro but libedit doesn't, if that ever changes +this needs to be updated or the build could break */ +#ifdef RL_VERSION_MAJOR +#include <history.h> +#endif #endif #endif #endif @@ -441,6 +444,7 @@ MOD_PAT, /* Applies to a pattern */ MOD_PATP, /* Ditto, OK for Perl test */ MOD_DAT, /* Applies to a data line */ + MOD_DATP, /* Ditto, OK for Perl test */ MOD_PD, /* Applies to a pattern or a data line */ MOD_PDP, /* As MOD_PD, OK for Perl test */ MOD_PND, /* As MOD_PD, but not for a default pattern */ @@ -516,6 +520,8 @@ #define CTL2_CALLOUT_NO_WHERE 0x00000200u #define CTL2_CALLOUT_EXTRA 0x00000400u #define CTL2_ALLVECTOR 0x00000800u +#define CTL2_NULL_SUBJECT 0x00001000u +#define CTL2_NULL_REPLACEMENT 0x00002000u #define CTL2_NL_SET 0x40000000u /* Informational */ #define CTL2_BSR_SET 0x80000000u /* 
Informational */ @@ -698,7 +704,7 @@ { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) }, - { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) }, + { "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) }, { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) }, { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) }, @@ -706,6 +712,8 @@ { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) }, + { "null_replacement", MOD_DAT, MOD_CTL, CTL2_NULL_REPLACEMENT, DO(control2) }, + { "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) }, { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)}, { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, @@ -767,7 +775,7 @@ PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL) #define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT) -#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0) +#define POSIX_SUPPORTED_MATCH_CONTROLS2 (CTL2_NULL_SUBJECT) /* Control bits that are not ignored with 'push'. 
*/ @@ -3147,7 +3155,7 @@ OR -3 if a value > 0xffff is encountered when not in UTF mode */ -static PCRE2_SIZE +static int to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr) { uint16_t *pp; @@ -3234,7 +3242,7 @@ OR -2 if a value > 0x10ffff is encountered in UTF mode */ -static PCRE2_SIZE +static int to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr) { uint32_t *pp; @@ -3579,6 +3587,7 @@ { case MOD_PNDP: case MOD_PATP: + case MOD_DATP: case MOD_PDP: break; @@ -3600,7 +3609,8 @@ else if (ctx == CTX_DAT) field = PTR(dat_context); break; - case MOD_DAT: /* Data line modifier */ + case MOD_DAT: /* Data line modifier */ + case MOD_DATP: /* Allowed for Perl test */ if (dctl != NULL) field = dctl; break; @@ -4102,7 +4112,7 @@ static void show_controls(uint32_t controls, uint32_t controls2, const char *before) { -fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "", ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "", @@ -4132,6 +4142,8 @@ ((controls & CTL_MEMORY) != 0)? " memory" : "", ((controls2 & CTL2_NL_SET) != 0)? " newline" : "", ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "", + ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "", + ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "", ((controls & CTL_POSIX) != 0)? " posix" : "", ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "", ((controls & CTL_PUSH) != 0)? 
" push" : "", @@ -5481,24 +5493,27 @@ if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0) { show_compile_options( - pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, ""); + pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS), + msg, ""); msg = ""; } if ((FLD(pat_context, extra_options) & - ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0) + (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0) { show_compile_extra_options( - FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS, - msg, ""); + FLD(pat_context, extra_options) & + (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, ""); msg = ""; } - if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 || - (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0) + if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 || + (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0) { - show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, - pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg); + show_controls( + pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS), + pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2), + msg); msg = ""; } @@ -7064,9 +7079,14 @@ VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c)); #endif -/* Now pp points to the subject string. POSIX matching is only possible in -8-bit mode, and it does not support timing or other fancy features. Some were -checked at compile time, but we need to check the match-time settings here. */ +/* Now pp points to the subject string, but if null_subject was specified, set +it to NULL to test PCRE2's behaviour. */ + +if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL; + +/* POSIX matching is only possible in 8-bit mode, and it does not support +timing or other fancy features. Some were checked at compile time, but we need +to check the match-time settings here. 
*/ #ifdef SUPPORT_PCRE2_8 if ((pat_patctl.control & CTL_POSIX) != 0) @@ -7293,6 +7313,7 @@ uint8_t *pr; uint8_t rbufferREPLACE_BUFFSIZE; uint8_t nbufferREPLACE_BUFFSIZE; + uint8_t *rbptr; uint32_t xoptions; uint32_t emoption; /* External match option */ PCRE2_SIZE j, rlen, nsize, erroroffset; @@ -7443,9 +7464,14 @@
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitConfigInternal.h -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitConfigInternal.h
Changed
@@ -60,7 +60,7 @@ SLJIT_LITTLE_ENDIAN : little endian architecture SLJIT_BIG_ENDIAN : big endian architecture SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) - SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information Constants: SLJIT_NUMBER_OF_REGISTERS : number of available registers @@ -148,7 +148,7 @@ #endif #elif defined (__aarch64__) #define SLJIT_CONFIG_ARM_64 1 -#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__)) +#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__)) #define SLJIT_CONFIG_PPC_64 1 #elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) #define SLJIT_CONFIG_PPC_32 1 @@ -156,7 +156,7 @@ #define SLJIT_CONFIG_MIPS_32 1 #elif defined(__mips64) #define SLJIT_CONFIG_MIPS_64 1 -#elif defined(__sparc__) || defined(__sparc) +#elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64) #define SLJIT_CONFIG_SPARC_32 1 #elif defined(__s390x__) #define SLJIT_CONFIG_S390X 1 @@ -274,9 +274,13 @@ #ifndef SLJIT_INLINE /* Inline functions. Some old compilers do not support them. */ -#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510 +#ifdef __SUNPRO_C +#if __SUNPRO_C < 0x560 #define SLJIT_INLINE #else +#define SLJIT_INLINE inline +#endif /* __SUNPRO_C */ +#else #define SLJIT_INLINE __inline #endif #endif /* !SLJIT_INLINE */ @@ -319,18 +323,36 @@ /* Instruction cache flush. 
*/ /****************************/ +/* + * TODO: + * + * clang >= 15 could be safe to enable below + * older versions are known to abort in some targets + * https://github.com/PhilipHazel/pcre2/issues/92 + * + * beware APPLE is known to have removed the code in iOS so + * it will need to be excempted or result in broken builds + */ #if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) -#if __has_builtin(__builtin___clear_cache) +#if __has_builtin(__builtin___clear_cache) && !defined(__clang__) +/* + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248 + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811 + * gcc's clear_cache builtin for power and sparc are broken + */ +#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32) #define SLJIT_CACHE_FLUSH(from, to) \ __builtin___clear_cache((char*)(from), (char*)(to)) +#endif -#endif /* __has_builtin(__builtin___clear_cache) */ +#endif /* gcc >= 10 */ #endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */ #ifndef SLJIT_CACHE_FLUSH -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) /* Not required to implement on archs with unified caches. */ #define SLJIT_CACHE_FLUSH(from, to) @@ -340,9 +362,9 @@ /* Supported by all macs since Mac OS 10.5. However, it does not work on non-jailbroken iOS devices, although the compilation is successful. 
*/ - +#include <libkern/OSCacheControl.h> #define SLJIT_CACHE_FLUSH(from, to) \ - sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) + sys_icache_invalidate((void*)(from), (size_t)((char*)(to) - (char*)(from))) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) @@ -351,33 +373,33 @@ ppc_cache_flush((from), (to)) #define SLJIT_CACHE_FLUSH_OWN_IMPL 1 -#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ #define SLJIT_CACHE_FLUSH(from, to) \ - __builtin___clear_cache((char*)(from), (char*)(to)) - -#elif defined __ANDROID__ + sparc_cache_flush((from), (to)) +#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 -/* Android lacks __clear_cache; instead, cacheflush should be used. */ +#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__) #define SLJIT_CACHE_FLUSH(from, to) \ - cacheflush((long)(from), (long)(to), 0) + __builtin___clear_cache((char*)(from), (char*)(to)) -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#elif defined __ANDROID__ -/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ +/* Android ARMv7 with gcc lacks __clear_cache; use cacheflush instead. */ +#include <sys/cachectl.h> #define SLJIT_CACHE_FLUSH(from, to) \ - sparc_cache_flush((from), (to)) -#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 + cacheflush((long)(from), (long)(to), 0) #elif defined _WIN32 #define SLJIT_CACHE_FLUSH(from, to) \ - FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from)) + FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from)) #else -/* Calls __ARM_NR_cacheflush on ARM-Linux. */ +/* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. 
*/ #define SLJIT_CACHE_FLUSH(from, to) \ __clear_cache((char*)(from), (char*)(to)) @@ -645,18 +667,23 @@ #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 9 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) #define SLJIT_PREF_SHIFT_REG SLJIT_R2 #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #define SLJIT_NUMBER_OF_REGISTERS 13 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 #ifndef _WIN64 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_LOCALS_OFFSET_BASE 0 #else /* _WIN64 */ #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 -#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10 +#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw)) #endif /* !_WIN64 */ #define SLJIT_PREF_SHIFT_REG SLJIT_R3 @@ -664,31 +691,39 @@ #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) #define SLJIT_NUMBER_OF_REGISTERS 26 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 -#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw)) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) #define SLJIT_NUMBER_OF_REGISTERS 23 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define 
SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) -#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw)) #elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitExecAllocator.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitExecAllocator.c
Changed
@@ -66,7 +66,7 @@ /* --------------------------------------------------------------------- */ /* 64 KByte. */ -#define CHUNK_SIZE 0x10000 +#define CHUNK_SIZE (sljit_uw)0x10000u /* alloc_chunk / free_chunk : @@ -112,7 +112,7 @@ static SLJIT_INLINE int get_map_jit_flag() { - sljit_sw page_size; + size_t page_size; void *ptr; struct utsname name; static int map_jit_flag = -1; @@ -139,8 +139,9 @@ #endif /* MAP_ANON */ #else /* !SLJIT_CONFIG_X86 */ #if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) -#error Unsupported architecture +#error "Unsupported architecture" #endif /* SLJIT_CONFIG_ARM */ +#include <AvailabilityMacros.h> #include <pthread.h> #define SLJIT_MAP_JIT (MAP_JIT) @@ -149,7 +150,11 @@ static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec) { +#if MAC_OS_X_VERSION_MIN_REQUIRED >= 110000 pthread_jit_write_protect_np(enable_exec); +#else +#error "Must target Big Sur or newer" +#endif /* BigSur */ } #endif /* SLJIT_CONFIG_X86 */ #else /* !TARGET_OS_OSX */ @@ -187,10 +192,13 @@ if (retval == MAP_FAILED) return NULL; +#ifdef __FreeBSD__ + /* HardenedBSD's mmap lies, so check permissions again */ if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) { munmap(retval, size); return NULL; } +#endif /* FreeBSD */ SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0); @@ -227,7 +235,7 @@ #define AS_FREE_BLOCK(base, offset) \ ((struct free_block*)(((sljit_u8*)base) + offset)) #define MEM_START(base) ((void*)(((sljit_u8*)base) + sizeof(struct block_header))) -#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7) static struct free_block* free_blocks; static sljit_uw allocated_size;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitLir.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitLir.c
Changed
@@ -90,26 +90,28 @@ #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#define SSIZE_OF(type) ((sljit_s32)sizeof(sljit_ ## type)) + #define VARIABLE_FLAG_SHIFT (10) #define VARIABLE_FLAG_MASK (0x3f << VARIABLE_FLAG_SHIFT) #define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT) #define GET_OPCODE(op) \ - ((op) & ~(SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + ((op) & ~(SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #define HAS_FLAGS(op) \ ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #define GET_ALL_FLAGS(op) \ - ((op) & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + ((op) & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #define TYPE_CAST_NEEDED(op) \ ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32) -#else +#else /* !SLJIT_64BIT_ARCHITECTURE */ #define TYPE_CAST_NEEDED(op) \ ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) -#endif +#endif /* SLJIT_64BIT_ARCHITECTURE */ #define BUF_SIZE 4096 @@ -126,11 +128,10 @@ #define TO_OFFS_REG(reg) ((reg) << 8) /* When reg cannot be unused. */ #define FAST_IS_REG(reg) ((reg) <= REG_MASK) -/* When reg can be unused. */ -#define SLOW_IS_REG(reg) ((reg) > 0 && (reg) <= REG_MASK) /* Mask for argument types. */ -#define SLJIT_DEF_MASK ((1 << SLJIT_DEF_SHIFT) - 1) +#define SLJIT_ARG_MASK 0x7 +#define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG) /* Jump flags. */ #define JUMP_LABEL 0x1 @@ -247,8 +248,11 @@ #define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ - (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \ - extra) * sizeof(sljit_sw)) + (saveds) + (sljit_s32)(extra)) * (sljit_s32)sizeof(sljit_sw)) + +#define GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, size) \ + (((fscratches < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS ? 
0 : (fscratches - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)) + \ + (fsaveds)) * (sljit_s32)(size)) #define ADJUST_LOCAL_OFFSET(p, i) \ if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ @@ -379,9 +383,7 @@ && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), invalid_integer_types); - SLJIT_COMPILE_ASSERT(SLJIT_I32_OP == SLJIT_F32_OP, - int_op_and_single_op_must_be_the_same); - SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_F32_OP, + SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_32, rewritable_jump_and_single_op_must_not_be_the_same); SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_EQUAL_F64 & 0x1) && !(SLJIT_JUMP & 0x1), conditional_flags_must_be_even_numbers); @@ -415,7 +417,7 @@ compiler->local_size = -1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->args = -1; + compiler->args_size = -1; #endif #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) @@ -439,6 +441,13 @@ compiler->delay_slot = UNMOVABLE_INS; #endif +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->last_flags = 0; + compiler->last_return = -1; + compiler->logical_local_size = 0; +#endif + #if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) if (!compiler_initialized) { init_compiler(); @@ -488,7 +497,7 @@ SLJIT_UNUSED_ARG(exec_allocator_data); /* Remove thumb mode flag. 
*/ - SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~0x1), exec_allocator_data); + SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~(sljit_uw)0x1), exec_allocator_data); } #elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) @@ -511,7 +520,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) { if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { - jump->flags &= ~JUMP_ADDR; + jump->flags &= (sljit_uw)~JUMP_ADDR; jump->flags |= JUMP_LABEL; jump->u.label = label; } @@ -520,7 +529,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) { if (SLJIT_LIKELY(!!jump)) { - jump->flags &= ~JUMP_LABEL; + jump->flags &= (sljit_uw)~JUMP_LABEL; jump->flags |= JUMP_ADDR; jump->u.target = target; } @@ -533,7 +542,7 @@ } #define SLJIT_CURRENT_FLAGS_ALL \ - (SLJIT_CURRENT_FLAGS_I32_OP | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE) + (SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE) SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) { @@ -547,7 +556,7 @@ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->last_flags = 0; if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ALL)) == 0) { - compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_32 | SLJIT_SET_Z)); } #endif } @@ -607,7 +616,7 @@ return NULL; size = (size + 3) & ~3; #endif - return ensure_abuf(compiler, size); + return ensure_abuf(compiler, (sljit_uw)size); } static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) @@ -626,20 +635,6 @@ compiler->buf = prev; } -static SLJIT_INLINE sljit_s32 get_arg_count(sljit_s32 arg_types) -{ - sljit_s32 arg_count 
= 0; - - arg_types >>= SLJIT_DEF_SHIFT; - while (arg_types) { - arg_count++; - arg_types >>= SLJIT_DEF_SHIFT; - } - - return arg_count; -} - - /* Only used in RISC architectures where the instruction size is constant */ #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) @@ -679,6 +674,7 @@ compiler->fscratches = fscratches; compiler->fsaveds = fsaveds; #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; compiler->logical_local_size = local_size; #endif } @@ -696,6 +692,7 @@ compiler->fscratches = fscratches; compiler->fsaveds = fsaveds; #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; compiler->logical_local_size = local_size; #endif } @@ -711,7 +708,7 @@ compiler->last_label = label; } -static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_s32 flags) +static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_u32 flags) { jump->next = NULL; jump->flags = flags; @@ -751,6 +748,58 @@ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) +static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches)
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitLir.h -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitLir.h
Changed
@@ -163,13 +163,6 @@ is not available at all. */ -/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1 - or sljit_emit_op2 operations the result is discarded. Some status - flags must be set when the destination is SLJIT_UNUSED, because the - operation would have no effect otherwise. Other SLJIT operations do - not support SLJIT_UNUSED as a destination operand. */ -#define SLJIT_UNUSED 0 - /* Scratch registers. */ #define SLJIT_R0 1 #define SLJIT_R1 2 @@ -231,9 +224,6 @@ value. The FR and FS register sets are overlap in the same way as R and S register sets. See above. */ -/* Note: SLJIT_UNUSED as destination is not valid for floating point - operations, since they cannot be used for setting flags. */ - /* Floating point scratch registers. */ #define SLJIT_FR0 1 #define SLJIT_FR1 2 @@ -263,39 +253,38 @@ /* Argument type definitions */ /* --------------------------------------------------------------------- */ -/* Argument type definitions. - Used by SLJIT_DEF_ARGx and SLJIT_DEF_RET macros. */ - -#define SLJIT_ARG_TYPE_VOID 0 -#define SLJIT_ARG_TYPE_SW 1 -#define SLJIT_ARG_TYPE_UW 2 -#define SLJIT_ARG_TYPE_S32 3 -#define SLJIT_ARG_TYPE_U32 4 -#define SLJIT_ARG_TYPE_F32 5 -#define SLJIT_ARG_TYPE_F64 6 - /* The following argument type definitions are used by sljit_emit_enter, sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. - The following return type definitions are used by sljit_emit_call - and sljit_emit_icall functions. - When a function is called, the first integer argument must be placed - in SLJIT_R0, the second in SLJIT_R1, and so on. Similarly the first - floating point argument must be placed in SLJIT_FR0, the second in - SLJIT_FR1, and so on. + As for sljit_emit_call and sljit_emit_icall, the first integer argument + must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on. + Similarly the first floating point argument must be placed into SLJIT_FR0, + the second one into SLJIT_FR1, and so on. 
+ + As for sljit_emit_enter, the integer arguments can be stored in scratch + or saved registers. The first integer argument without _R postfix is + stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer + arguments with _R postfix are placed into scratch registers. The index + of the scratch register is the count of the previous integer arguments + starting from SLJIT_R0. The floating point arguments are always placed + into SLJIT_FR0, SLJIT_FR1, and so on. + + Note: if a function is called by sljit_emit_call/sljit_emit_icall and + an argument is stored in a scratch register by sljit_emit_enter, + that argument uses the same scratch register index for both + integer and floating point arguments. Example function definition: - sljit_f32 SLJIT_FUNC example_c_callback(sljit_sw arg_a, + sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); Argument type definition: - SLJIT_DEF_RET(SLJIT_ARG_TYPE_F32) - | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F64) - | SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_U32) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F32) + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4) Short form of argument type definition: - SLJIT_RET(F32) | SLJIT_ARG1(SW) | SLJIT_ARG2(F64) - | SLJIT_ARG3(S32) | SLJIT_ARG4(F32) + SLJIT_ARGS4(32, P, F64, 32, F32) Argument passing: arg_a must be placed in SLJIT_R0 @@ -303,34 +292,73 @@ arg_b must be placed in SLJIT_FR0 arg_d must be placed in SLJIT_FR1 -Note: - The SLJIT_ARG_TYPE_VOID type is only supported by - SLJIT_DEF_RET, and SLJIT_ARG_TYPE_VOID is also the - default value when SLJIT_DEF_RET is not specified. 
*/ -#define SLJIT_DEF_SHIFT 4 -#define SLJIT_DEF_RET(type) (type) -#define SLJIT_DEF_ARG1(type) ((type) << SLJIT_DEF_SHIFT) -#define SLJIT_DEF_ARG2(type) ((type) << (2 * SLJIT_DEF_SHIFT)) -#define SLJIT_DEF_ARG3(type) ((type) << (3 * SLJIT_DEF_SHIFT)) -#define SLJIT_DEF_ARG4(type) ((type) << (4 * SLJIT_DEF_SHIFT)) + Examples for argument processing by sljit_emit_enter: + SLJIT_ARGS4(VOID, P, 32_R, F32, W) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1 + + SLJIT_ARGS4(VOID, W, W_R, W, W_R) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3 -/* Short form of the macros above. + SLJIT_ARGS4(VOID, F64, W, F32, W_R) + Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1 - For example the following definition: - SLJIT_DEF_RET(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_F32) + Note: it is recommended to pass the scratch arguments first + followed by the saved arguments: + + SLJIT_ARGS4(VOID, W_R, W_R, W, W) + Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1 +*/ + +/* The following flag is only allowed for the integer arguments of + sljit_emit_enter. When the flag is set, the integer argument is + stored in a scratch register instead of a saved register. */ +#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8 + +/* Void result, can only be used by SLJIT_ARG_RETURN. */ +#define SLJIT_ARG_TYPE_VOID 0 +/* Machine word sized integer argument or result. */ +#define SLJIT_ARG_TYPE_W 1 +#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 32 bit integer argument or result. */ +#define SLJIT_ARG_TYPE_32 2 +#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG) +/* Pointer sized integer argument or result. */ +#define SLJIT_ARG_TYPE_P 3 +#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 64 bit floating point argument or result. */ +#define SLJIT_ARG_TYPE_F64 4 +/* 32 bit floating point argument or result. 
*/ +#define SLJIT_ARG_TYPE_F32 5 + +#define SLJIT_ARG_SHIFT 4 +#define SLJIT_ARG_RETURN(type) (type) +#define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT)) + +/* Simplified argument list definitions. + + The following definition: + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1) can be shortened to: - SLJIT_RET(SW) | SLJIT_ARG1(F32) - -Note: - The VOID type is only supported by SLJIT_RET, and - VOID is also the default value when SLJIT_RET is - not specified. */ -#define SLJIT_RET(type) SLJIT_DEF_RET(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG1(type) SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG2(type) SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG3(type) SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG4(type) SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_ ## type) + SLJIT_ARGS1(W, F32) +*/ + +#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type + +#define SLJIT_ARGS0(ret) \ + SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret)) + +#define SLJIT_ARGS1(ret, arg1) \ + (SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) + +#define SLJIT_ARGS2(ret, arg1, arg2) \ + (SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) + +#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \ + (SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) + +#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \ + (SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) /* --------------------------------------------------------------------- */ /* Main structures and functions */ @@ -408,7 +436,7 @@ /* Code size. */ sljit_uw size; /* Relative offset of the executable mapping from the writable mapping. */ - sljit_uw executable_offset; + sljit_sw executable_offset; /* Executable size for statistical purposes. 
*/ sljit_uw executable_size; @@ -417,17 +445,13 @@ #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_s32 args; + sljit_s32 args_size; sljit_s32 locals_offset;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeARM_32.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeARM_32.c
Changed
@@ -65,12 +65,17 @@ }; static const sljit_u8 freg_mapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 3 = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 }; -#define RM(rm) (reg_maprm) -#define RD(rd) (reg_maprd << 12) -#define RN(rn) (reg_maprn << 16) +#define RM(rm) ((sljit_uw)reg_maprm) +#define RM8(rm) ((sljit_uw)reg_maprm << 8) +#define RD(rd) ((sljit_uw)reg_maprd << 12) +#define RN(rn) ((sljit_uw)reg_maprn << 16) + +#define VM(rm) ((sljit_uw)freg_maprm) +#define VD(rd) ((sljit_uw)freg_maprd << 12) +#define VN(rn) ((sljit_uw)freg_maprn << 16) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -107,6 +112,7 @@ #define SBC 0xe0c00000 #define SMULL 0xe0c00090 #define SUB 0xe0400000 +#define TST 0xe1000000 #define UMULL 0xe0800090 #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 @@ -115,12 +121,15 @@ #define VCVT_F64_F32 0xeeb70ac0 #define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 +#define VLDR_F32 0xed100a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 #define VMOV2 0xec400a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 @@ -204,7 +213,7 @@ cpool_unique_ptr = compiler->cpool_unique; do { if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { - cpool_index = cpool_ptr - compiler->cpool; + cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool); break; } cpool_ptr++; @@ -293,7 +302,7 @@ while (last_pc_patch < code_ptr) { /* Data transfer instruction with Rn == r15. */ if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) { - diff = const_pool - last_pc_patch; + diff = (sljit_uw)(const_pool - last_pc_patch); ind = (*last_pc_patch) & 0xfff; /* Must be a load instruction with immediate offset. 
*/ @@ -308,12 +317,12 @@ SLJIT_ASSERT(diff >= 1); if (diff >= 2 || ind > 0) { - diff = (diff + ind - 2) << 2; + diff = (diff + (sljit_uw)ind - 2) << 2; SLJIT_ASSERT(diff <= 0xfff); - *last_pc_patch = (*last_pc_patch & ~0xfff) | diff; + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff; } else - *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004; + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004; } last_pc_patch++; } @@ -329,24 +338,24 @@ static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) { - sljit_s32 value; + sljit_u32 value; struct future_patch *curr_patch, *prev_patch; SLJIT_UNUSED_ARG(compiler); /* Using the values generated by patch_pc_relative_loads. */ if (!*first_patch) - value = (sljit_s32)cpool_start_addresscpool_current_index; + value = cpool_start_addresscpool_current_index; else { curr_patch = *first_patch; prev_patch = NULL; while (1) { if (!curr_patch) { - value = (sljit_s32)cpool_start_addresscpool_current_index; + value = cpool_start_addresscpool_current_index; break; } if ((sljit_uw)curr_patch->index == cpool_current_index) { - value = curr_patch->value; + value = (sljit_uw)curr_patch->value; if (prev_patch) prev_patch->next = curr_patch->next; else @@ -359,8 +368,8 @@ } } - if (value >= 0) { - if ((sljit_uw)value > cpool_current_index) { + if ((sljit_sw)value >= 0) { + if (value > cpool_current_index) { curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); if (!curr_patch) { while (*first_patch) { @@ -371,8 +380,8 @@ return SLJIT_ERR_ALLOC_FAILED; } curr_patch->next = *first_patch; - curr_patch->index = value; - curr_patch->value = cpool_start_addressvalue; + curr_patch->index = (sljit_sw)value; + curr_patch->value = (sljit_sw)cpool_start_addressvalue; *first_patch = curr_patch; } cpool_start_addressvalue = 
*buf_ptr; @@ -395,8 +404,8 @@ static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { - FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); - return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff))); + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff)); } #endif @@ -554,8 +563,9 @@ } static sljit_uw get_imm(sljit_uw imm); +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm); -static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache) +static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw *ptr = (sljit_uw*)addr; @@ -658,7 +668,7 @@ sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_sw addr; + sljit_uw addr; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw cpool_size; sljit_uw cpool_skip_alignment; @@ -737,7 +747,7 @@ if (label && label->size == word_count) { /* Points after the current instruction. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; next_addr = compute_next_addr(label, jump, const_, put_label); @@ -770,7 +780,7 @@ if (label && label->size == word_count) { /* code_ptr can be affected above. 
*/ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); - label->size = (code_ptr + 1) - code; + label->size = (sljit_uw)((code_ptr + 1) - code); label = label->next; } if (const_ && const_->addr == word_count) { @@ -799,8 +809,8 @@ cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); if (cpool_current_index > 0) { /* Unconditional branch. */ - *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); - code_ptr = cpool_start_address + cpool_current_index; + *code_ptr = B | (((sljit_uw)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); } cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; cpool_current_index = 0; @@ -822,7 +832,7 @@ cpool_start_address = ALIGN_INSTRUCTION(code_ptr); cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); if (cpool_current_index > 0) - code_ptr = cpool_start_address + cpool_current_index; + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); buf_ptr = compiler->cpool; buf_end = buf_ptr + compiler->cpool_fill;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeARM_64.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeARM_64.c
Changed
@@ -48,19 +48,20 @@ }; static const sljit_u8 freg_mapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 3 = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 15, 14, 13, 12, 11, 10, 9, 8, 30, 31 }; -#define W_OP (1u << 31) -#define RD(rd) (reg_maprd) -#define RT(rt) (reg_maprt) -#define RN(rn) (reg_maprn << 5) -#define RT2(rt2) (reg_maprt2 << 10) -#define RM(rm) (reg_maprm << 16) -#define VD(vd) (freg_mapvd) -#define VT(vt) (freg_mapvt) -#define VN(vn) (freg_mapvn << 5) -#define VM(vm) (freg_mapvm << 16) +#define W_OP ((sljit_ins)1 << 31) +#define RD(rd) ((sljit_ins)reg_maprd) +#define RT(rt) ((sljit_ins)reg_maprt) +#define RN(rn) ((sljit_ins)reg_maprn << 5) +#define RT2(rt2) ((sljit_ins)reg_maprt2 << 10) +#define RM(rm) ((sljit_ins)reg_maprm << 16) +#define VD(vd) ((sljit_ins)freg_mapvd) +#define VT(vt) ((sljit_ins)freg_mapvt) +#define VT2(vt) ((sljit_ins)freg_mapvt << 10) +#define VN(vn) ((sljit_ins)freg_mapvn << 5) +#define VM(vm) ((sljit_ins)freg_mapvm << 16) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -96,8 +97,10 @@ #define FNEG 0x1e614000 #define FSUB 0x1e603800 #define LDRI 0xf9400000 +#define LDRI_F64 0xfd400000 #define LDP 0xa9400000 -#define LDP_PRE 0xa9c00000 +#define LDP_F64 0x6d400000 +#define LDP_POST 0xa8c00000 #define LDR_PRE 0xf8400c00 #define LSLV 0x9ac02000 #define LSRV 0x9ac02400 @@ -117,10 +120,12 @@ #define SMADDL 0x9b200000 #define SMULH 0x9b403c00 #define STP 0xa9000000 +#define STP_F64 0x6d000000 #define STP_PRE 0xa9800000 #define STRB 0x38206800 #define STRBI 0x39000000 #define STRI 0xf9000000 +#define STRI_F64 0xfd000000 #define STR_FI 0x3d000000 #define STR_FR 0x3c206800 #define STUR_FI 0x3c000000 @@ -145,10 +150,10 @@ static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) { - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); - FAIL_IF(push_inst(compiler, MOVK 
| RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21))); - return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21)); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm & 0xffff) << 5))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 16) & 0xffff) << 5) | (1 << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 32) & 0xffff) << 5) | (2 << 21))); + return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21)); } static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) @@ -171,14 +176,14 @@ diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; if (jump->flags & IS_COND) { - diff += sizeof(sljit_ins); + diff += SSIZE_OF(ins); if (diff <= 0xfffff && diff >= -0x100000) { code_ptr-5 ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; jump->addr -= sizeof(sljit_ins); jump->flags |= PATCH_COND; return 5; } - diff -= sizeof(sljit_ins); + diff -= SSIZE_OF(ins); } if (diff <= 0x7ffffff && diff >= -0x8000000) { @@ -231,8 +236,8 @@ sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_uw addr; - sljit_s32 dst; + sljit_sw addr; + sljit_u32 dst; struct sljit_label *label; struct sljit_jump *jump; @@ -271,7 +276,7 @@ /* These structures are ordered by their address. 
*/ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -300,7 +305,7 @@ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -313,58 +318,58 @@ jump = compiler->jumps; while (jump) { do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); - buf_ptr0 = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); + buf_ptr0 = ((jump->flags & IS_BL) ? 
BL : B) | (sljit_ins)(addr & 0x3ffffff); if (jump->flags & IS_COND) buf_ptr-1 -= (4 << 5); break; } if (jump->flags & PATCH_COND) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); - buf_ptr0 = (buf_ptr0 & ~0xffffe0) | ((addr & 0x7ffff) << 5); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); + buf_ptr0 = (buf_ptr0 & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); break; } - SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl); - SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl); + SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); dst = buf_ptr0 & 0x1f; - buf_ptr0 = MOVZ | dst | ((addr & 0xffff) << 5); - buf_ptr1 = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21); + buf_ptr0 = MOVZ | dst | (((sljit_ins)addr & 0xffff) << 5); + buf_ptr1 = MOVK | dst | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21); if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) - buf_ptr2 = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21); + buf_ptr2 = MOVK | dst | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21); if (jump->flags & PATCH_ABS64) - buf_ptr3 = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21); + buf_ptr3 = MOVK | dst | ((sljit_ins)(addr >> 48) << 5) | (3 << 21); } while (0); jump = jump->next; } put_label = compiler->put_labels; while (put_label) { - addr = put_label->label->addr; - buf_ptr = (sljit_ins *)put_label->addr; + addr = (sljit_sw)put_label->label->addr; + buf_ptr = (sljit_ins*)put_label->addr; - buf_ptr0 |= (addr & 0xffff) << 5; - buf_ptr1 |= ((addr >> 16) & 0xffff) << 5; + buf_ptr0 |= ((sljit_ins)addr & 0xffff) << 5; + buf_ptr1 |= 
((sljit_ins)(addr >> 16) & 0xffff) << 5; if (put_label->flags >= 1) - buf_ptr2 |= ((addr >> 32) & 0xffff) << 5; + buf_ptr2 |= ((sljit_ins)(addr >> 32) & 0xffff) << 5; if (put_label->flags >= 2) - buf_ptr3 |= ((addr >> 48) & 0xffff) << 5; + buf_ptr3 |= (sljit_ins)(addr >> 48) << 5; put_label = put_label->next; } compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -426,11 +431,12 @@ value >>= 1; \ }
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeARM_T2_32.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeARM_T2_32.c
Changed
@@ -50,40 +50,42 @@ }; static const sljit_u8 freg_mapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 3 = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 }; #define COPY_BITS(src, from, to, bits) \ - ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to)) + ((from >= to ? ((sljit_ins)(src) >> (from - to)) : ((sljit_ins)(src) << (to - from))) & (((1 << bits) - 1) << to)) + +#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm)) /* Thumb16 encodings. */ -#define RD3(rd) (reg_maprd) -#define RN3(rn) (reg_maprn << 3) -#define RM3(rm) (reg_maprm << 6) -#define RDN3(rdn) (reg_maprdn << 8) -#define IMM3(imm) (imm << 6) -#define IMM8(imm) (imm) +#define RD3(rd) ((sljit_ins)reg_maprd) +#define RN3(rn) ((sljit_ins)reg_maprn << 3) +#define RM3(rm) ((sljit_ins)reg_maprm << 6) +#define RDN3(rdn) ((sljit_ins)reg_maprdn << 8) +#define IMM3(imm) ((sljit_ins)imm << 6) +#define IMM8(imm) ((sljit_ins)imm) /* Thumb16 helpers. */ #define SET_REGS44(rd, rn) \ - ((reg_maprn << 3) | (reg_maprd & 0x7) | ((reg_maprd & 0x8) << 4)) + (((sljit_ins)reg_maprn << 3) | ((sljit_ins)reg_maprd & 0x7) | (((sljit_ins)reg_maprd & 0x8) << 4)) #define IS_2_LO_REGS(reg1, reg2) \ (reg_mapreg1 <= 7 && reg_mapreg2 <= 7) #define IS_3_LO_REGS(reg1, reg2, reg3) \ (reg_mapreg1 <= 7 && reg_mapreg2 <= 7 && reg_mapreg3 <= 7) /* Thumb32 encodings. 
*/ -#define RD4(rd) (reg_maprd << 8) -#define RN4(rn) (reg_maprn << 16) -#define RM4(rm) (reg_maprm) -#define RT4(rt) (reg_maprt << 12) -#define DD4(dd) (freg_mapdd << 12) -#define DN4(dn) (freg_mapdn << 16) -#define DM4(dm) (freg_mapdm) +#define RD4(rd) ((sljit_ins)reg_maprd << 8) +#define RN4(rn) ((sljit_ins)reg_maprn << 16) +#define RM4(rm) ((sljit_ins)reg_maprm) +#define RT4(rt) ((sljit_ins)reg_maprt << 12) +#define DD4(dd) ((sljit_ins)freg_mapdd << 12) +#define DN4(dn) ((sljit_ins)freg_mapdn << 16) +#define DM4(dm) ((sljit_ins)freg_mapdm) #define IMM5(imm) \ - (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6)) + (COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6)) #define IMM12(imm) \ - (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)) + (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff)) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -100,7 +102,8 @@ #define ADDSI8 0x3000 #define ADD_W 0xeb000000 #define ADDWI 0xf2000000 -#define ADD_SP 0xb000 +#define ADD_SP 0x4485 +#define ADD_SP_I 0xb000 #define ADD_W 0xeb000000 #define ADD_WI 0xf1000000 #define ANDI 0xf0000000 @@ -126,6 +129,8 @@ #define EORS 0x4040 #define EOR_W 0xea800000 #define IT 0xbf00 +#define LDR_SP 0x9800 +#define LDR 0xf8d00000 #define LDRI 0xf8500800 #define LSLS 0x4080 #define LSLSI 0x0000 @@ -168,13 +173,15 @@ #define SUBSI8 0x3800 #define SUB_W 0xeba00000 #define SUBWI 0xf2a00000 -#define SUB_SP 0xb080 +#define SUB_SP_I 0xb080 #define SUB_WI 0xf1a00000 #define SXTB 0xb240 #define SXTB_W 0xfa4ff080 #define SXTH 0xb200 #define SXTH_W 0xfa0ff080 #define TST 0x4200 +#define TSTI 0xf0000f00 +#define TST_W 0xea000f00 #define UDIV 0xfbb0f0f0 #define UMULL 0xfba00000 #define UXTB 0xb2c0 @@ -188,12 +195,15 @@ #define VCVT_F64_F32 0xeeb70ac0 #define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 +#define VLDR_F32 0xed100a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 #define VMOV2 
0xec400a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 @@ -204,7 +214,7 @@ ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16)); FAIL_IF(!ptr); - *ptr = inst; + *ptr = (sljit_u16)(inst); compiler->size++; return SLJIT_SUCCESS; } @@ -213,8 +223,8 @@ { sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); - *ptr++ = inst >> 16; - *ptr = inst; + *ptr++ = (sljit_u16)(inst >> 16); + *ptr = (sljit_u16)(inst); compiler->size += 2; return SLJIT_SUCCESS; } @@ -229,12 +239,12 @@ static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) { - sljit_s32 dst = inst1 & 0x0f00; + sljit_ins dst = inst1 & 0x0f00; SLJIT_ASSERT(((inst0 & 0xfbf0) == (MOVW >> 16)) && ((inst2 & 0xfbf0) == (MOVT >> 16)) && dst == (inst3 & 0x0f00)); - inst0 = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1); - inst1 = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff); - inst2 = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1); - inst3 = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16); + inst0 = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1)); + inst1 = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff)); + inst2 = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1)); + inst3 = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16)); } static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) @@ -318,24 +328,24 @@ case 1: /* Encoding T1 of 'B' instruction */ SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); - jump_inst0 = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff); + jump_inst0 = 
(sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff)); return; case 2: /* Encoding T3 of 'B' instruction */ SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); - jump_inst0 = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1); - jump_inst1 = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff); + jump_inst0 = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1)); + jump_inst1 = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff)); return; case 3: SLJIT_ASSERT(jump->flags & IS_COND); - *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8; + *jump_inst++ = (sljit_u16)(IT | ((jump->flags >> 4) & 0xf0) | 0x8); diff--; type = 5; break; case 4: /* Encoding T2 of 'B' instruction */ SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); - jump_inst0 = 0xe000 | (diff & 0x7ff); + jump_inst0 = (sljit_u16)(0xe000 | (diff & 0x7ff)); return; } @@ -345,8 +355,8 @@ s = (diff >> 23) & 0x1; j1 = (~(diff >> 22) ^ s) & 0x1; j2 = (~(diff >> 21) ^ s) & 0x1; - jump_inst0 = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10); - jump_inst1 = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); + jump_inst0 = (sljit_u16)(0xf000 | ((sljit_ins)s << 10) | COPY_BITS(diff, 11, 0, 10)); + jump_inst1 = (sljit_u16)((j1 << 13) | (j2 << 11) | (diff & 0x7ff)); /* The others have a common form. */ if (type == 5) /* Encoding T4 of 'B' instruction */ @@ -405,7 +415,7 @@ /* These structures are ordered by their address. */ if (label && label->size == half_count) { label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; }
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeMIPS_32.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeMIPS_32.c
Changed
@@ -73,50 +73,49 @@ switch (GET_OPCODE(op)) { case SLJIT_MOV: - case SLJIT_MOV_U32: - case SLJIT_MOV_S32: - case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S8) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); #else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); #endif /* SLJIT_MIPS_REV >= 1 */ - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S16) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - return push_inst(compiler, 
SEH | T(src2) | D(dst), DR(dst)); + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); #else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); #endif /* SLJIT_MIPS_REV >= 1 */ - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_NOT: @@ -438,131 +437,120 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr, sljit_u32 *extra_space) { - sljit_s32 stack_offset = 0; - sljit_s32 arg_count = 0; + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; sljit_s32 float_arg_count = 0; sljit_s32 word_arg_count = 0; sljit_s32 types = 0; - sljit_s32 arg_count_save, types_save; sljit_ins prev_ins = NOP; sljit_ins ins = NOP; sljit_u8 offsets4; + sljit_u8 *offsets_ptr = offsets; SLJIT_ASSERT(reg_mapTMP_REG1 == 4 && freg_mapTMP_FREG1 == 12); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; + + /* See ABI description in sljit_emit_enter. 
*/ while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + *offsets_ptr = (sljit_u8)offset; - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - offsetsarg_count = (sljit_u8)stack_offset; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) { + offset += sizeof(sljit_sw); + *offsets_ptr = (sljit_u8)offset; + } - if (word_arg_count == 0 && arg_count <= 1) - offsetsarg_count = 254 + arg_count; + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); - stack_offset += sizeof(sljit_f32); - arg_count++; + offset += sizeof(sljit_f64); float_arg_count++; break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - offsetsarg_count = (sljit_u8)stack_offset; - - if (word_arg_count == 0 && arg_count <= 1) - offsetsarg_count = 254 + arg_count; + case SLJIT_ARG_TYPE_F32: + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); - stack_offset += sizeof(sljit_f64); - arg_count++; + offset += sizeof(sljit_f32); float_arg_count++; break; default: - offsetsarg_count = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_sw); - arg_count++; + offset += sizeof(sljit_sw); word_arg_count++; break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; + offsets_ptr++; } - /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */ - if (stack_offset > 16) - FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + /* Stack is aligned to 16 bytes. 
*/ + SLJIT_ASSERT(offset <= 8 * sizeof(sljit_sw)); - types_save = types; - arg_count_save = arg_count; + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + if (is_tail_call) { + offset = (offset + sizeof(sljit_sw) + 15) & ~(sljit_uw)0xf; + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset, &prev_ins)); + *extra_space = offset; + } else { + FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + *extra_space = 16; + } + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, 0, &prev_ins)); + *extra_space = 0; + } while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - arg_count--; - if (offsetsarg_count < 254) - ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsetsarg_count); - float_arg_count--; - break; - case SLJIT_ARG_TYPE_F64: - arg_count--; - if (offsetsarg_count < 254) - ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsetsarg_count); - float_arg_count--; - break; - default: - if (offsetsarg_count - 1 >= 16) - ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(offsetsarg_count - 1); - else if (arg_count != word_arg_count) - ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsetsarg_count - 1 >> 2)); - else if (arg_count == 1)
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeMIPS_64.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeMIPS_64.c
Changed
@@ -46,9 +46,9 @@ } /* Zero extended number. */ - uimm = imm; + uimm = (sljit_uw)imm; if (imm < 0) { - uimm = ~imm; + uimm = ~(sljit_uw)imm; inv = 1; } @@ -119,7 +119,7 @@ } #define SELECT_OP(a, b) \ - (!(op & SLJIT_I32_OP) ? a : b) + (!(op & SLJIT_32) ? a : b) #define EMIT_LOGICAL(op_imm, op_norm) \ if (flags & SRC2_IMM) { \ @@ -138,19 +138,19 @@ #define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \ if (flags & SRC2_IMM) { \ if (src2 >= 32) { \ - SLJIT_ASSERT(!(op & SLJIT_I32_OP)); \ + SLJIT_ASSERT(!(op & SLJIT_32)); \ ins = op_dimm32; \ src2 -= 32; \ } \ else \ - ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \ + ins = (op & SLJIT_32) ? op_imm : op_dimm; \ if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ } \ else { \ - ins = (op & SLJIT_I32_OP) ? op_v : op_dv; \ + ins = (op & SLJIT_32) ? op_v : op_dv; \ if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ if (!(flags & UNUSED_DEST)) \ @@ -165,50 +165,71 @@ switch (GET_OPCODE(op)) { case SLJIT_MOV: - case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S8) { - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, DSRA32 | T(dst) | D(dst) | 
SH_IMM(24), DR(dst)); - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S16) { - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U32: - SLJIT_ASSERT(!(op & SLJIT_I32_OP)); - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); - return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | 
REG2_SOURCE)) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + if (dst == src2) + return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); + return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; case SLJIT_MOV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; case SLJIT_NOT: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); @@ -234,7 +255,7 @@ FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); /* Check zero. */ FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_I32_OP) ? 32 : 64), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_32) ? 32 : 64), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst))); /* Loop for searching the highest bit. 
*/ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst))); @@ -462,7 +483,7 @@ #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); #elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); return push_inst(compiler, MFLO | D(dst), DR(dst)); @@ -528,10 +549,10 @@ SLJIT_UNUSED_ARG(executable_offset); SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0); - inst0 = (inst0 & 0xffff0000) | ((new_target >> 48) & 0xffff); - inst1 = (inst1 & 0xffff0000) | ((new_target >> 32) & 0xffff); - inst3 = (inst3 & 0xffff0000) | ((new_target >> 16) & 0xffff); - inst5 = (inst5 & 0xffff0000) | (new_target & 0xffff); + inst0 = (inst0 & 0xffff0000) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst1 = (inst1 & 0xffff0000) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst3 = (inst3 & 0xffff0000) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst5 = (inst5 & 0xffff0000) | ((sljit_ins)new_target & 0xffff); SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1); inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 6); @@ -539,7 +560,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) @@ -548,19 +569,19 @@ sljit_s32 word_arg_count = 0; sljit_s32 float_arg_count = 0; sljit_s32 types = 0; - sljit_ins prev_ins = NOP; + sljit_ins prev_ins = *ins_ptr; sljit_ins ins = NOP; SLJIT_ASSERT(reg_mapTMP_REG1 == 4 && freg_mapTMP_FREG1 == 12); - arg_types >>= SLJIT_DEF_SHIFT; + 
arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (arg_types & SLJIT_ARG_MASK) {
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeMIPS_common.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeMIPS_common.c
Changed
@@ -86,13 +86,13 @@ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) static const sljit_u8 freg_mapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 4 = { - 0, 0, 14, 2, 4, 6, 8, 12, 10, 16 + 0, 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20, 12, 10, 16 }; #else static const sljit_u8 freg_mapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 4 = { - 0, 0, 13, 14, 15, 16, 17, 12, 18, 10 + 0, 0, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 30, 29, 28, 27, 26, 25, 24, 12, 11, 10 }; #endif @@ -101,23 +101,23 @@ /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define S(s) (reg_maps << 21) -#define T(t) (reg_mapt << 16) -#define D(d) (reg_mapd << 11) -#define FT(t) (freg_mapt << 16) -#define FS(s) (freg_maps << 11) -#define FD(d) (freg_mapd << 6) +#define S(s) ((sljit_ins)reg_maps << 21) +#define T(t) ((sljit_ins)reg_mapt << 16) +#define D(d) ((sljit_ins)reg_mapd << 11) +#define FT(t) ((sljit_ins)freg_mapt << 16) +#define FS(s) ((sljit_ins)freg_maps << 11) +#define FD(d) ((sljit_ins)freg_mapd << 6) /* Absolute registers. 
*/ -#define SA(s) ((s) << 21) -#define TA(t) ((t) << 16) -#define DA(d) ((d) << 11) -#define IMM(imm) ((imm) & 0xffff) -#define SH_IMM(imm) ((imm) << 6) +#define SA(s) ((sljit_ins)(s) << 21) +#define TA(t) ((sljit_ins)(t) << 16) +#define DA(d) ((sljit_ins)(d) << 11) +#define IMM(imm) ((sljit_ins)(imm) & 0xffff) +#define SH_IMM(imm) ((sljit_ins)(imm) << 6) #define DR(dr) (reg_mapdr) #define FR(dr) (freg_mapdr) -#define HI(opcode) ((opcode) << 26) -#define LO(opcode) (opcode) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode)) #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) /* CMP.cond.fmt */ /* S = (20 << 21) D = (21 << 21) */ @@ -186,6 +186,7 @@ #define DMULTU (HI(0) | LO(29)) #endif /* SLJIT_MIPS_REV >= 6 */ #define DIV_S (HI(17) | FMT_S | LO(3)) +#define DINSU (HI(31) | LO(6)) #define DSLL (HI(0) | LO(56)) #define DSLL32 (HI(0) | LO(60)) #define DSLLV (HI(0) | LO(20)) @@ -205,8 +206,10 @@ #define JR (HI(0) | LO(8)) #endif /* SLJIT_MIPS_REV >= 6 */ #define LD (HI(55)) +#define LDC1 (HI(53)) #define LUI (HI(15)) #define LW (HI(35)) +#define LWC1 (HI(49)) #define MFC1 (HI(17)) #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) #define MOD (HI(0) | (3 << 6) | LO(26)) @@ -292,7 +295,8 @@ { sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS - || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f)); + || (sljit_ins)delay_slot == ((ins >> 11) & 0x1f) + || (sljit_ins)delay_slot == ((ins >> 16) & 0x1f)); FAIL_IF(!ptr); *ptr = ins; compiler->size++; @@ -300,7 +304,7 @@ return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_ins invert_branch(sljit_s32 flags) +static SLJIT_INLINE sljit_ins invert_branch(sljit_uw flags) { if (flags & IS_BIT26_COND) return (1 << 26); @@ -371,7 +375,7 @@ inst1 = NOP; return inst + 1; } - inst0 = inst0 ^ invert_branch(jump->flags); + inst0 ^= invert_branch(jump->flags); inst1 = NOP; jump->addr -= 
sizeof(sljit_ins); return inst + 1; @@ -379,7 +383,7 @@ } if (jump->flags & IS_COND) { - if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~0xfffffff)) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; saved_inst = inst0; inst0 = inst-1; @@ -388,7 +392,7 @@ inst2 = NOP; return inst + 2; } - else if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) { + else if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst0 = (inst0 & 0xffff0000) | 3; inst1 = NOP; @@ -400,7 +404,7 @@ } else { /* J instuctions. */ - if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == (jump->addr & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst0 = inst-1; inst-1 = (jump->flags & IS_JAL) ? JAL : J; @@ -408,7 +412,7 @@ return inst; } - if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) { + if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst0 = (jump->flags & IS_JAL) ? JAL : J; inst1 = NOP; @@ -472,7 +476,7 @@ { sljit_uw addr = put_label->label->addr; sljit_ins *inst = (sljit_ins *)put_label->addr; - sljit_s32 reg = *inst; + sljit_u32 reg = *inst; if (put_label->flags == 0) { SLJIT_ASSERT(addr < 0x80000000l); @@ -548,7 +552,7 @@ /* These structures are ordered by their address. 
*/ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -584,7 +588,7 @@ if (label && label->size == word_count) { label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -601,39 +605,46 @@ buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins))) >> 2; + addr = (sljit_uw)((sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) - sizeof(sljit_ins)) >> 2); SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); - buf_ptr0 = (buf_ptr0 & 0xffff0000) | (addr & 0xffff); + buf_ptr0 = (buf_ptr0 & 0xffff0000) | ((sljit_ins)addr & 0xffff); break; } if (jump->flags & PATCH_J) { - SLJIT_ASSERT((addr & ~0xfffffff) == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~0xfffffff)); - buf_ptr0 |= (addr >> 2) & 0x03ffffff; + SLJIT_ASSERT((addr & ~(sljit_uw)0xfffffff) + == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)); + buf_ptr0 |= (sljit_ins)(addr >> 2) & 0x03ffffff; break; } /* Set the fields of immediate loads. 
*/ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - buf_ptr0 = (buf_ptr0 & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr1 = (buf_ptr1 & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr0 | buf_ptr1) & 0xffff) == 0); + buf_ptr0 |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr1 |= (sljit_ins)addr & 0xffff; #else if (jump->flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= 0x7fffffff); - buf_ptr0 = (buf_ptr0 & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr1 = (buf_ptr1 & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr0 | buf_ptr1) & 0xffff) == 0); + buf_ptr0 |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr1 |= (sljit_ins)addr & 0xffff; + break; }
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativePPC_32.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativePPC_32.c
Changed
@@ -86,11 +86,6 @@ SLJIT_ASSERT(src1 == TMP_REG1); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); - case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); return push_inst(compiler, CNTLZW | S(src2) | A(dst)); @@ -158,7 +153,9 @@ if (flags & ALT_FORM3) { /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); } if (flags & ALT_FORM4) { @@ -167,11 +164,17 @@ return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (!(flags & ALT_SET_FLAGS)) + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + if (flags & ALT_FORM5) return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); case SLJIT_SUBC: return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); @@ -277,5 +280,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); }
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativePPC_64.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativePPC_64.c
Changed
@@ -57,20 +57,20 @@ } /* Count leading zeroes. */ - tmp = (imm >= 0) ? imm : ~imm; + tmp = (sljit_uw)((imm >= 0) ? imm : ~imm); ASM_SLJIT_CLZ(tmp, shift); SLJIT_ASSERT(shift > 0); shift--; - tmp = (imm << shift); + tmp = ((sljit_uw)imm << shift); if ((tmp & ~0xffff000000000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); shift += 15; return PUSH_RLDICR(reg, shift); } if ((tmp & ~0xffffffff00000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32))); shift += 31; return PUSH_RLDICR(reg, shift); @@ -78,18 +78,18 @@ /* Cut out the 16 bit from immediate. */ shift += 15; - tmp2 = imm & ((1ul << (63 - shift)) - 1); + tmp2 = (sljit_uw)imm & (((sljit_uw)1 << (63 - shift)) - 1); if (tmp2 <= 0xffff) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); FAIL_IF(PUSH_RLDICR(reg, shift)); - return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2); + return push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)tmp2); } if (tmp2 <= 0xffffffff) { FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); FAIL_IF(PUSH_RLDICR(reg, shift)); - FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16))); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 16))); return (imm & 0xffff) ? 
push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS; } @@ -97,16 +97,16 @@ tmp2 <<= shift2; if ((tmp2 & ~0xffff000000000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); shift2 += 15; shift += (63 - shift2); FAIL_IF(PUSH_RLDICR(reg, shift)); - FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 48))); return PUSH_RLDICR(reg, shift2); } /* The general version. */ - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48))); + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)((sljit_uw)imm >> 48))); FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32))); FAIL_IF(PUSH_RLDICR(reg, 31)); FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16))); @@ -199,19 +199,6 @@ UN_EXTS(); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - - if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) { - FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); - FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2))); - return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); - } - - UN_EXTS(); - /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? 
ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); - case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); if (flags & ALT_FORM1) @@ -299,13 +286,22 @@ if (flags & ALT_FORM3) { if (flags & ALT_SIGN_EXT) { - FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); - src1 = TMP_REG1; - FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); - src2 = TMP_REG2; + if (src1 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); + src1 = TMP_REG1; + } + if (src2 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + src2 = TMP_REG2; + } } + /* Setting XER SO is not enough, CR SO is also needed. */ - FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + if (src1 != TMP_ZERO) + FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + else + FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2))); + if (flags & ALT_SIGN_EXT) return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); return SLJIT_SUCCESS; @@ -317,12 +313,18 @@ return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (!(flags & ALT_SET_FLAGS)) + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + BIN_EXTS(); if (flags & ALT_FORM5) return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); case SLJIT_SUBC: BIN_EXTS(); @@ -432,14 +434,14 @@ if (src) reg = *src & REG_MASK; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = 
(types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count++; break; default: @@ -453,13 +455,13 @@ break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count--; break; default: @@ -471,7 +473,7 @@ break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -492,10 +494,10 @@ SLJIT_UNUSED_ARG(executable_offset); SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0); - inst0 = (inst0 & 0xffff0000) | ((new_target >> 48) & 0xffff); - inst1 = (inst1 & 0xffff0000) | ((new_target >> 32) & 0xffff); - inst3 = (inst3 & 0xffff0000) | ((new_target >> 16) & 0xffff); - inst4 = (inst4 & 0xffff0000) | (new_target & 0xffff); + inst0 = (inst0 & 0xffff0000u) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst1 = (inst1 & 0xffff0000u) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst3 = (inst3 & 0xffff0000u) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst4 = (inst4 & 0xffff0000u) | ((sljit_ins)new_target & 0xffff); SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1); inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 5); @@ -503,5 +505,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativePPC_common.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativePPC_common.c
Changed
@@ -109,32 +109,32 @@ }; static const sljit_u8 freg_mapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 3 = { - 0, 1, 2, 3, 4, 5, 6, 0, 7 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 0, 13 }; /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define D(d) (reg_mapd << 21) -#define S(s) (reg_maps << 21) -#define A(a) (reg_mapa << 16) -#define B(b) (reg_mapb << 11) -#define C(c) (reg_mapc << 6) -#define FD(fd) (freg_mapfd << 21) -#define FS(fs) (freg_mapfs << 21) -#define FA(fa) (freg_mapfa << 16) -#define FB(fb) (freg_mapfb << 11) -#define FC(fc) (freg_mapfc << 6) -#define IMM(imm) ((imm) & 0xffff) -#define CRD(d) ((d) << 21) +#define D(d) ((sljit_ins)reg_mapd << 21) +#define S(s) ((sljit_ins)reg_maps << 21) +#define A(a) ((sljit_ins)reg_mapa << 16) +#define B(b) ((sljit_ins)reg_mapb << 11) +#define C(c) ((sljit_ins)reg_mapc << 6) +#define FD(fd) ((sljit_ins)freg_mapfd << 21) +#define FS(fs) ((sljit_ins)freg_mapfs << 21) +#define FA(fa) ((sljit_ins)freg_mapfa << 16) +#define FB(fb) ((sljit_ins)freg_mapfb << 11) +#define FC(fc) ((sljit_ins)freg_mapfc << 6) +#define IMM(imm) ((sljit_ins)(imm) & 0xffff) +#define CRD(d) ((sljit_ins)(d) << 21) /* Instruction bit sections. OE and Rc flag (see ALT_SET_FLAGS). */ #define OE(flags) ((flags) & ALT_SET_FLAGS) /* Rc flag (see ALT_SET_FLAGS). 
*/ #define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10) -#define HI(opcode) ((opcode) << 26) -#define LO(opcode) ((opcode) << 1) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode) << 1) #define ADD (HI(31) | LO(266)) #define ADDC (HI(31) | LO(10)) @@ -182,6 +182,7 @@ #define FSUB (HI(63) | LO(20)) #define FSUBS (HI(59) | LO(20)) #define LD (HI(58) | 0) +#define LFD (HI(50)) #define LWZ (HI(32)) #define MFCR (HI(31) | LO(19)) #define MFLR (HI(31) | LO(339) | 0x80000) @@ -215,6 +216,7 @@ #define STD (HI(62) | 0) #define STDU (HI(62) | 1) #define STDUX (HI(31) | LO(181)) +#define STFD (HI(54)) #define STFIWX (HI(31) | LO(983)) #define STW (HI(36)) #define STWU (HI(37)) @@ -232,15 +234,18 @@ #define UIMM_MAX (0xffff) #define RLDI(dst, src, sh, mb, type) \ - (HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20)) + (HI(30) | S(src) | A(dst) | ((sljit_ins)(type) << 2) | (((sljit_ins)(sh) & 0x1f) << 11) \ + | (((sljit_ins)(sh) & 0x20) >> 4) | (((sljit_ins)(mb) & 0x1f) << 6) | ((sljit_ins)(mb) & 0x20)) #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func) { - sljit_sw* ptrs; + sljit_uw* ptrs; + if (func_ptr) *func_ptr = (void*)context; - ptrs = (sljit_sw*)func; + + ptrs = (sljit_uw*)func; context->addr = addr ? 
addr : ptrs0; context->r2 = ptrs1; context->r11 = ptrs2; @@ -260,7 +265,7 @@ { sljit_sw diff; sljit_uw target_addr; - sljit_sw extra_jump_flags; + sljit_uw extra_jump_flags; #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) @@ -296,7 +301,7 @@ } extra_jump_flags = REMOVE_COND; - diff -= sizeof(sljit_ins); + diff -= SSIZE_OF(ins); } if (diff <= 0x01ffffff && diff >= -0x02000000) { @@ -349,7 +354,7 @@ { sljit_uw addr = put_label->label->addr; sljit_ins *inst = (sljit_ins *)put_label->addr; - sljit_s32 reg = *inst; + sljit_u32 reg = *inst; if (put_label->flags == 0) { SLJIT_ASSERT(addr < 0x100000000l); @@ -433,7 +438,7 @@ if (label && label->size == word_count) { /* Just recording the address. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -501,7 +506,7 @@ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -511,7 +516,7 @@ SLJIT_ASSERT(!put_label); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) - SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins))); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)(compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)))); #else SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); #endif @@ -527,22 +532,22 @@ if (!(jump->flags & PATCH_ABS_B)) { addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); - *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); + *buf_ptr = BCx | 
((sljit_ins)addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); } else { SLJIT_ASSERT(addr <= 0xffff); - *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); + *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); } } else { if (!(jump->flags & PATCH_ABS_B)) { addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); - *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); + *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | ((*buf_ptr) & 0x1); } else { SLJIT_ASSERT(addr <= 0x03ffffff); - *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); + *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); } } break; @@ -550,26 +555,32 @@ /* Set the fields of immediate loads. */ #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - buf_ptr0 = (buf_ptr0 & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr1 = (buf_ptr1 & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr0 | buf_ptr1) & 0xffff) == 0); + buf_ptr0 |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr1 |= (sljit_ins)addr & 0xffff; #else if (jump->flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= 0x7fffffff); - buf_ptr0 = (buf_ptr0 & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr1 = (buf_ptr1 & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr0 | buf_ptr1) & 0xffff) == 0); + buf_ptr0 |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr1 |= (sljit_ins)addr & 0xffff; break; } + if (jump->flags & PATCH_ABS48) { SLJIT_ASSERT(addr <= 0x7fffffffffff); - buf_ptr0 = (buf_ptr0 & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr1 = (buf_ptr1 & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr3 = (buf_ptr3 & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr0 | buf_ptr1 | buf_ptr3) & 0xffff) == 0); + buf_ptr0 |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr1 |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr3 |= (sljit_ins)addr & 0xffff; break; } - buf_ptr0 = (buf_ptr0 & 0xffff0000) 
| ((addr >> 48) & 0xffff);
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeS390X.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeS390X.c
Changed
@@ -44,6 +44,9 @@ /* Instruction tags (most significant halfword). */ static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) + static const sljit_u8 reg_mapSLJIT_NUMBER_OF_REGISTERS + 4 = { 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 }; @@ -97,20 +100,37 @@ * link register doesn't need to change */ +/* When reg cannot be unused. */ +#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP) + /* Link registers. The normal link register is r14, but since we use that for flags we need to use r0 instead to do fast calls so that flags are preserved. */ static const sljit_gpr link_r = 14; /* r14 */ static const sljit_gpr fast_link_r = 0; /* r0 */ -/* Flag register layout: +#define TMP_FREG1 (0) + +static const sljit_u8 freg_mapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 1 = { + 1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, +}; + +#define R0A(r) (r) +#define R4A(r) ((r) << 4) +#define R8A(r) ((r) << 8) +#define R12A(r) ((r) << 12) +#define R16A(r) ((r) << 16) +#define R20A(r) ((r) << 20) +#define R28A(r) ((r) << 28) +#define R32A(r) ((r) << 32) +#define R36A(r) ((r) << 36) - 0 32 33 34 36 64 - +---------------+---+---+-------+-------+ - | ZERO | 0 | 0 | C C |///////| - +---------------+---+---+-------+-------+ -*/ -static const sljit_gpr flag_r = 14; /* r14 */ +#define R0(r) ((sljit_ins)reg_mapr) + +#define F0(r) ((sljit_ins)freg_mapr) +#define F4(r) (R4A((sljit_ins)freg_mapr)) +#define F20(r) (R20A((sljit_ins)freg_mapr)) +#define F36(r) (R36A((sljit_ins)freg_mapr)) struct sljit_s390x_const { struct sljit_const const_; /* must be first */ @@ -124,19 +144,25 @@ return reg_mapr; } +static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r) +{ + SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map0))); + return freg_mapr; +} + /* Size of instruction in bytes. Tags must already be cleared. 
*/ static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins) { /* keep faulting instructions */ if (ins == 0) - return 2; + return 2; if ((ins & 0x00000000ffffL) == ins) - return 2; + return 2; if ((ins & 0x0000ffffffffL) == ins) - return 4; + return 4; if ((ins & 0xffffffffffffL) == ins) - return 6; + return 6; SLJIT_UNREACHABLE(); return (sljit_uw)-1; @@ -172,7 +198,8 @@ } #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ - (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB) + (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \ + && !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE)) /* Map the given type to a 4-bit condition code mask. */ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { @@ -191,6 +218,7 @@ return (cc0 | cc3); return (cc0 | cc2); } + /* fallthrough */ case SLJIT_EQUAL_F64: return cc0; @@ -204,6 +232,7 @@ return (cc1 | cc2); return (cc1 | cc3); } + /* fallthrough */ case SLJIT_NOT_EQUAL_F64: return (cc1 | cc2 | cc3); @@ -228,10 +257,20 @@ case SLJIT_LESS_F64: return cc1; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc2 | cc3); + /* fallthrough */ + case SLJIT_SIG_LESS_EQUAL: case SLJIT_LESS_EQUAL_F64: return (cc0 | cc1); + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc0 | cc1); + /* fallthrough */ + case SLJIT_SIG_GREATER: /* Overflow is considered greater, see SLJIT_SUB. 
*/ return cc2 | cc3; @@ -242,6 +281,7 @@ case SLJIT_OVERFLOW: if (compiler->status_flags_state & SLJIT_SET_Z) return (cc2 | cc3); + /* fallthrough */ case SLJIT_UNORDERED_F64: return cc3; @@ -249,6 +289,7 @@ case SLJIT_NOT_OVERFLOW: if (compiler->status_flags_state & SLJIT_SET_Z) return (cc0 | cc1); + /* fallthrough */ case SLJIT_ORDERED_F64: return (cc0 | cc1 | cc2); @@ -444,7 +485,7 @@ #define SLJIT_S390X_RRE(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ { \ - return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ + return (pattern) | R4A(dst) | R0A(src); \ } /* AND */ @@ -504,7 +545,7 @@ #define SLJIT_S390X_RIA(name, pattern, imm_type) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ { \ - return (pattern) | ((reg & 0xf) << 20) | (imm & 0xffff); \ + return (pattern) | R20A(reg) | (imm & 0xffff); \ } /* ADD HALFWORD IMMEDIATE */ @@ -534,7 +575,7 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ { \ SLJIT_ASSERT(have_eimm()); \ - return (pattern) | ((sljit_ins)(reg & 0xf) << 36) | (imm & 0xffffffff); \ + return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \ } /* ADD IMMEDIATE */ @@ -567,17 +608,11 @@ /* RX-a form instructions */ #define SLJIT_S390X_RXA(name, pattern) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ { \ - sljit_ins ri, xi, bi, di; \ -\ SLJIT_ASSERT((d & 0xfff) == d); \ - ri = (sljit_ins)(r & 0xf) << 20; \ - xi = (sljit_ins)(x & 0xf) << 16; \ - bi = (sljit_ins)(b & 0xf) << 12; \ - di = (sljit_ins)(d & 0xfff); \ \ - return (pattern) | ri | xi | bi | di; \ + return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \ } /* LOAD */ @@ -607,15 +642,9 @@ #define SLJIT_S390X_RXYA(name, pattern, cond) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ { \ - sljit_ins ri, xi, bi, di; \
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeSPARC_32.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeSPARC_32.c
Changed
@@ -35,16 +35,13 @@ #define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2)) -static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); switch (op) { case SLJIT_MOV: - case SLJIT_MOV_U32: - case SLJIT_MOV_S32: - case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); @@ -59,8 +56,7 @@ FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); } - else if (dst != src2) - SLJIT_UNREACHABLE(); + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: @@ -70,13 +66,12 @@ FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); return push_inst(compiler, (op == SLJIT_MOV_S16 ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); } - else if (dst != src2) - SLJIT_UNREACHABLE(); + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_NOT: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DRF(dst, flags)); case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); @@ -89,22 +84,24 @@ /* Loop. 
*/ FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, BICC | DA(0xe) | ((sljit_ins)-2 & DISP_MASK), UNMOVABLE_INS)); return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_ADDC: - return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SUB: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SUBC: - return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_MUL: compiler->status_flags_state = 0; @@ -116,13 +113,13 @@ return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | 
SET_FLAGS); case SLJIT_AND: - return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_OR: - return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_XOR: - return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SHL: FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); @@ -147,7 +144,7 @@ sljit_s32 word_reg_index = 8; sljit_s32 float_arg_index = 1; sljit_s32 double_arg_count = 0; - sljit_s32 float_offset = (16 + 6) * sizeof(sljit_sw); + sljit_u32 float_offset = (16 + 6) * sizeof(sljit_sw); sljit_s32 types = 0; sljit_s32 reg = 0; sljit_s32 move_to_tmp2 = 0; @@ -155,18 +152,12 @@ if (src) reg = reg_map*src & REG_MASK; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_index++; - if (reg_index == reg) - move_to_tmp2 = 1; - reg_index++; - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_index++; double_arg_count++; @@ -174,36 +165,37 @@ move_to_tmp2 = 1; reg_index += 2; break; + case SLJIT_ARG_TYPE_F32: + float_arg_index++; + if (reg_index == reg) + move_to_tmp2 = 1; + reg_index++; + break; default: - if (reg_index != word_reg_index && reg_index < 14 && reg_index == reg) + if (reg_index 
!= word_reg_index && reg_index == reg) move_to_tmp2 = 1; reg_index++; word_reg_index++; break; } - if (move_to_tmp2) { - move_to_tmp2 = 0; - if (reg < 14) - FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1))); - *src = TMP_REG1; - } + arg_types >>= SLJIT_ARG_SHIFT; + } - arg_types >>= SLJIT_DEF_SHIFT; + if (move_to_tmp2) { + if (reg < 14) + FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1))); + *src = TMP_REG1; } arg_types = types; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_index--; - FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - float_offset -= sizeof(sljit_f64); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_index--; if (float_arg_index == 4 && double_arg_count == 4) { + /* The address is not doubleword aligned, so two instructions are required to store the double. */ FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM((16 + 7) * sizeof(sljit_sw)), MOVABLE_INS)); FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | (1 << 25) | S1(SLJIT_SP) | IMM((16 + 8) * sizeof(sljit_sw)), MOVABLE_INS)); } @@ -211,36 +203,41 @@ FAIL_IF(push_inst(compiler, STDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); float_offset -= sizeof(sljit_f64); break; + case SLJIT_ARG_TYPE_F32: + float_arg_index--; + FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + float_offset -= sizeof(sljit_f64); + break; default: break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } float_offset = (16 + 6) * sizeof(sljit_sw); while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - reg_index--; - if (reg_index < 14)
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeSPARC_common.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeSPARC_common.c
Changed
@@ -98,36 +98,37 @@ #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) static const sljit_u8 reg_mapSLJIT_NUMBER_OF_REGISTERS + 6 = { - 0, 8, 9, 10, 11, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 12, 13, 15 + 0, 8, 9, 10, 11, 23, 22, 21, 20, 19, 18, 17, 16, 29, 28, 27, 26, 25, 24, 14, 1, 12, 13, 15 }; static const sljit_u8 freg_mapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 3 = { - 0, 0, 2, 4, 6, 8, 10, 12, 14 + 0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }; /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define D(d) (reg_mapd << 25) -#define FD(d) (freg_mapd << 25) -#define FDN(d) ((freg_mapd | 0x1) << 25) -#define DA(d) ((d) << 25) -#define S1(s1) (reg_maps1 << 14) -#define FS1(s1) (freg_maps1 << 14) -#define S1A(s1) ((s1) << 14) -#define S2(s2) (reg_maps2) -#define FS2(s2) (freg_maps2) -#define FS2N(s2) (freg_maps2 | 0x1) -#define S2A(s2) (s2) +#define D(d) ((sljit_ins)reg_mapd << 25) +#define FD(d) ((sljit_ins)freg_mapd << 25) +#define FDN(d) (((sljit_ins)freg_mapd | 0x1) << 25) +#define DA(d) ((sljit_ins)(d) << 25) +#define S1(s1) ((sljit_ins)reg_maps1 << 14) +#define FS1(s1) ((sljit_ins)freg_maps1 << 14) +#define S1A(s1) ((sljit_ins)(s1) << 14) +#define S2(s2) ((sljit_ins)reg_maps2) +#define FS2(s2) ((sljit_ins)freg_maps2) +#define FS2N(s2) ((sljit_ins)freg_maps2 | 0x1) +#define S2A(s2) ((sljit_ins)(s2)) #define IMM_ARG 0x2000 -#define DOP(op) ((op) << 5) -#define IMM(imm) (((imm) & 0x1fff) | IMM_ARG) +#define DOP(op) ((sljit_ins)(op) << 5) +#define IMM(imm) (((sljit_ins)(imm) & 0x1fff) | IMM_ARG) #define DR(dr) (reg_mapdr) -#define OPC1(opcode) ((opcode) << 30) -#define OPC2(opcode) ((opcode) << 22) -#define OPC3(opcode) ((opcode) << 19) +#define DRF(dr, flags) ((sljit_s32)(reg_mapdr | ((flags) & SET_FLAGS))) +#define OPC1(opcode) ((sljit_ins)(opcode) << 30) +#define OPC2(opcode) ((sljit_ins)(opcode) 
<< 22) +#define OPC3(opcode) ((sljit_ins)(opcode) << 19) #define SET_FLAGS OPC3(0x10) #define ADD (OPC1(0x2) | OPC3(0x00)) @@ -156,6 +157,8 @@ #define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) #define JMPL (OPC1(0x2) | OPC3(0x38)) #define LDD (OPC1(0x3) | OPC3(0x03)) +#define LDDF (OPC1(0x3) | OPC3(0x23)) +#define LDF (OPC1(0x3) | OPC3(0x20)) #define LDUW (OPC1(0x3) | OPC3(0x00)) #define NOP (OPC1(0x0) | OPC2(0x04)) #define OR (OPC1(0x2) | OPC3(0x02)) @@ -170,6 +173,7 @@ #define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12)) #define SRL (OPC1(0x2) | OPC3(0x26)) #define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12)) +#define STD (OPC1(0x3) | OPC3(0x07)) #define STDF (OPC1(0x3) | OPC3(0x27)) #define STF (OPC1(0x3) | OPC3(0x24)) #define STW (OPC1(0x3) | OPC3(0x04)) @@ -183,7 +187,7 @@ #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) #define MAX_DISP (0x1fffff) #define MIN_DISP (-0x200000) -#define DISP_MASK (0x3fffff) +#define DISP_MASK ((sljit_ins)0x3fffff) #define BICC (OPC1(0x0) | OPC2(0x2)) #define FBFCC (OPC1(0x0) | OPC2(0x6)) @@ -274,7 +278,7 @@ } } - diff += sizeof(sljit_ins); + diff += SSIZE_OF(ins); if (diff <= MAX_DISP && diff >= MIN_DISP) { jump->flags |= PATCH_B; @@ -300,7 +304,7 @@ sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_uw addr; + sljit_sw addr; struct sljit_label *label; struct sljit_jump *jump; @@ -340,7 +344,7 @@ if (label && label->size == word_count) { /* Just recording the address. 
*/ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -373,7 +377,7 @@ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -386,27 +390,27 @@ jump = compiler->jumps; while (jump) { do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_CALL) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000); - buf_ptr0 = CALL | (addr & 0x3fffffff); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1fffffff && addr >= -0x20000000); + buf_ptr0 = CALL | ((sljit_ins)addr & 0x3fffffff); break; } if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP); - buf_ptr0 = (buf_ptr0 & ~DISP_MASK) | (addr & DISP_MASK); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= MAX_DISP && addr >= MIN_DISP); + buf_ptr0 = (buf_ptr0 & ~DISP_MASK) | ((sljit_ins)addr & DISP_MASK); break; } /* Set the fields of immediate loads. 
*/ #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) SLJIT_ASSERT(((buf_ptr0 & 0xc1cfffff) == 0x01000000) && ((buf_ptr1 & 0xc1f83fff) == 0x80102000)); - buf_ptr0 |= (addr >> 10) & 0x3fffff; - buf_ptr1 |= addr & 0x3ff; + buf_ptr0 |= (sljit_ins)(addr >> 10) & 0x3fffff; + buf_ptr1 |= (sljit_ins)addr & 0x3ff; #else #error "Implementation required" #endif @@ -416,7 +420,7 @@ put_label = compiler->put_labels; while (put_label) { - addr = put_label->label->addr; + addr = (sljit_sw)put_label->label->addr; buf_ptr = (sljit_ins *)put_label->addr; #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) @@ -431,7 +435,7 @@ compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -487,13 +491,14 @@ #define ALT_KEEP_CACHE 0x00040 #define CUMULATIVE_OP 0x00080 #define IMM_OP 0x00100 -#define SRC2_IMM 0x00200 +#define MOVE_OP 0x00200 +#define SRC2_IMM 0x00400 -#define REG_DEST 0x00400 -#define REG2_SOURCE 0x00800 -#define SLOW_SRC1 0x01000 -#define SLOW_SRC2 0x02000 -#define SLOW_DEST 0x04000 +#define REG_DEST 0x00800 +#define REG2_SOURCE 0x01000 +#define SLOW_SRC1 0x02000 +#define SLOW_SRC2 0x04000 +#define SLOW_DEST 0x08000 /* SET_FLAGS (0x10 << 19) also belong here! */ @@ -507,6 +512,10 @@ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { + sljit_s32 reg_index, types, tmp; + sljit_u32 float_offset, args_offset;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeX86_32.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeX86_32.c
Changed
@@ -26,6 +26,10 @@ /* x86 32-bit arch dependent functions. */ +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm) { sljit_u8 *inst; @@ -38,9 +42,183 @@ return SLJIT_SUCCESS; } +/* Size contains the flags as well. */ +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, + /* The register or immediate operand. */ + sljit_s32 a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_u8 reg_map_b; + sljit_uw flags = size; + sljit_uw inst_size; + + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); + SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) + && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) + && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + + size &= 0xf; + inst_size = size; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) + inst_size++; + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if (!(b & REG_MASK)) + inst_size += sizeof(sljit_sw); + else if (immb != 0 && !(b & OFFS_REG_MASK)) { + /* Immediate operand. 
*/ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_sw); + } + else if (reg_mapb & REG_MASK == 5) + inst_size += sizeof(sljit_s8); + + if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); + + if (b & OFFS_REG_MASK) + inst_size += 1; /* SIB byte. */ + } + + /* Calculate size of a. */ + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + imma &= 0x1f; + if (imma != 1) { + inst_size ++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_sw); + } + else + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + if (flags & EX86_PREF_66) + *inst++ = 0x66; + + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; + + if (a & SLJIT_IMM) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = U8(reg_mapa << 3); + else + *buf_ptr = U8(a << 3); + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? 
reg_mapb : b)); + buf_ptr++; + } else if (b & REG_MASK) { + reg_map_b = reg_mapb & REG_MASK; + + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_map_b == 5) { + if (immb != 0 || reg_map_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if (!(b & OFFS_REG_MASK)) + *buf_ptr++ |= reg_map_b; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = U8(reg_map_b | (reg_mapOFFS_REG(b) << 3)); + } + + if (immb != 0 || reg_map_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = U8(immb); /* 8 bit displacement. */ + else { + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_sw); + } + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = U8(reg_map_b | (reg_mapOFFS_REG(b) << 3) | (immb << 6)); + } + } + else { + *buf_ptr++ |= 0x05; + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_sw); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = U8(imma); + else if (flags & EX86_HALF_ARG) + sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma); + else if (!(flags & EX86_SHIFT_INS)) + sljit_unaligned_store_sw(buf_ptr, imma); + } + + return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); +} + +/* --------------------------------------------------------------------- */ +/* Enter / return */ +/* --------------------------------------------------------------------- */ + static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) { - sljit_s32 type = jump->flags >> TYPE_SHIFT; + sljit_uw type = jump->flags >> TYPE_SHIFT; if (type == SLJIT_JUMP) { *code_ptr++ = JMP_i32; @@ -59,18 +237,29 @@ if (jump->flags & JUMP_LABEL) jump->flags |= PATCH_MW; else
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeX86_64.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeX86_64.c
Changed
@@ -26,6 +26,10 @@ /* x86 64-bit arch dependent functions. */ +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { sljit_u8 *inst; @@ -34,14 +38,246 @@ FAIL_IF(!inst); INC_SIZE(2 + sizeof(sljit_sw)); *inst++ = REX_W | ((reg_mapreg <= 7) ? 0 : REX_B); - *inst++ = MOV_r_i32 + (reg_mapreg & 0x7); + *inst++ = U8(MOV_r_i32 | (reg_mapreg & 0x7)); sljit_unaligned_store_sw(inst, imm); return SLJIT_SUCCESS; } +static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm) +{ + sljit_u8 *inst; + sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + length); + FAIL_IF(!inst); + INC_SIZE(length); + if (rex) + *inst++ = rex; + *inst++ = opcode; + sljit_unaligned_store_s32(inst, (sljit_s32)imm); + return SLJIT_SUCCESS; +} + +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, + /* The register or immediate operand. */ + sljit_s32 a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_u8 rex = 0; + sljit_u8 reg_lmap_b; + sljit_uw flags = size; + sljit_uw inst_size; + + /* The immediate operand must be 32 bit. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. 
*/ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); + SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) + && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) + && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + + size &= 0xf; + inst_size = size; + + if (!compiler->mode32 && !(flags & EX86_NO_REXW)) + rex |= REX_W; + else if (flags & EX86_REX) + rex |= REX; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) + inst_size++; + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if (!(b & OFFS_REG_MASK)) { + if (NOT_HALFWORD(immb)) { + PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); + immb = 0; + if (b & REG_MASK) + b |= TO_OFFS_REG(TMP_REG2); + else + b |= TMP_REG2; + } + else if (reg_lmapb & REG_MASK == 4) + b |= TO_OFFS_REG(SLJIT_SP); + } + + if (!(b & REG_MASK)) + inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */ + else { + if (reg_mapb & REG_MASK >= 8) + rex |= REX_B; + + if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_s32); + } + else if (reg_lmapb & REG_MASK == 5) + inst_size += sizeof(sljit_s8); + + if (b & OFFS_REG_MASK) { + inst_size += 1; /* SIB byte. */ + if (reg_mapOFFS_REG(b) >= 8) + rex |= REX_X; + } + } + } + else if (!(flags & EX86_SSE2_OP2)) { + if (reg_mapb >= 8) + rex |= REX_B; + } + else if (freg_mapb >= 8) + rex |= REX_B; + + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + imma &= compiler->mode32 ? 
0x1f : 0x3f; + if (imma != 1) { + inst_size ++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_s32); + } + else { + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + /* reg_mapSLJIT_PREF_SHIFT_REG is less than 8. */ + if (!(flags & EX86_SSE2_OP1)) { + if (reg_mapa >= 8) + rex |= REX_R; + } + else if (freg_mapa >= 8) + rex |= REX_R; + } + + if (rex) + inst_size++; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + if (flags & EX86_PREF_66) + *inst++ = 0x66; + if (rex) + *inst++ = rex; + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; + + if (a & SLJIT_IMM) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = U8(reg_lmapa << 3); + else + *buf_ptr = U8(freg_lmapa << 3); + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmapb : freg_lmapb)); + buf_ptr++; + } else if (b & REG_MASK) { + reg_lmap_b = reg_lmapb & REG_MASK;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitNativeX86_common.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitNativeX86_common.c
Changed
@@ -65,6 +65,8 @@ 15 - R15 */ +#define TMP_FREG (0) + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) /* Last register + 1. */ @@ -77,9 +79,9 @@ #define CHECK_EXTRA_REGS(p, w, do) \ if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ if (p <= compiler->scratches) \ - w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \ + w = compiler->scratches_offset + ((p) - SLJIT_R3) * SSIZE_OF(sw); \ else \ - w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \ + w = compiler->locals_offset + ((p) - SLJIT_S2) * SSIZE_OF(sw); \ p = SLJIT_MEM1(SLJIT_SP); \ do; \ } @@ -115,11 +117,11 @@ /* Args: xmm0-xmm3 */ static const sljit_u8 freg_mapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 1 = { - 4, 0, 1, 2, 3, 5, 6 + 4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; /* low-map. freg_map & 0x7. */ static const sljit_u8 freg_lmapSLJIT_NUMBER_OF_FLOAT_REGISTERS + 1 = { - 4, 0, 1, 2, 3, 5, 6 + 4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }; #define REX_W 0x48 @@ -143,7 +145,8 @@ #endif /* SLJIT_CONFIG_X86_32 */ -#define TMP_FREG (0) +#define U8(v) ((sljit_u8)(v)) + /* Size flags for emit_x86_instruction: */ #define EX86_BIN_INS 0x0010 @@ -205,12 +208,15 @@ #define JMP_i32 0xe9 #define JMP_rm (/* GROUP_FF */ 4 << 3) #define LEA_r_m 0x8d +#define LOOP_i8 0xe2 #define MOV_r_rm 0x8b #define MOV_r_i32 0xb8 #define MOV_rm_r 0x89 #define MOV_rm_i32 0xc7 #define MOV_rm8_i8 0xc6 #define MOV_rm8_r8 0x88 +#define MOVAPS_x_xm 0x28 +#define MOVAPS_xm_x 0x29 #define MOVSD_x_xm 0x10 #define MOVSD_xm_x 0x11 #define MOVSXD_r_rm 0x63 @@ -274,14 +280,12 @@ #define MOD_REG 0xc0 #define MOD_DISP8 0x40 -#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) +#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s)) -#define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) -#define POP_REG(r) (*inst++ = (POP_r + (r))) -#define RET() (*inst++ = (RET_near)) -#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) -/* r32, r/m32 */ -#define MOV_RM(mod, reg, rm) 
(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) +#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r))) +#define POP_REG(r) (*inst++ = U8(POP_r + (r))) +#define RET() (*inst++ = RET_near) +#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0) /* Multithreading does not affect these static variables, since they store built-in CPU features. Therefore they can be overwritten by different threads @@ -371,7 +375,7 @@ cpu_has_cmov = (features >> 15) & 0x1; } -static sljit_u8 get_jump_code(sljit_s32 type) +static sljit_u8 get_jump_code(sljit_uw type) { switch (type) { case SLJIT_EQUAL: @@ -383,10 +387,12 @@ return 0x85 /* jne */; case SLJIT_LESS: + case SLJIT_CARRY: case SLJIT_LESS_F64: return 0x82 /* jc */; case SLJIT_GREATER_EQUAL: + case SLJIT_NOT_CARRY: case SLJIT_GREATER_EQUAL_F64: return 0x83 /* jae */; @@ -434,14 +440,14 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) { - sljit_s32 type = jump->flags >> TYPE_SHIFT; + sljit_uw type = jump->flags >> TYPE_SHIFT; sljit_s32 short_jump; sljit_uw label_addr; if (jump->flags & JUMP_LABEL) label_addr = (sljit_uw)(code + jump->u.label->size); else - label_addr = jump->u.target - executable_offset; + label_addr = jump->u.target - (sljit_uw)executable_offset; short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; @@ -463,7 +469,7 @@ jump->addr++; } else if (short_jump) { - *code_ptr++ = get_jump_code(type) - 0x10; + *code_ptr++ = U8(get_jump_code(type) - 0x10); jump->addr++; } else { @@ -492,7 +498,7 @@ sljit_u8 *buf_end; sljit_u8 len; sljit_sw executable_offset; - sljit_sw jump_addr; + sljit_uw jump_addr; struct sljit_label *label; struct sljit_jump *jump; @@ -530,7 +536,7 @@ switch (*buf_ptr) { case 0: label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); 
label = label->next; break; case 1: @@ -575,11 +581,11 @@ jump = compiler->jumps; while (jump) { - jump_addr = jump->addr + executable_offset; + jump_addr = jump->addr + (sljit_uw)executable_offset; if (jump->flags & PATCH_MB) { SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127); - *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); + *(sljit_u8*)jump->addr = U8(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); } else if (jump->flags & PATCH_MW) { if (jump->flags & JUMP_LABEL) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -600,7 +606,7 @@ } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) else if (jump->flags & PATCH_MD) - sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr); #endif jump = jump->next; @@ -626,7 +632,7 @@ compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = code_ptr - code; + compiler->executable_size = (sljit_uw)(code_ptr - code); code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); @@ -682,17 +688,40 @@ #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) -static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, - sljit_u32 op_types, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w); +#define BINARY_IMM32(op_imm, immw, arg, argw) \ + do { \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ + } while (0) -static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_u32 op_types, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw 
src2w);
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitProtExecAllocator.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitProtExecAllocator.c
Changed
@@ -66,7 +66,7 @@ /* --------------------------------------------------------------------- */ /* 64 KByte. */ -#define CHUNK_SIZE 0x10000 +#define CHUNK_SIZE (sljit_uw)0x10000 struct chunk_header { void *executable; @@ -194,7 +194,7 @@ if (fd == -1) return NULL; - if (ftruncate(fd, size)) { + if (ftruncate(fd, (off_t)size)) { close(fd); return NULL; } @@ -281,7 +281,7 @@ #define AS_FREE_BLOCK(base, offset) \ ((struct free_block*)(((sljit_u8*)base) + offset)) #define MEM_START(base) ((void*)((base) + 1)) -#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7) static struct free_block* free_blocks; static sljit_uw allocated_size;
View file
_service:tar_scm:pcre2-10.39.tar.bz2/src/sljit/sljitUtils.c -> _service:tar_scm:pcre2-10.40.tar.bz2/src/sljit/sljitUtils.c
Changed
@@ -131,12 +131,12 @@ #ifdef _WIN32 -static SLJIT_INLINE sljit_sw get_page_alignment(void) { +static SLJIT_INLINE sljit_uw get_page_alignment(void) { SYSTEM_INFO si; - static sljit_sw sljit_page_align; + static sljit_uw sljit_page_align = 0; if (!sljit_page_align) { GetSystemInfo(&si); - sljit_page_align = si.dwPageSize - 1; + sljit_page_align = (sljit_uw)si.dwPageSize - 1; } return sljit_page_align; } @@ -145,18 +145,21 @@ #include <unistd.h> -static SLJIT_INLINE sljit_sw get_page_alignment(void) { - static sljit_sw sljit_page_align = -1; - if (sljit_page_align < 0) { +static SLJIT_INLINE sljit_uw get_page_alignment(void) { + static sljit_uw sljit_page_align = 0; + + sljit_sw align; + + if (!sljit_page_align) { #ifdef _SC_PAGESIZE - sljit_page_align = sysconf(_SC_PAGESIZE); + align = sysconf(_SC_PAGESIZE); #else - sljit_page_align = getpagesize(); + align = getpagesize(); #endif /* Should never happen. */ - if (sljit_page_align < 0) - sljit_page_align = 4096; - sljit_page_align--; + if (align < 0) + align = 4096; + sljit_page_align = (sljit_uw)align - 1; } return sljit_page_align; } @@ -227,7 +230,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) { SLJIT_UNUSED_ARG(allocator_data); - munmap((void*)stack->min_start, stack->end - stack->min_start); + munmap((void*)stack->min_start, (size_t)(stack->end - stack->min_start)); SLJIT_FREE(stack, allocator_data); } @@ -237,7 +240,7 @@ { struct sljit_stack *stack; void *ptr; - sljit_sw page_align; + sljit_uw page_align; SLJIT_UNUSED_ARG(allocator_data); @@ -295,7 +298,7 @@ #if defined _WIN32 || defined(POSIX_MADV_DONTNEED) sljit_uw aligned_old_start; sljit_uw aligned_new_start; - sljit_sw page_align; + sljit_uw page_align; #endif if ((new_start < stack->min_start) || (new_start >= stack->end))
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/grepoutput -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/grepoutput
Changed
@@ -978,5 +978,16 @@ a RC=0 ---------------------------- Test 133 ----------------------------- +match 1: + a +match 2: + b +--- +match 2: + b +match 3: + c +RC=0 +---------------------------- Test 134 ----------------------------- =AB3CD5= RC=0
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testinput17 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testinput17
Changed
@@ -304,4 +304,7 @@ /aCz/mg,firstline,newline=lf match\nmatch +//jitfast + \=null_subject + # End of testinput17
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testinput18 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testinput18
Changed
@@ -135,4 +135,9 @@ 123ace 123ace\=posix_startend=2:6 +//posix +\= Expect errors + \=null_subject + abc\=null_subject + # End of testdata/testinput18
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testinput2 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testinput2
Changed
@@ -5767,7 +5767,7 @@ /(?(DEFINE)b)((?<=b).*)/B -/(?(VERSION=10.4)b)((?<=b).*)/B +/(?(VERSION=10.3)b)((?<=b).*)/B /aAbcC/IB @@ -5902,4 +5902,34 @@ # --------- +# Tests for zero-length NULL to be treated as an empty string. + +// + \=null_subject +\= Expect error + abc\=null_subject + +//replace=20 + abc\=null_replacement + \=null_subject + \=null_replacement + +/X*/g,replace=xy +\= Expect error + >X<\=null_replacement + +/X+/replace=20 + >XX<\=null_replacement + +# --------- + +/Aa{2}/BI + aabcd + +/A{2}/iBI + aabcd + +/Aa{2,3}/BI + aabcd + # End of testinput2
View file
_service:tar_scm:pcre2-10.40.tar.bz2/testdata/testinput26
Added
@@ -0,0 +1,2728 @@ +# These tests are generated by maint/GenerateTest26.py, do not edit. + +# Unicode Script Extension tests. + +# Base script check +/^\p{sc=Latin}/utf + A + +/^\p{Script=Latn}/utf + \x{1df1e} + +# Script extension check +/^\p{Latin}/utf + \x{363} + +/^\p{scx=Latn}/utf + \x{a92e} + +# Script extension only character +/^\p{Latin}/utf + \x{363} + +/^\p{sc=Latin}/utf + \x{363} + +# Character not in script +/^\p{Latin}/utf + \x{1df1f} + +# Base script check +/^\p{sc=Greek}/utf + \x{370} + +/^\p{Script=Grek}/utf + \x{1d245} + +# Script extension check +/^\p{Greek}/utf + \x{342} + +/^\p{Script_Extensions=Grek}/utf + \x{1dc1} + +# Script extension only character +/^\p{Greek}/utf + \x{342} + +/^\p{sc=Greek}/utf + \x{342} + +# Character not in script +/^\p{Greek}/utf + \x{1d246} + +# Base script check +/^\p{sc=Cyrillic}/utf + \x{400} + +/^\p{Script=Cyrl}/utf + \x{fe2f} + +# Script extension check +/^\p{Cyrillic}/utf + \x{483} + +/^\p{scx=Cyrl}/utf + \x{a66f} + +# Script extension only character +/^\p{Cyrillic}/utf + \x{2e43} + +/^\p{sc=Cyrillic}/utf + \x{2e43} + +# Character not in script +/^\p{Cyrillic}/utf + \x{fe30} + +# Base script check +/^\p{sc=Arabic}/utf + \x{600} + +/^\p{Script=Arab}/utf + \x{1eef1} + +# Script extension check +/^\p{Arabic}/utf + \x{60c} + +/^\p{Script_Extensions=Arab}/utf + \x{102fb} + +# Script extension only character +/^\p{Arabic}/utf + \x{102e0} + +/^\p{sc=Arabic}/utf + \x{102e0} + +# Character not in script +/^\p{Arabic}/utf + \x{1eef2} + +# Base script check +/^\p{sc=Syriac}/utf + \x{700} + +/^\p{Script=Syrc}/utf + \x{86a} + +# Script extension check +/^\p{Syriac}/utf + \x{60c} + +/^\p{scx=Syrc}/utf + \x{1dfa} + +# Script extension only character +/^\p{Syriac}/utf + \x{1dfa} + +/^\p{sc=Syriac}/utf + \x{1dfa} + +# Character not in script +/^\p{Syriac}/utf + \x{1dfb} + +# Base script check +/^\p{sc=Thaana}/utf + \x{780} + +/^\p{Script=Thaa}/utf + \x{7b1} + +# Script extension check +/^\p{Thaana}/utf + \x{60c} + 
+/^\p{Script_Extensions=Thaa}/utf + \x{fdfd} + +# Script extension only character +/^\p{Thaana}/utf + \x{fdf2} + +/^\p{sc=Thaana}/utf + \x{fdf2} + +# Character not in script +/^\p{Thaana}/utf + \x{fdfe} + +# Base script check +/^\p{sc=Devanagari}/utf + \x{900} + +/^\p{Script=Deva}/utf + \x{a8ff} + +# Script extension check +/^\p{Devanagari}/utf + \x{951} + +/^\p{scx=Deva}/utf + \x{a8f3} + +# Script extension only character +/^\p{Devanagari}/utf + \x{1cd1} + +/^\p{sc=Devanagari}/utf + \x{1cd1} + +# Character not in script +/^\p{Devanagari}/utf + \x{a900} + +# Base script check +/^\p{sc=Bengali}/utf + \x{980} + +/^\p{Script=Beng}/utf + \x{9fe} + +# Script extension check +/^\p{Bengali}/utf + \x{951} + +/^\p{Script_Extensions=Beng}/utf + \x{a8f1} + +# Script extension only character +/^\p{Bengali}/utf + \x{1cf7} + +/^\p{sc=Bengali}/utf + \x{1cf7}
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testinput4 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testinput4
Changed
@@ -1,4 +1,4 @@ -# This set of tests is for UTF support, including Unicode properties. The +# This set of tests is for UTF support, including Unicode properties. The # Unicode tests are all compatible with all versions of Perl >= 5.10, but # some of the property tests may differ because of different versions of # Unicode in use by PCRE2 and Perl. @@ -6,7 +6,7 @@ # WARNING: Use only / as the pattern delimiter. Although pcre2test supports # a number of delimiters, all those other than / give problems with the # perltest.sh script. - + #newline_default lf anycrlf any #perltest @@ -694,27 +694,27 @@ /^\d*\w{4}/utf 1234 -\= Expect no match +\= Expect no match 123 /^^b*\w{4}/utf aaaa -\= Expect no match +\= Expect no match aaa /^^b*\w{4}/i,utf aaaa -\= Expect no match +\= Expect no match aaa /^\x{100}*.{4}/utf \x{100}\x{100}\x{100}\x{100} -\= Expect no match +\= Expect no match \x{100}\x{100}\x{100} /^\x{100}*.{4}/i,utf \x{100}\x{100}\x{100}\x{100} -\= Expect no match +\= Expect no match \x{100}\x{100}\x{100} /^a+a\x{200}/utf @@ -725,144 +725,144 @@ /^#^\x{ffff}#^\x{ffff}#^\x{ffff}#/utf #\x{10000}#\x{100}#\x{10ffff}# - -# Unicode property support tests + +# Unicode property support tests /^\pC\pL\pM\pN\pP\pS\pZ</utf \x7f\x{c0}\x{30f}\x{660}\x{66c}\x{f01}\x{1680}< - \np\x{300}9!\$ < -\= Expect no match - ap\x{300}9!\$ < - + \np\x{300}9!\$ < +\= Expect no match + ap\x{300}9!\$ < + /^\PC/utf X -\= Expect no match +\= Expect no match \x7f - + /^\PL/utf 9 -\= Expect no match +\= Expect no match \x{c0} - + /^\PM/utf X -\= Expect no match +\= Expect no match \x{30f} - + /^\PN/utf X -\= Expect no match +\= Expect no match \x{660} - + /^\PP/utf X -\= Expect no match +\= Expect no match \x{66c} - + /^\PS/utf X -\= Expect no match +\= Expect no match \x{f01} - + /^\PZ/utf X -\= Expect no match +\= Expect no match \x{1680} - + /^\p{Cc}/utf \x{017} - \x{09f} + \x{09f} \= Expect no match - \x{0600} - + \x{0600} + /^\p{Cf}/utf \x{601} \= Expect no match - \x{09f} - + \x{09f} + 
/^\p{Cn}/utf \x{e0000} \= Expect no match - \x{09f} - + \x{09f} + /^\p{Co}/utf \x{f8ff} \= Expect no match - \x{09f} - + \x{09f} + /^\p{Ll}/utf a -\= Expect no match +\= Expect no match Z - \x{e000} - + \x{e000} + /^\p{Lm}/utf \x{2b0} \= Expect no match - a - + a + /^\p{Lo}/utf \x{1bb} \x{3400} \x{3401} \x{4d00} \x{4db4} - \x{4db5} - \x{4db6} + \x{4db5} + \x{4db6} \= Expect no match - a + a \x{2b0} - + /^\p{Lt}/utf \x{1c5} \= Expect no match - a + a \x{2b0} - + /^\p{Lu}/utf A \= Expect no match \x{2b0} - + /^\p{Mc}/utf \x{903} \= Expect no match X \x{300} - + /^\p{Me}/utf
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testinput5 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testinput5
Changed
@@ -2043,7 +2043,7 @@ /^\x{1E900}\x{104B0}/i,utf \x{1E900}\x{104B0} \x{1E922}\x{104D8} - + /^(?:(\X)(?C))+$/utf \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where @@ -2071,15 +2071,6 @@ # More differences from Perl -/^\p{Arabic}/utf -\= Expect no match - \x{650} - \x{651} - \x{652} - \x{653} - \x{654} - \x{655} - /^\p{Common}/utf \x{60c} \x{61f} @@ -2188,4 +2179,37 @@ /(\xc1)\1/i,ucp \xc1\xe1\=no_jit +/\p{L&}+\p{bidi_control}/B + +/\p{bidi_control}+\p{L&}/B + +/\p{han}/B + +/\p{script:han}/B + +/\p{sc:han}/B + +/\p{script extensions:han}/B + +/\p{scx:han}/B + +# Test error - invalid script name + +/\p{sc:L}/ + +# Some Boolean property tests that differ from Perl + +/\p{emojimodifierbase}\p{ebase}/g,utf + >AN<>\x{261d}\x{1faf6}<>yz< + +/\p{graphemelink}\p{grlink}/g,utf + >AN<>\x{11d97}\x{94d}<>yz< + +/\p{soft dotted}\p{sd}/g,utf + >AF23<>\x{1df1a}\x{69}<>yz< + +# ------------------------------------------------ + +/\p{\2b:xäigi:t:_/ + # End of testinput5
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testinput7 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testinput7
Changed
@@ -1,5 +1,5 @@ # This set of tests checks UTF and Unicode property support with the DFA -# matching functionality of pcre_dfa_match(). A default subject modifier is +# matching functionality of pcre2_dfa_match(). A default subject modifier is # used to force DFA matching for all tests. #subject dfa @@ -2093,4 +2093,142 @@ /(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext \x{100}\x{200}\x{300} +# ----------------------------------------------------------------------------- +# Tests for bidi control and bidi class properties + +/\p{ bidi_control }/utf + -->\x{202c}<-- + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidi_control}/utf + -->\x{202c}<-- + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}<>+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + +/\p{bidi class = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{bidi_class : AL}+?/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{Bidi_Class : 
AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{bidi class = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- + +/\p{bidiclass:FSI}+/utf + -->\x{2068}<-- + +/\p{bidi class:L}+/utf + -->ABC<-- + +/\P{bidi class:L}+/utf + -->ABC<-- + +/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + +/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + +/\p{bidi class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + +# ----------------------------------------------------------------------------- + +/\p{katakana}/utf + \x{30a1} + \x{3001} + +/\p{scx:katakana}/utf + \x{30a1} + \x{3001} + +/\p{script extensions:katakana}/utf + \x{30a1} + \x{3001} + +/\p{sc:katakana}/utf + \x{30a1} +\= Expect no match + \x{3001} + +/\p{script:katakana}/utf + \x{30a1} +\= Expect no match + \x{3001} + +/\p{sc:katakana}{3,}/utf + \x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66}\x{3001}ABC + +/\p{sc:katakana}{3,}?/utf + \x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66}\x{3001}ABC + # End of testinput7
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testoutput17 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testoutput17
Changed
@@ -550,4 +550,8 @@ match\nmatch 0: a (JIT) +//jitfast + \=null_subject + 0: (JIT) + # End of testinput17
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testoutput18 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testoutput18
Changed
@@ -215,4 +215,11 @@ 3: <unset> 4: c +//posix +\= Expect errors + \=null_subject +No match: POSIX code 16: bad argument + abc\=null_subject +No match: POSIX code 16: bad argument + # End of testdata/testinput18
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testoutput2 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testoutput2
Changed
@@ -17397,7 +17397,7 @@ End ------------------------------------------------------------------ -/(?(VERSION=10.4)b)((?<=b).*)/B +/(?(VERSION=10.3)b)((?<=b).*)/B ------------------------------------------------------------------ Bra Cond @@ -17674,6 +17674,78 @@ # --------- +# Tests for zero-length NULL to be treated as an empty string. + +// + \=null_subject + 0: +\= Expect error + abc\=null_subject +Failed: error -51: NULL argument passed with non-zero length + +//replace=20 + abc\=null_replacement + 1: abc + \=null_subject + 1: + \=null_replacement + 1: + +/X*/g,replace=xy +\= Expect error + >X<\=null_replacement +Failed: error -51: NULL argument passed with non-zero length + +/X+/replace=20 + >XX<\=null_replacement + 1: >< + +# --------- + +/Aa{2}/BI +------------------------------------------------------------------ + Bra + /i A{2} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'A' (caseless) +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aabcd + 0: aa + +/A{2}/iBI +------------------------------------------------------------------ + Bra + /i A{2} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless +First code unit = 'A' (caseless) +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aabcd + 0: aa + +/Aa{2,3}/BI +------------------------------------------------------------------ + Bra + /i A{2} + /i A?+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'A' (caseless) +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aabcd + 0: aa + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data
View file
_service:tar_scm:pcre2-10.40.tar.bz2/testdata/testoutput26
Added
@@ -0,0 +1,3483 @@ +# These tests are generated by maint/GenerateTest26.py, do not edit. + +# Unicode Script Extension tests. + +# Base script check +/^\p{sc=Latin}/utf + A + 0: A + +/^\p{Script=Latn}/utf + \x{1df1e} + 0: \x{1df1e} + +# Script extension check +/^\p{Latin}/utf + \x{363} + 0: \x{363} + +/^\p{scx=Latn}/utf + \x{a92e} + 0: \x{a92e} + +# Script extension only character +/^\p{Latin}/utf + \x{363} + 0: \x{363} + +/^\p{sc=Latin}/utf + \x{363} +No match + +# Character not in script +/^\p{Latin}/utf + \x{1df1f} +No match + +# Base script check +/^\p{sc=Greek}/utf + \x{370} + 0: \x{370} + +/^\p{Script=Grek}/utf + \x{1d245} + 0: \x{1d245} + +# Script extension check +/^\p{Greek}/utf + \x{342} + 0: \x{342} + +/^\p{Script_Extensions=Grek}/utf + \x{1dc1} + 0: \x{1dc1} + +# Script extension only character +/^\p{Greek}/utf + \x{342} + 0: \x{342} + +/^\p{sc=Greek}/utf + \x{342} +No match + +# Character not in script +/^\p{Greek}/utf + \x{1d246} +No match + +# Base script check +/^\p{sc=Cyrillic}/utf + \x{400} + 0: \x{400} + +/^\p{Script=Cyrl}/utf + \x{fe2f} + 0: \x{fe2f} + +# Script extension check +/^\p{Cyrillic}/utf + \x{483} + 0: \x{483} + +/^\p{scx=Cyrl}/utf + \x{a66f} + 0: \x{a66f} + +# Script extension only character +/^\p{Cyrillic}/utf + \x{2e43} + 0: \x{2e43} + +/^\p{sc=Cyrillic}/utf + \x{2e43} +No match + +# Character not in script +/^\p{Cyrillic}/utf + \x{fe30} +No match + +# Base script check +/^\p{sc=Arabic}/utf + \x{600} + 0: \x{600} + +/^\p{Script=Arab}/utf + \x{1eef1} + 0: \x{1eef1} + +# Script extension check +/^\p{Arabic}/utf + \x{60c} + 0: \x{60c} + +/^\p{Script_Extensions=Arab}/utf + \x{102fb} + 0: \x{102fb} + +# Script extension only character +/^\p{Arabic}/utf + \x{102e0} + 0: \x{102e0} + +/^\p{sc=Arabic}/utf + \x{102e0} +No match + +# Character not in script +/^\p{Arabic}/utf + \x{1eef2} +No match + +# Base script check +/^\p{sc=Syriac}/utf + \x{700} + 0: \x{700} + +/^\p{Script=Syrc}/utf + \x{86a} + 0: \x{86a} + +# Script extension check 
+/^\p{Syriac}/utf + \x{60c} + 0: \x{60c} + +/^\p{scx=Syrc}/utf + \x{1dfa} + 0: \x{1dfa} + +# Script extension only character +/^\p{Syriac}/utf + \x{1dfa} + 0: \x{1dfa} + +/^\p{sc=Syriac}/utf + \x{1dfa} +No match + +# Character not in script +/^\p{Syriac}/utf + \x{1dfb} +No match + +# Base script check +/^\p{sc=Thaana}/utf + \x{780} + 0: \x{780} + +/^\p{Script=Thaa}/utf + \x{7b1} + 0: \x{7b1} + +# Script extension check +/^\p{Thaana}/utf + \x{60c} + 0: \x{60c} + +/^\p{Script_Extensions=Thaa}/utf + \x{fdfd} + 0: \x{fdfd} + +# Script extension only character +/^\p{Thaana}/utf + \x{fdf2} + 0: \x{fdf2} + +/^\p{sc=Thaana}/utf + \x{fdf2} +No match + +# Character not in script +/^\p{Thaana}/utf + \x{fdfe} +No match + +# Base script check +/^\p{sc=Devanagari}/utf + \x{900}
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testoutput4 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testoutput4
Changed
@@ -1,4 +1,4 @@ -# This set of tests is for UTF support, including Unicode properties. The +# This set of tests is for UTF support, including Unicode properties. The # Unicode tests are all compatible with all versions of Perl >= 5.10, but # some of the property tests may differ because of different versions of # Unicode in use by PCRE2 and Perl. @@ -6,7 +6,7 @@ # WARNING: Use only / as the pattern delimiter. Although pcre2test supports # a number of delimiters, all those other than / give problems with the # perltest.sh script. - + #newline_default lf anycrlf any #perltest @@ -1183,35 +1183,35 @@ /^\d*\w{4}/utf 1234 0: 1234 -\= Expect no match +\= Expect no match 123 No match /^^b*\w{4}/utf aaaa 0: aaaa -\= Expect no match +\= Expect no match aaa No match /^^b*\w{4}/i,utf aaaa 0: aaaa -\= Expect no match +\= Expect no match aaa No match /^\x{100}*.{4}/utf \x{100}\x{100}\x{100}\x{100} 0: \x{100}\x{100}\x{100}\x{100} -\= Expect no match +\= Expect no match \x{100}\x{100}\x{100} No match /^\x{100}*.{4}/i,utf \x{100}\x{100}\x{100}\x{100} 0: \x{100}\x{100}\x{100}\x{100} -\= Expect no match +\= Expect no match \x{100}\x{100}\x{100} No match @@ -1226,113 +1226,113 @@ /^#^\x{ffff}#^\x{ffff}#^\x{ffff}#/utf #\x{10000}#\x{100}#\x{10ffff}# 0: #\x{10000}#\x{100}#\x{10ffff}# - -# Unicode property support tests + +# Unicode property support tests /^\pC\pL\pM\pN\pP\pS\pZ</utf \x7f\x{c0}\x{30f}\x{660}\x{66c}\x{f01}\x{1680}< 0: \x{7f}\x{c0}\x{30f}\x{660}\x{66c}\x{f01}\x{1680}< - \np\x{300}9!\$ < + \np\x{300}9!\$ < 0: \x{0a}p\x{300}9!$ < -\= Expect no match - ap\x{300}9!\$ < +\= Expect no match + ap\x{300}9!\$ < No match - + /^\PC/utf X 0: X -\= Expect no match +\= Expect no match \x7f No match - + /^\PL/utf 9 0: 9 -\= Expect no match +\= Expect no match \x{c0} No match - + /^\PM/utf X 0: X -\= Expect no match +\= Expect no match \x{30f} No match - + /^\PN/utf X 0: X -\= Expect no match +\= Expect no match \x{660} No match - + /^\PP/utf X 0: X -\= Expect no match +\= Expect no match 
\x{66c} No match - + /^\PS/utf X 0: X -\= Expect no match +\= Expect no match \x{f01} No match - + /^\PZ/utf X 0: X -\= Expect no match +\= Expect no match \x{1680} No match - + /^\p{Cc}/utf \x{017} 0: \x{17} - \x{09f} + \x{09f} 0: \x{9f} \= Expect no match - \x{0600} + \x{0600} No match - + /^\p{Cf}/utf \x{601} 0: \x{601} \= Expect no match - \x{09f} + \x{09f} No match - + /^\p{Cn}/utf \x{e0000} 0: \x{e0000} \= Expect no match - \x{09f} + \x{09f} No match - + /^\p{Co}/utf \x{f8ff} 0: \x{f8ff} \= Expect no match - \x{09f} + \x{09f} No match - + /^\p{Ll}/utf a 0: a -\= Expect no match +\= Expect no match Z No match - \x{e000} + \x{e000} No match - + /^\p{Lm}/utf \x{2b0} 0: \x{2b0} \= Expect no match - a + a No match -
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testoutput5 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testoutput5
Changed
@@ -3298,7 +3298,7 @@ AllAny+ notprop Any AllAny+ - prop L& + prop Lc AllAny+ prop L AllAny+ @@ -3322,29 +3322,29 @@ /\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra - prop L& + + prop Lc + AllAny - prop L& + - prop L& - notprop L& ++ - prop L& - prop L& + + prop Lc + + prop Lc + notprop Lc ++ + prop Lc + prop Lc + prop L - prop L& + + prop Lc + prop Lu - prop L& + + prop Lc + prop Han - prop L& + + prop Lc + prop Xan - prop L& ++ + prop Lc ++ notprop Xan - prop L& ++ + prop Lc ++ prop Xsp - prop L& ++ + prop Lc ++ prop Xps prop Xwd + - prop L& - prop L& + + prop Lc + prop Lc + prop Xuc Ket End @@ -3356,7 +3356,7 @@ prop N + AllAny prop N + - prop L& + prop Lc prop N ++ prop L prop N + @@ -3387,7 +3387,7 @@ prop Lu + AllAny prop Lu + - prop L& + prop Lc prop Lu + prop L prop Lu + @@ -3420,7 +3420,7 @@ prop Han + prop Lu prop Han + - prop L& + prop Lc prop Han + prop L prop Han + @@ -3449,9 +3449,9 @@ prop Xan + AllAny prop Xan + - prop L& + prop Lc notprop Xan ++ - prop L& + prop Lc prop Xan + prop L prop Xan + @@ -3480,7 +3480,7 @@ prop Xsp + AllAny prop Xsp ++ - prop L& + prop Lc prop Xsp ++ prop L prop Xsp ++ @@ -3509,7 +3509,7 @@ prop Xwd + AllAny prop Xwd + - prop L& + prop Lc prop Xwd + prop L prop Xwd + @@ -3538,7 +3538,7 @@ prop Xuc + AllAny prop Xuc + - prop L& + prop Lc prop Xuc + prop L prop Xuc + @@ -4620,7 +4620,7 @@ 0: \x{1e900}\x{104b0} \x{1E922}\x{104D8} 0: \x{1e922}\x{104d8} - + /^(?:(\X)(?C))+$/utf \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where Callout 0: last capture = 1 @@ -4720,21 +4720,6 
@@ # More differences from Perl -/^\p{Arabic}/utf -\= Expect no match - \x{650} -No match - \x{651} -No match - \x{652} -No match - \x{653} -No match - \x{654} -No match - \x{655} -No match - /^\p{Common}/utf \x{60c} 0: \x{60c} @@ -4949,4 +4934,86 @@ 0: \xc1\xe1 1: \xc1 +/\p{L&}+\p{bidi_control}/B +------------------------------------------------------------------ + Bra + prop Lc + + prop Bidicontrol + Ket + End +------------------------------------------------------------------ + +/\p{bidi_control}+\p{L&}/B +------------------------------------------------------------------ + Bra + prop Bidicontrol + + prop Lc + Ket + End +------------------------------------------------------------------ + +/\p{han}/B +------------------------------------------------------------------ + Bra + prop Han + Ket + End +------------------------------------------------------------------ + +/\p{script:han}/B +------------------------------------------------------------------ + Bra + prop script:Han + Ket + End +------------------------------------------------------------------ + +/\p{sc:han}/B +------------------------------------------------------------------ + Bra + prop script:Han + Ket + End +------------------------------------------------------------------ + +/\p{script extensions:han}/B +------------------------------------------------------------------ + Bra + prop Han
View file
_service:tar_scm:pcre2-10.39.tar.bz2/testdata/testoutput7 -> _service:tar_scm:pcre2-10.40.tar.bz2/testdata/testoutput7
Changed
@@ -1,5 +1,5 @@ # This set of tests checks UTF and Unicode property support with the DFA -# matching functionality of pcre_dfa_match(). A default subject modifier is +# matching functionality of pcre2_dfa_match(). A default subject modifier is # used to force DFA matching for all tests. #subject dfa @@ -3539,4 +3539,221 @@ 0: \x{100}\x{200}\x{300} <<<<<<< >>>>>>> +# ----------------------------------------------------------------------------- +# Tests for bidi control and bidi class properties + +/\p{ bidi_control }/utf + -->\x{202c}<-- + 0: \x{202c} + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + 1: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c} + 2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b} + 3: \x{61c}\x{200e}\x{200f}\x{202a} + 4: \x{61c}\x{200e}\x{200f} + 5: \x{61c}\x{200e} + 6: \x{61c} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + 1: \x{2066}\x{2067}\x{2068} + 2: \x{2066}\x{2067} + 3: \x{2066} + +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/\p{bidi_control}/utf + -->\x{202c}<-- + 0: \x{202c} + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + 1: 
\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c} + 2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b} + 3: \x{61c}\x{200e}\x{200f}\x{202a} + 4: \x{61c}\x{200e}\x{200f} + 5: \x{61c}\x{200e} + 6: \x{61c} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + 1: \x{2066}\x{2067}\x{2068} + 2: \x{2066}\x{2067} + 3: \x{2066} + +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/\p{bidicontrol}<>+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: >\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}< + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: >\x{2066}\x{2067}\x{2068}\x{2069}< + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- + +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + 0: \x{61d} + +/\p{bidi class = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{bidi_class : AL}+?/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + 1: \x{61d}\x{61e} + 2: \x{61d} + +/\p{Bidi_Class : AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{bidi class = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + 0: \x{602}\x{604} + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + 0: \x{0a}\x{0d}\x{1c}\x{1e}\x{85}\x{2029} + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + 0: \x{00}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff} + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + 0: ,.\x{60c}\x{ff1a} + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + 0: 
09\x{b2}\x{2074}\x{1fbf9} + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + 0: +-\x{207a}\x{ff0d} + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- + 0: # + +/\p{bidiclass:FSI}+/utf + -->\x{2068}<-- + 0: \x{2068} + +/\p{bidi class:L}+/utf + -->ABC<-- + 0: ABC + +/\P{bidi class:L}+/utf + -->ABC<-- + 0: --> + +/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + 0: \x{202a}\x{2066}\x{202d} + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + 0: \x{9bc}\x{a71}\x{e31} + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + 0: >!'()*;@\x{384}\x{2039}<= + +/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + 0: \x{202c}\x{2069} + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + 0: \x{590}\x{5c6}\x{200f}\x{10805} + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + 0: \x{202b}\x{2067}\x{202e} + +/\p{bidi class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + 0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000} + +# ----------------------------------------------------------------------------- + +/\p{katakana}/utf + \x{30a1} + 0: \x{30a1} + \x{3001} + 0: \x{3001} + +/\p{scx:katakana}/utf + \x{30a1} + 0: \x{30a1} + \x{3001} + 0: \x{3001} + +/\p{script extensions:katakana}/utf + \x{30a1}
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2