From 91b2b6c65d544f39c45498ebafbab84b81d465b5 Mon Sep 17 00:00:00 2001 From: Anatol Belski Date: Tue, 18 Sep 2018 14:16:06 +0200 Subject: Upgrade PCRE2 to 10.32 (cherry picked from commit d918e0776b5168aed2707b0ca500589844f0faa8) --- ext/pcre/pcre2lib/pcre2_tables.c | 378 ++++++++++++++++++++------------------- 1 file changed, 196 insertions(+), 182 deletions(-) (limited to 'ext/pcre/pcre2lib/pcre2_tables.c') diff --git a/ext/pcre/pcre2lib/pcre2_tables.c b/ext/pcre/pcre2lib/pcre2_tables.c index 9f8dc293aa..83d6f9de55 100644 --- a/ext/pcre/pcre2lib/pcre2_tables.c +++ b/ext/pcre/pcre2lib/pcre2_tables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -137,9 +137,10 @@ const uint32_t PRIV(ucp_gentype)[] = { /* This table encodes the rules for finding the end of an extended grapheme cluster. Every code point has a grapheme break property which is one of the -ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by -the properties of two adjacent code points. The left property selects a word -from the table, and the right property selects a bit from that word like this: +ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions +10 and 11. The 2-dimensional table is indexed by the properties of two adjacent +code points. The left property selects a word from the table, and the right +property selects a bit from that word like this: PRIV(ucp_gbtable)[left-property] & (1 << right-property) @@ -166,49 +167,41 @@ are implementing). 6. Do not break after Prepend characters. -7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed - by E_Modifier). Extend characters are allowed before the modifier; this - cannot be represented in this table, the code has to deal with it. +7. Do not break within emoji modifier sequences or emoji zwj sequences. That + is, do not break between characters with the Extended_Pictographic property. + Extend and ZWJ characters are allowed between the characters; this cannot be + represented in this table, the code has to deal with it. -8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or - E_Base_GAZ). - -9. Do not break within emoji flag sequences. That is, do not break between +8. Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there are an odd number of RI characters before the break point. This table encodes "join RI characters"; the code has to deal with checking for previous adjoining RIs. -10. Otherwise, break everywhere. +9. Otherwise, break everywhere. */ #define ESZ (1<