diff options
| author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-05-20 09:56:07 +0000 |
|---|---|---|
| committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-05-20 09:56:07 +0000 |
| commit | 41386e9cb918eed93b3f13648cbef387e371e451 (patch) | |
| tree | a97f9d7bd1d9d091833286085f72da9d83fd0606 /Source/JavaScriptCore/yarr | |
| parent | e15dd966d523731101f70ccf768bba12435a0208 (diff) | |
| download | WebKitGtk-tarball-41386e9cb918eed93b3f13648cbef387e371e451.tar.gz | |
webkitgtk-2.4.9 webkitgtk-2.4.9
Diffstat (limited to 'Source/JavaScriptCore/yarr')
| -rw-r--r-- | Source/JavaScriptCore/yarr/RegularExpression.cpp | 15 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/RegularExpression.h | 4 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/Yarr.h | 2 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp | 2 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h | 4 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js | 219 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrInterpreter.cpp | 18 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrInterpreter.h | 14 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrJIT.cpp | 43 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrJIT.h | 32 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrParser.h | 3 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrPattern.cpp | 45 | ||||
| -rw-r--r-- | Source/JavaScriptCore/yarr/YarrPattern.h | 77 |
13 files changed, 106 insertions, 372 deletions
diff --git a/Source/JavaScriptCore/yarr/RegularExpression.cpp b/Source/JavaScriptCore/yarr/RegularExpression.cpp index 0c7089654..b58ad393c 100644 --- a/Source/JavaScriptCore/yarr/RegularExpression.cpp +++ b/Source/JavaScriptCore/yarr/RegularExpression.cpp @@ -1,4 +1,3 @@ - /* * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. * Copyright (C) 2008 Collabora Ltd. @@ -13,10 +12,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -37,15 +36,15 @@ namespace JSC { namespace Yarr { class RegularExpression::Private : public RefCounted<RegularExpression::Private> { public: - static Ref<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) + static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) { - return adoptRef(*new Private(pattern, caseSensitivity, multilineMode)); + return adoptRef(new Private(pattern, caseSensitivity, multilineMode)); } int lastMatchLength; unsigned m_numSubpatterns; - std::unique_ptr<JSC::Yarr::BytecodePattern> m_regExpByteCode; + OwnPtr<JSC::Yarr::BytecodePattern> m_regExpByteCode; private: Private(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) @@ -55,7 +54,7 @@ private: { } - 
std::unique_ptr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) + PassOwnPtr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) { JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &m_constructionError); if (m_constructionError) { @@ -179,7 +178,7 @@ void replace(String& string, const RegularExpression& target, const String& repl bool RegularExpression::isValid() const { - return d->m_regExpByteCode.get(); + return d->m_regExpByteCode; } } } // namespace JSC::Yarr diff --git a/Source/JavaScriptCore/yarr/RegularExpression.h b/Source/JavaScriptCore/yarr/RegularExpression.h index 3298f0bd8..08aaf353a 100644 --- a/Source/JavaScriptCore/yarr/RegularExpression.h +++ b/Source/JavaScriptCore/yarr/RegularExpression.h @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR diff --git a/Source/JavaScriptCore/yarr/Yarr.h b/Source/JavaScriptCore/yarr/Yarr.h index 463623ea2..d393e9fa9 100644 --- a/Source/JavaScriptCore/yarr/Yarr.h +++ b/Source/JavaScriptCore/yarr/Yarr.h @@ -43,7 +43,7 @@ namespace JSC { namespace Yarr { #define YarrStackSpaceForBackTrackInfoParentheses 2 static const unsigned quantifyInfinite = UINT_MAX; -static const unsigned offsetNoMatch = std::numeric_limits<unsigned>::max(); +static const unsigned offsetNoMatch = (unsigned)-1; // The below limit restricts the number of "recursive" match calls in order to // avoid spending exponential time on complex regular expressions. diff --git a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp index 52cb1a939..777b1cff1 100644 --- a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp +++ b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp @@ -49,7 +49,7 @@ const uint16_t ucs2CharacterSet13[] = { 0x03a6u, 0x03c6u, 0x03d5u, 0 }; const uint16_t ucs2CharacterSet14[] = { 0x1e60u, 0x1e61u, 0x1e9bu, 0 }; static const size_t UCS2_CANONICALIZATION_SETS = 15; -const uint16_t* const characterSetInfo[UCS2_CANONICALIZATION_SETS] = { +const uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = { ucs2CharacterSet0, ucs2CharacterSet1, ucs2CharacterSet2, diff --git a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h index d2df70720..fcc318673 100644 --- a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h +++ b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h @@ -27,7 +27,7 @@ #define YarrCanonicalizeUCS2_H #include <stdint.h> -#include <unicode/utypes.h> +#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { @@ -44,7 +44,7 @@ enum UCS2CanonicalizationType { }; 
struct UCS2CanonicalizationRange { uint16_t begin, end, value, type; }; extern const size_t UCS2_CANONICALIZATION_RANGES; -extern const uint16_t* const characterSetInfo[]; +extern const uint16_t* characterSetInfo[]; extern const UCS2CanonicalizationRange rangeInfo[]; // This table is similar to the full rangeInfo table, however this maps from UCS2 codepoints to diff --git a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js deleted file mode 100644 index 00361dd46..000000000 --- a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (C) 2012 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -// See ES 5.1, 15.10.2.8 -function canonicalize(ch) -{ - var u = String.fromCharCode(ch).toUpperCase(); - if (u.length > 1) - return ch; - var cu = u.charCodeAt(0); - if (ch >= 128 && cu < 128) - return ch; - return cu; -} - -var MAX_UCS2 = 0xFFFF; -var MAX_LATIN = 0xFF; - -var groupedCanonically = []; -// Pass 1: populate groupedCanonically - this is mapping from canonicalized -// values back to the set of character code that canonicalize to them. -for (var i = 0; i <= MAX_UCS2; ++i) { - var ch = canonicalize(i); - if (!groupedCanonically[ch]) - groupedCanonically[ch] = []; - groupedCanonically[ch].push(i); -} - -var typeInfo = []; -var latinTypeInfo = []; -var characterSetInfo = []; -// Pass 2: populate typeInfo & characterSetInfo. For every character calculate -// a typeInfo value, described by the types above, and a value payload. -for (cu in groupedCanonically) { - // The set of characters that canonicalize to cu - var characters = groupedCanonically[cu]; - - // If there is only one, it is unique. - if (characters.length == 1) { - typeInfo[characters[0]] = "CanonicalizeUnique:0"; - latinTypeInfo[characters[0]] = characters[0] <= MAX_LATIN ? "CanonicalizeLatinSelf:0" : "CanonicalizeLatinInvalid:0"; - continue; - } - - // Sort the array. - characters.sort(function(x,y){return x-y;}); - - // If there are more than two characters, create an entry in characterSetInfo. 
- if (characters.length > 2) { - for (i in characters) - typeInfo[characters[i]] = "CanonicalizeSet:" + characterSetInfo.length; - characterSetInfo.push(characters); - - if (characters[1] <= MAX_LATIN) - throw new Error("sets with more than one latin character not supported!"); - if (characters[0] <= MAX_LATIN) { - for (i in characters) - latinTypeInfo[characters[i]] = "CanonicalizeLatinOther:" + characters[0]; - latinTypeInfo[characters[0]] = "CanonicalizeLatinSelf:0"; - } else { - for (i in characters) - latinTypeInfo[characters[i]] = "CanonicalizeLatinInvalid:0"; - } - - continue; - } - - // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner. - var lo = characters[0]; - var hi = characters[1]; - var delta = hi - lo; - if (delta == 1) { - var type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0"; - typeInfo[lo] = type; - typeInfo[hi] = type; - } else { - typeInfo[lo] = "CanonicalizeRangeLo:" + delta; - typeInfo[hi] = "CanonicalizeRangeHi:" + delta; - } - - if (lo > MAX_LATIN) { - latinTypeInfo[lo] = "CanonicalizeLatinInvalid:0"; - latinTypeInfo[hi] = "CanonicalizeLatinInvalid:0"; - } else if (hi > MAX_LATIN) { - latinTypeInfo[lo] = "CanonicalizeLatinSelf:0"; - latinTypeInfo[hi] = "CanonicalizeLatinOther:" + lo; - } else { - if (delta != 0x20 || lo & 0x20) - throw new Error("pairs of latin characters that don't mask with 0x20 not supported!"); - latinTypeInfo[lo] = "CanonicalizeLatinMask0x20:0"; - latinTypeInfo[hi] = "CanonicalizeLatinMask0x20:0"; - } -} - -var rangeInfo = []; -// Pass 3: coallesce types into ranges. -for (var end = 0; end <= MAX_UCS2; ++end) { - var begin = end; - var type = typeInfo[end]; - while (end < MAX_UCS2 && typeInfo[end + 1] == type) - ++end; - rangeInfo.push({begin:begin, end:end, type:type}); -} - -var latinRangeInfo = []; -// Pass 4: coallesce latin-1 types into ranges. 
-for (var end = 0; end <= MAX_UCS2; ++end) { - var begin = end; - var type = latinTypeInfo[end]; - while (end < MAX_UCS2 && latinTypeInfo[end + 1] == type) - ++end; - latinRangeInfo.push({begin:begin, end:end, type:type}); -} - - -// Helper function to convert a number to a fixed width hex representation of a C uint16_t. -function hex(x) -{ - var s = Number(x).toString(16); - while (s.length < 4) - s = 0 + s; - return "0x" + s + "u"; -} - -var copyright = ( - "/*" + "\n" + - " * Copyright (C) 2012 Apple Inc. All rights reserved." + "\n" + - " *" + "\n" + - " * Redistribution and use in source and binary forms, with or without" + "\n" + - " * modification, are permitted provided that the following conditions" + "\n" + - " * are met:" + "\n" + - " * 1. Redistributions of source code must retain the above copyright" + "\n" + - " * notice, this list of conditions and the following disclaimer." + "\n" + - " * 2. Redistributions in binary form must reproduce the above copyright" + "\n" + - " * notice, this list of conditions and the following disclaimer in the" + "\n" + - " * documentation and/or other materials provided with the distribution." + "\n" + - " *" + "\n" + - " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY" + "\n" + - " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" + "\n" + - " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR" + "\n" + - " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR" + "\n" + - " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL," + "\n" + - " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO," + "\n" + - " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR" + "\n" + - " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY" + "\n" + - " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT" + "\n" + - " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE" + "\n" + - " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. " + "\n" + - " */"); - -print(copyright); -print(); -print("// DO NOT EDIT! - this file autogenerated by YarrCanonicalizeUCS2.js"); -print(); -print('#include "config.h"'); -print('#include "YarrCanonicalizeUCS2.h"'); -print(); -print("namespace JSC { namespace Yarr {"); -print(); -print("#include <stdint.h>"); -print(); - -for (i in characterSetInfo) { - var characters = "" - var set = characterSetInfo[i]; - for (var j in set) - characters += hex(set[j]) + ", "; - print("uint16_t ucs2CharacterSet" + i + "[] = { " + characters + "0 };"); -} -print(); -print("static const size_t UCS2_CANONICALIZATION_SETS = " + characterSetInfo.length + ";"); -print("uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = {"); -for (i in characterSetInfo) -print(" ucs2CharacterSet" + i + ","); -print("};"); -print(); -print("const size_t UCS2_CANONICALIZATION_RANGES = " + rangeInfo.length + ";"); -print("UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = {"); -for (i in rangeInfo) { - var info = rangeInfo[i]; - var typeAndValue = info.type.split(':'); - print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },"); -} -print("};"); -print(); -print("const size_t LATIN_CANONICALIZATION_RANGES = " + latinRangeInfo.length + ";"); -print("LatinCanonicalizationRange latinRangeInfo[LATIN_CANONICALIZATION_RANGES] = {"); -for (i in 
latinRangeInfo) { - var info = latinRangeInfo[i]; - var typeAndValue = info.type.split(':'); - print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },"); -} -print("};"); -print(); -print("} } // JSC::Yarr"); -print(); - diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp index d45096ce3..8645b5f20 100644 --- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp +++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp @@ -154,7 +154,7 @@ public: ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term) { - size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t); + size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); allocatorPool = allocatorPool->ensureCapacity(size); RELEASE_ASSERT(allocatorPool); return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); @@ -1472,13 +1472,13 @@ public: m_currentAlternativeIndex = 0; } - std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator) + PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator) { regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough()); emitDisjunction(m_pattern.m_body); regexEnd(); - return std::make_unique<BytecodePattern>(WTF::move(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator); + return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator)); } 
void checkInput(unsigned count) @@ -1712,7 +1712,7 @@ public: unsigned subpatternId = parenthesesBegin.atom.subpatternId; unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; - auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize); + OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize)); unsigned firstTermInParentheses = beginTerm + 1; parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2); @@ -1725,7 +1725,7 @@ public: m_bodyDisjunction->terms.shrink(beginTerm); m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition)); - m_allParenthesesInfo.append(WTF::move(parenthesesDisjunction)); + m_allParenthesesInfo.append(parenthesesDisjunction.release()); m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; @@ -1778,7 +1778,7 @@ public: void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough) { - m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize); + m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize)); m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough)); m_bodyDisjunction->terms[0].frameLocation = 0; m_currentAlternativeIndex = 0; @@ -1920,13 +1920,13 @@ public: private: YarrPattern& m_pattern; - std::unique_ptr<ByteDisjunction> m_bodyDisjunction; + OwnPtr<ByteDisjunction> m_bodyDisjunction; unsigned m_currentAlternativeIndex; Vector<ParenthesesStackEntry> m_parenthesesStack; - Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo; + Vector<OwnPtr<ByteDisjunction>> m_allParenthesesInfo; }; -std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) +PassOwnPtr<BytecodePattern> 
byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) { return ByteCompiler(pattern).compile(allocator); } diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.h b/Source/JavaScriptCore/yarr/YarrInterpreter.h index e3dea2467..f37309436 100644 --- a/Source/JavaScriptCore/yarr/YarrInterpreter.h +++ b/Source/JavaScriptCore/yarr/YarrInterpreter.h @@ -27,6 +27,8 @@ #define YarrInterpreter_h #include "YarrPattern.h" +#include <wtf/PassOwnPtr.h> +#include <wtf/unicode/Unicode.h> namespace WTF { class BumpPointerAllocator; @@ -335,8 +337,8 @@ public: struct BytecodePattern { WTF_MAKE_FAST_ALLOCATED; public: - BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator) - : m_body(WTF::move(body)) + BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<OwnPtr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator) + : m_body(body) , m_ignoreCase(pattern.m_ignoreCase) , m_multiline(pattern.m_multiline) , m_allocator(allocator) @@ -353,7 +355,7 @@ public: m_userCharacterClasses.shrinkToFit(); } - std::unique_ptr<ByteDisjunction> m_body; + OwnPtr<ByteDisjunction> m_body; bool m_ignoreCase; bool m_multiline; // Each BytecodePattern is associated with a RegExp, each RegExp is associated @@ -364,11 +366,11 @@ public: CharacterClass* wordcharCharacterClass; private: - Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo; - Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; + Vector<OwnPtr<ByteDisjunction>> m_allParenthesesInfo; + Vector<OwnPtr<CharacterClass>> m_userCharacterClasses; }; -JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*); +JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*); JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* 
output); unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output); unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output); diff --git a/Source/JavaScriptCore/yarr/YarrJIT.cpp b/Source/JavaScriptCore/yarr/YarrJIT.cpp index 2229da767..364a72dd8 100644 --- a/Source/JavaScriptCore/yarr/YarrJIT.cpp +++ b/Source/JavaScriptCore/yarr/YarrJIT.cpp @@ -1639,14 +1639,16 @@ class YarrGenerator : private MacroAssembler { const RegisterID indexTemporary = regT0; ASSERT(term->quantityCount == 1); +#ifndef NDEBUG // Runtime ASSERT to make sure that the nested alternative handled the // "no input consumed" check. - if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { + if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { Jump pastBreakpoint; pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); - abortWithReason(YARRNoInputConsumed); + breakpoint(); pastBreakpoint.link(this); } +#endif // If the parenthese are capturing, store the ending index value to the // captures array, offsetting as necessary. @@ -1693,16 +1695,16 @@ class YarrGenerator : private MacroAssembler { } case OpParenthesesSubpatternTerminalEnd: { YarrOp& beginOp = m_ops[op.m_previousOp]; - if (!ASSERT_DISABLED) { - PatternTerm* term = op.m_term; - - // Runtime ASSERT to make sure that the nested alternative handled the - // "no input consumed" check. - Jump pastBreakpoint; - pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); - abortWithReason(YARRNoInputConsumed); - pastBreakpoint.link(this); - } +#ifndef NDEBUG + PatternTerm* term = op.m_term; + + // Runtime ASSERT to make sure that the nested alternative handled the + // "no input consumed" check. 
+ Jump pastBreakpoint; + pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); + breakpoint(); + pastBreakpoint.link(this); +#endif // We know that the match is non-zero, we can accept it and // loop back up to the head of the subpattern. @@ -2344,7 +2346,7 @@ class YarrGenerator : private MacroAssembler { m_ops.append(alternativeBeginOpCode); m_ops.last().m_previousOp = notFound; m_ops.last().m_term = term; - Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; + Vector<OwnPtr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; for (unsigned i = 0; i < alternatives.size(); ++i) { size_t lastOpIndex = m_ops.size() - 1; @@ -2395,7 +2397,7 @@ class YarrGenerator : private MacroAssembler { m_ops.append(OpSimpleNestedAlternativeBegin); m_ops.last().m_previousOp = notFound; m_ops.last().m_term = term; - Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; + Vector<OwnPtr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; for (unsigned i = 0; i < alternatives.size(); ++i) { size_t lastOpIndex = m_ops.size() - 1; @@ -2469,7 +2471,7 @@ class YarrGenerator : private MacroAssembler { // to return the failing result. void opCompileBody(PatternDisjunction* disjunction) { - Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives; + Vector<OwnPtr<PatternAlternative>>& alternatives = disjunction->m_alternatives; size_t currentAlternativeIndex = 0; // Emit the 'once through' alternatives. @@ -2643,8 +2645,11 @@ public: initCallFrame(); + // Compile the pattern to the internal 'YarrOp' representation. opCompileBody(m_pattern.m_body); + // If we encountered anything we can't handle in the JIT code + // (e.g. backreferences) then return early. 
if (m_shouldFallBack) { jitObject.setFallBack(true); return; @@ -2653,12 +2658,8 @@ public: generate(); backtrack(); - LinkBuffer linkBuffer(*vm, *this, REGEXP_CODE_ID, JITCompilationCanFail); - if (linkBuffer.didFailToAllocate()) { - jitObject.setFallBack(true); - return; - } - + // Link & finalize the code. + LinkBuffer linkBuffer(*vm, this, REGEXP_CODE_ID); m_backtrackingState.linkDataLabels(linkBuffer); if (compileMode == MatchOnly) { diff --git a/Source/JavaScriptCore/yarr/YarrJIT.h b/Source/JavaScriptCore/yarr/YarrJIT.h index 8ce1bf1b6..e7b222a8e 100644 --- a/Source/JavaScriptCore/yarr/YarrJIT.h +++ b/Source/JavaScriptCore/yarr/YarrJIT.h @@ -108,37 +108,7 @@ public: } #if ENABLE(REGEXP_TRACING) - void *get8BitMatchOnlyAddr() - { - if (!has8BitCodeMatchOnly()) - return 0; - - return m_matchOnly8.code().executableAddress(); - } - - void *get16BitMatchOnlyAddr() - { - if (!has16BitCodeMatchOnly()) - return 0; - - return m_matchOnly16.code().executableAddress(); - } - - void *get8BitMatchAddr() - { - if (!has8BitCode()) - return 0; - - return m_ref8.code().executableAddress(); - } - - void *get16BitMatchAddr() - { - if (!has16BitCode()) - return 0; - - return m_ref16.code().executableAddress(); - } + void *getAddr() { return m_ref.code().executableAddress(); } #endif void clear() diff --git a/Source/JavaScriptCore/yarr/YarrParser.h b/Source/JavaScriptCore/yarr/YarrParser.h index 761acb557..366aa40d3 100644 --- a/Source/JavaScriptCore/yarr/YarrParser.h +++ b/Source/JavaScriptCore/yarr/YarrParser.h @@ -29,6 +29,7 @@ #include "Yarr.h" #include <wtf/ASCIICType.h> #include <wtf/text/WTFString.h> +#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { @@ -232,7 +233,7 @@ private: : m_delegate(delegate) , m_backReferenceLimit(backReferenceLimit) , m_err(NoError) - , m_data(pattern.characters<CharType>()) + , m_data(pattern.getCharacters<CharType>()) , m_size(pattern.length()) , m_index(0) , m_parenthesesNestingDepth(0) diff --git 
a/Source/JavaScriptCore/yarr/YarrPattern.cpp b/Source/JavaScriptCore/yarr/YarrPattern.cpp index 34c377a54..7ed9d3c30 100644 --- a/Source/JavaScriptCore/yarr/YarrPattern.cpp +++ b/Source/JavaScriptCore/yarr/YarrPattern.cpp @@ -175,16 +175,16 @@ public: } - std::unique_ptr<CharacterClass> charClass() + PassOwnPtr<CharacterClass> charClass() { - auto characterClass = std::make_unique<CharacterClass>(); + OwnPtr<CharacterClass> characterClass = adoptPtr(new CharacterClass); characterClass->m_matches.swap(m_matches); characterClass->m_ranges.swap(m_ranges); characterClass->m_matchesUnicode.swap(m_matchesUnicode); characterClass->m_rangesUnicode.swap(m_rangesUnicode); - return characterClass; + return characterClass.release(); } private: @@ -274,10 +274,10 @@ public: , m_characterClassConstructor(pattern.m_ignoreCase) , m_invertParentheticalAssertion(false) { - auto body = std::make_unique<PatternDisjunction>(); + OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction); m_pattern.m_body = body.get(); m_alternative = body->addNewAlternative(); - m_pattern.m_disjunctions.append(WTF::move(body)); + m_pattern.m_disjunctions.append(body.release()); } ~YarrPatternConstructor() @@ -289,15 +289,15 @@ public: m_pattern.reset(); m_characterClassConstructor.reset(); - auto body = std::make_unique<PatternDisjunction>(); + OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction); m_pattern.m_body = body.get(); m_alternative = body->addNewAlternative(); - m_pattern.m_disjunctions.append(WTF::move(body)); + m_pattern.m_disjunctions.append(body.release()); } void assertionBOL() { - if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) { + if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) { m_alternative->m_startsWithBOL = true; m_alternative->m_containsBOL = true; m_pattern.m_containsBOL = true; @@ -329,9 +329,9 @@ public: } m_characterClassConstructor.putUnicodeIgnoreCase(ch, info); - auto newCharacterClass = 
m_characterClassConstructor.charClass(); + OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass(); m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), false)); - m_pattern.m_userCharacterClasses.append(WTF::move(newCharacterClass)); + m_pattern.m_userCharacterClasses.append(newCharacterClass.release()); } void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert) @@ -391,9 +391,9 @@ public: void atomCharacterClassEnd() { - auto newCharacterClass = m_characterClassConstructor.charClass(); + OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass(); m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), m_invertCharacterClass)); - m_pattern.m_userCharacterClasses.append(WTF::move(newCharacterClass)); + m_pattern.m_userCharacterClasses.append(newCharacterClass.release()); } void atomParenthesesSubpatternBegin(bool capture = true) @@ -402,19 +402,19 @@ public: if (capture) m_pattern.m_numSubpatterns++; - auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative); + OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative)); m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, false)); m_alternative = parenthesesDisjunction->addNewAlternative(); - m_pattern.m_disjunctions.append(WTF::move(parenthesesDisjunction)); + m_pattern.m_disjunctions.append(parenthesesDisjunction.release()); } void atomParentheticalAssertionBegin(bool invert = false) { - auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative); + OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative)); m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction.get(), false, invert)); m_alternative = 
parenthesesDisjunction->addNewAlternative(); m_invertParentheticalAssertion = invert; - m_pattern.m_disjunctions.append(WTF::move(parenthesesDisjunction)); + m_pattern.m_disjunctions.append(parenthesesDisjunction.release()); } void atomParenthesesEnd() @@ -479,12 +479,12 @@ public: // skip alternatives with m_startsWithBOL set true. PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false) { - std::unique_ptr<PatternDisjunction> newDisjunction; + OwnPtr<PatternDisjunction> newDisjunction; for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { PatternAlternative* alternative = disjunction->m_alternatives[alt].get(); if (!filterStartsWithBOL || !alternative->m_startsWithBOL) { if (!newDisjunction) { - newDisjunction = std::make_unique<PatternDisjunction>(); + newDisjunction = adoptPtr(new PatternDisjunction()); newDisjunction->m_parent = disjunction->m_parent; } PatternAlternative* newAlternative = newDisjunction->addNewAlternative(); @@ -498,7 +498,7 @@ public: return 0; PatternDisjunction* copiedDisjunction = newDisjunction.get(); - m_pattern.m_disjunctions.append(WTF::move(newDisjunction)); + m_pattern.m_disjunctions.append(newDisjunction.release()); return copiedDisjunction; } @@ -666,8 +666,6 @@ public: minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize); maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize); hasFixedSize &= alternative->m_hasFixedSize; - if (alternative->m_minimumSize > INT_MAX) - m_pattern.m_containsUnsignedLengthPattern = true; } ASSERT(minimumInputSize != UINT_MAX); @@ -698,7 +696,7 @@ public: if (m_pattern.m_numSubpatterns) return; - Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives; + Vector<OwnPtr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives; for (size_t i = 0; i < alternatives.size(); ++i) { Vector<PatternTerm>& terms = alternatives[i]->m_terms; if 
(terms.size()) { @@ -768,7 +766,7 @@ public: // beginning and the end of the match. void optimizeDotStarWrappedExpressions() { - Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives; + Vector<OwnPtr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives; if (alternatives.size() != 1) return; @@ -866,7 +864,6 @@ YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, , m_multiline(multiline) , m_containsBackreferences(false) , m_containsBOL(false) - , m_containsUnsignedLengthPattern(false) , m_numSubpatterns(0) , m_maxBackReference(0) , newlineCached(0) diff --git a/Source/JavaScriptCore/yarr/YarrPattern.h b/Source/JavaScriptCore/yarr/YarrPattern.h index 5482de5af..d42b0f979 100644 --- a/Source/JavaScriptCore/yarr/YarrPattern.h +++ b/Source/JavaScriptCore/yarr/YarrPattern.h @@ -28,9 +28,12 @@ #define YarrPattern_h #include <wtf/CheckedArithmetic.h> +#include <wtf/OwnPtr.h> +#include <wtf/PassOwnPtr.h> #include <wtf/RefCounted.h> #include <wtf/Vector.h> #include <wtf/text/WTFString.h> +#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { @@ -267,11 +270,12 @@ public: PatternAlternative* addNewAlternative() { - m_alternatives.append(std::make_unique<PatternAlternative>(this)); - return static_cast<PatternAlternative*>(m_alternatives.last().get()); + PatternAlternative* alternative = new PatternAlternative(this); + m_alternatives.append(adoptPtr(alternative)); + return alternative; } - Vector<std::unique_ptr<PatternAlternative>> m_alternatives; + Vector<OwnPtr<PatternAlternative>> m_alternatives; PatternAlternative* m_parent; unsigned m_minimumSize; unsigned m_callFrameSize; @@ -282,13 +286,13 @@ public: // (please to be calling newlineCharacterClass() et al on your // friendly neighborhood YarrPattern instance to get nicely // cached copies). 
-std::unique_ptr<CharacterClass> newlineCreate(); -std::unique_ptr<CharacterClass> digitsCreate(); -std::unique_ptr<CharacterClass> spacesCreate(); -std::unique_ptr<CharacterClass> wordcharCreate(); -std::unique_ptr<CharacterClass> nondigitsCreate(); -std::unique_ptr<CharacterClass> nonspacesCreate(); -std::unique_ptr<CharacterClass> nonwordcharCreate(); +CharacterClass* newlineCreate(); +CharacterClass* digitsCreate(); +CharacterClass* spacesCreate(); +CharacterClass* wordcharCreate(); +CharacterClass* nondigitsCreate(); +CharacterClass* nonspacesCreate(); +CharacterClass* nonwordcharCreate(); struct TermChain { TermChain(PatternTerm term) @@ -309,7 +313,6 @@ struct YarrPattern { m_containsBackreferences = false; m_containsBOL = false; - m_containsUnsignedLengthPattern = false; newlineCached = 0; digitsCached = 0; @@ -328,65 +331,46 @@ struct YarrPattern { return m_maxBackReference > m_numSubpatterns; } - bool containsUnsignedLengthPattern() - { - return m_containsUnsignedLengthPattern; - } - CharacterClass* newlineCharacterClass() { - if (!newlineCached) { - m_userCharacterClasses.append(newlineCreate()); - newlineCached = m_userCharacterClasses.last().get(); - } + if (!newlineCached) + m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate())); return newlineCached; } CharacterClass* digitsCharacterClass() { - if (!digitsCached) { - m_userCharacterClasses.append(digitsCreate()); - digitsCached = m_userCharacterClasses.last().get(); - } + if (!digitsCached) + m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate())); return digitsCached; } CharacterClass* spacesCharacterClass() { - if (!spacesCached) { - m_userCharacterClasses.append(spacesCreate()); - spacesCached = m_userCharacterClasses.last().get(); - } + if (!spacesCached) + m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate())); return spacesCached; } CharacterClass* wordcharCharacterClass() { - if (!wordcharCached) { - 
m_userCharacterClasses.append(wordcharCreate()); - wordcharCached = m_userCharacterClasses.last().get(); - } + if (!wordcharCached) + m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate())); return wordcharCached; } CharacterClass* nondigitsCharacterClass() { - if (!nondigitsCached) { - m_userCharacterClasses.append(nondigitsCreate()); - nondigitsCached = m_userCharacterClasses.last().get(); - } + if (!nondigitsCached) + m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate())); return nondigitsCached; } CharacterClass* nonspacesCharacterClass() { - if (!nonspacesCached) { - m_userCharacterClasses.append(nonspacesCreate()); - nonspacesCached = m_userCharacterClasses.last().get(); - } + if (!nonspacesCached) + m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate())); return nonspacesCached; } CharacterClass* nonwordcharCharacterClass() { - if (!nonwordcharCached) { - m_userCharacterClasses.append(nonwordcharCreate()); - nonwordcharCached = m_userCharacterClasses.last().get(); - } + if (!nonwordcharCached) + m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate())); return nonwordcharCached; } @@ -394,12 +378,11 @@ struct YarrPattern { bool m_multiline : 1; bool m_containsBackreferences : 1; bool m_containsBOL : 1; - bool m_containsUnsignedLengthPattern : 1; unsigned m_numSubpatterns; unsigned m_maxBackReference; PatternDisjunction* m_body; - Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions; - Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; + Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions; + Vector<OwnPtr<CharacterClass>> m_userCharacterClasses; private: const char* compile(const String& patternString); |
