summaryrefslogtreecommitdiff
path: root/Source/JavaScriptCore/yarr/RegularExpression.cpp
diff options
context:
space:
mode:
authorKonstantin Tokarev <annulen@yandex.ru>2016-08-25 19:20:41 +0300
committerKonstantin Tokarev <annulen@yandex.ru>2017-02-02 12:30:55 +0000
commit6882a04fb36642862b11efe514251d32070c3d65 (patch)
treeb7959826000b061fd5ccc7512035c7478742f7b0 /Source/JavaScriptCore/yarr/RegularExpression.cpp
parentab6df191029eeeb0b0f16f127d553265659f739e (diff)
downloadqtwebkit-6882a04fb36642862b11efe514251d32070c3d65.tar.gz
Imported QtWebKit TP3 (git b57bc6801f1876c3220d5a4bfea33d620d477443)
Change-Id: I3b1d8a2808782c9f34d50240000e20cb38d3680f Reviewed-by: Konstantin Tokarev <annulen@yandex.ru>
Diffstat (limited to 'Source/JavaScriptCore/yarr/RegularExpression.cpp')
-rw-r--r--Source/JavaScriptCore/yarr/RegularExpression.cpp185
1 files changed, 185 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/yarr/RegularExpression.cpp b/Source/JavaScriptCore/yarr/RegularExpression.cpp
new file mode 100644
index 000000000..0c7089654
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/RegularExpression.cpp
@@ -0,0 +1,185 @@
+
+/*
+ * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2008 Collabora Ltd.
+ * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegularExpression.h"
+
+#include "Yarr.h"
+#include <wtf/Assertions.h>
+#include <wtf/BumpPointerAllocator.h>
+
+namespace JSC { namespace Yarr {
+
+class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
+public:
+ static Ref<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
+ {
+ return adoptRef(*new Private(pattern, caseSensitivity, multilineMode));
+ }
+
+ int lastMatchLength;
+
+ unsigned m_numSubpatterns;
+ std::unique_ptr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
+
+private:
+ Private(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
+ : lastMatchLength(-1)
+ , m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode))
+ , m_constructionError(nullptr)
+ {
+ }
+
+ std::unique_ptr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
+ {
+ JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &m_constructionError);
+ if (m_constructionError) {
+ LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError);
+ return nullptr;
+ }
+
+ m_numSubpatterns = pattern.m_numSubpatterns;
+
+ return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
+ }
+
+ BumpPointerAllocator m_regexAllocator;
+ const char* m_constructionError;
+};
+
+RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
+ : d(Private::create(pattern, caseSensitivity, multilineMode))
+{
+}
+
+RegularExpression::RegularExpression(const RegularExpression& re)
+ : d(re.d)
+{
+}
+
+RegularExpression::~RegularExpression()
+{
+}
+
+RegularExpression& RegularExpression::operator=(const RegularExpression& re)
+{
+ d = re.d;
+ return *this;
+}
+
+int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
+{
+ if (!d->m_regExpByteCode)
+ return -1;
+
+ if (str.isNull())
+ return -1;
+
+ int offsetVectorSize = (d->m_numSubpatterns + 1) * 2;
+ unsigned* offsetVector;
+ Vector<unsigned, 32> nonReturnedOvector;
+
+ nonReturnedOvector.resize(offsetVectorSize);
+ offsetVector = nonReturnedOvector.data();
+
+ ASSERT(offsetVector);
+ for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
+ offsetVector[j] = JSC::Yarr::offsetNoMatch;
+
+ unsigned result;
+ if (str.length() <= INT_MAX)
+ result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str, startFrom, offsetVector);
+ else {
+ // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
+ // can be represented as ints.
+ result = JSC::Yarr::offsetNoMatch;
+ }
+
+ if (result == JSC::Yarr::offsetNoMatch) {
+ d->lastMatchLength = -1;
+ return -1;
+ }
+
+ // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
+ d->lastMatchLength = offsetVector[1] - offsetVector[0];
+ if (matchLength)
+ *matchLength = d->lastMatchLength;
+ return offsetVector[0];
+}
+
+int RegularExpression::searchRev(const String& str) const
+{
+ // FIXME: This could be faster if it actually searched backwards.
+ // Instead, it just searches forwards, multiple times until it finds the last match.
+
+ int start = 0;
+ int pos;
+ int lastPos = -1;
+ int lastMatchLength = -1;
+ do {
+ int matchLength;
+ pos = match(str, start, &matchLength);
+ if (pos >= 0) {
+ if (pos + matchLength > lastPos + lastMatchLength) {
+ // replace last match if this one is later and not a subset of the last match
+ lastPos = pos;
+ lastMatchLength = matchLength;
+ }
+ start = pos + 1;
+ }
+ } while (pos != -1);
+ d->lastMatchLength = lastMatchLength;
+ return lastPos;
+}
+
+int RegularExpression::matchedLength() const
+{
+ return d->lastMatchLength;
+}
+
+void replace(String& string, const RegularExpression& target, const String& replacement)
+{
+ int index = 0;
+ while (index < static_cast<int>(string.length())) {
+ int matchLength;
+ index = target.match(string, index, &matchLength);
+ if (index < 0)
+ break;
+ string.replace(index, matchLength, replacement);
+ index += replacement.length();
+ if (!matchLength)
+ break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
+ }
+}
+
+bool RegularExpression::isValid() const
+{
+ return d->m_regExpByteCode.get();
+}
+
+} } // namespace JSC::Yarr