From 40736c5763bf61337c8c14e16d8587db021a87d4 Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Fri, 6 Jan 2012 14:44:00 +0100 Subject: Imported WebKit commit 2ea9d364d0f6efa8fa64acf19f451504c59be0e4 (http://svn.webkit.org/repository/webkit/trunk@104285) --- Source/JavaScriptCore/wtf/text/ASCIIFastPath.h | 101 ++ Source/JavaScriptCore/wtf/text/AtomicString.cpp | 320 ++++ Source/JavaScriptCore/wtf/text/AtomicString.h | 215 +++ Source/JavaScriptCore/wtf/text/AtomicStringHash.h | 62 + Source/JavaScriptCore/wtf/text/AtomicStringImpl.h | 38 + Source/JavaScriptCore/wtf/text/CString.cpp | 104 ++ Source/JavaScriptCore/wtf/text/CString.h | 87 ++ Source/JavaScriptCore/wtf/text/StringBuffer.h | 87 ++ Source/JavaScriptCore/wtf/text/StringBuilder.cpp | 301 ++++ Source/JavaScriptCore/wtf/text/StringBuilder.h | 234 +++ Source/JavaScriptCore/wtf/text/StringConcatenate.h | 964 ++++++++++++ Source/JavaScriptCore/wtf/text/StringHash.h | 184 +++ Source/JavaScriptCore/wtf/text/StringImpl.cpp | 1541 ++++++++++++++++++++ Source/JavaScriptCore/wtf/text/StringImpl.h | 774 ++++++++++ Source/JavaScriptCore/wtf/text/StringOperators.h | 150 ++ Source/JavaScriptCore/wtf/text/StringStatics.cpp | 92 ++ Source/JavaScriptCore/wtf/text/TextPosition.h | 85 ++ Source/JavaScriptCore/wtf/text/WTFString.cpp | 1126 ++++++++++++++ Source/JavaScriptCore/wtf/text/WTFString.h | 648 ++++++++ 19 files changed, 7113 insertions(+) create mode 100644 Source/JavaScriptCore/wtf/text/ASCIIFastPath.h create mode 100644 Source/JavaScriptCore/wtf/text/AtomicString.cpp create mode 100644 Source/JavaScriptCore/wtf/text/AtomicString.h create mode 100644 Source/JavaScriptCore/wtf/text/AtomicStringHash.h create mode 100644 Source/JavaScriptCore/wtf/text/AtomicStringImpl.h create mode 100644 Source/JavaScriptCore/wtf/text/CString.cpp create mode 100644 Source/JavaScriptCore/wtf/text/CString.h create mode 100644 Source/JavaScriptCore/wtf/text/StringBuffer.h create mode 100644 Source/JavaScriptCore/wtf/text/StringBuilder.cpp 
create mode 100644 Source/JavaScriptCore/wtf/text/StringBuilder.h create mode 100644 Source/JavaScriptCore/wtf/text/StringConcatenate.h create mode 100644 Source/JavaScriptCore/wtf/text/StringHash.h create mode 100644 Source/JavaScriptCore/wtf/text/StringImpl.cpp create mode 100644 Source/JavaScriptCore/wtf/text/StringImpl.h create mode 100644 Source/JavaScriptCore/wtf/text/StringOperators.h create mode 100644 Source/JavaScriptCore/wtf/text/StringStatics.cpp create mode 100644 Source/JavaScriptCore/wtf/text/TextPosition.h create mode 100644 Source/JavaScriptCore/wtf/text/WTFString.cpp create mode 100644 Source/JavaScriptCore/wtf/text/WTFString.h (limited to 'Source/JavaScriptCore/wtf/text') diff --git a/Source/JavaScriptCore/wtf/text/ASCIIFastPath.h b/Source/JavaScriptCore/wtf/text/ASCIIFastPath.h new file mode 100644 index 000000000..ace1a687d --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/ASCIIFastPath.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2011 Apple Inc. All rights reserved. + * Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+#ifndef ASCIIFastPath_h
+#define ASCIIFastPath_h
+
+// NOTE(review): both include targets were stripped when this patch was
+// extracted. <stdint.h> is needed for uintptr_t; the second header is
+// presumably the one declaring UChar/LChar - confirm against upstream.
+#include <stdint.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WTF {
+
+// Assuming that a pointer is the size of a "machine word", then
+// uintptr_t is an integer type that is also a machine word.
+typedef uintptr_t MachineWord;
+const uintptr_t machineWordAlignmentMask = sizeof(MachineWord) - 1;
+
+// Returns true when the address has none of the low alignment-mask bits set,
+// i.e. it is a multiple of sizeof(MachineWord).
+inline bool isAlignedToMachineWord(const void* pointer)
+{
+    return !(reinterpret_cast<uintptr_t>(pointer) & machineWordAlignmentMask);
+}
+
+// Clears the low alignment-mask bits of the address, rounding the pointer
+// down to the nearest machine-word boundary.
+template<typename T> inline T* alignToMachineWord(T* pointer)
+{
+    return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(pointer) & ~machineWordAlignmentMask);
+}
+
+// Bit mask selecting, for every character packed in a machine word of the
+// given byte size, the bits that are set only for non-ASCII values
+// (anything above 0x7F).
+template<size_t size, typename CharacterType> struct NonASCIIMask;
+template<> struct NonASCIIMask<4, UChar> {
+    static inline uint32_t value() { return 0xFF80FF80U; }
+};
+template<> struct NonASCIIMask<4, LChar> {
+    static inline uint32_t value() { return 0x80808080U; }
+};
+template<> struct NonASCIIMask<8, UChar> {
+    static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
+};
+template<> struct NonASCIIMask<8, LChar> {
+    static inline uint64_t value() { return 0x8080808080808080ULL; }
+};
+
+
+// Returns true when no character packed in |word| has a non-ASCII bit set.
+template<typename CharacterType>
+inline bool isAllASCII(MachineWord word)
+{
+    return !(word & NonASCIIMask<sizeof(MachineWord), CharacterType>::value());
+}
+
+// Returns true when every one of the |length| characters is ASCII.
+// Note: This function assumes the input is likely all ASCII, and
+// does not leave early if it is not the case.
+template<typename CharacterType>
+inline bool charactersAreAllASCII(const CharacterType* characters, size_t length)
+{
+    // All characters are OR-ed together; the mask is applied once at the end.
+    MachineWord allCharBits = 0;
+    const CharacterType* end = characters + length;
+
+    // Prologue: align the input.
+    while (!isAlignedToMachineWord(characters) && characters != end) {
+        allCharBits |= *characters;
+        ++characters;
+    }
+
+    // Compare the values of CPU word size.
+    const CharacterType* wordEnd = alignToMachineWord(end);
+    const size_t loopIncrement = sizeof(MachineWord) / sizeof(CharacterType);
+    while (characters < wordEnd) {
+        allCharBits |= *(reinterpret_cast<const MachineWord*>(characters));
+        characters += loopIncrement;
+    }
+
+    // Process the remaining bytes.
+    while (characters != end) {
+        allCharBits |= *characters;
+        ++characters;
+    }
+
+    MachineWord nonASCIIBitMask = NonASCIIMask<sizeof(MachineWord), CharacterType>::value();
+    return !(allCharBits & nonASCIIBitMask);
+}
+
+
+} // namespace WTF
+
+#endif // ASCIIFastPath_h
diff --git a/Source/JavaScriptCore/wtf/text/AtomicString.cpp b/Source/JavaScriptCore/wtf/text/AtomicString.cpp
new file mode 100644
index 000000000..966879827
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/AtomicString.cpp
@@ -0,0 +1,320 @@
+/*
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Patrick Gansterer
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+
+#include "AtomicString.h"
+
+#include "StringHash.h"
+// NOTE(review): the four angle-bracket include targets below were stripped
+// when this patch was extracted; they are reconstructed from the names this
+// file uses (HashSet, wtfThreadData, the UTF-8 helpers) - confirm against
+// upstream.
+#include <wtf/HashSet.h>
+#include <wtf/Threading.h>
+#include <wtf/WTFThreadData.h>
+#include <wtf/unicode/UTF8.h>
+
+namespace WTF {
+
+using namespace Unicode;
+
+COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
+
+// Owns the set of atomic StringImpls reachable through wtfThreadData().
+class AtomicStringTable {
+public:
+    // Allocates a table and registers both it and its destroy callback in
+    // the WTFThreadData returned by wtfThreadData().
+    static AtomicStringTable* create()
+    {
+        AtomicStringTable* table = new AtomicStringTable;
+
+        WTFThreadData& data = wtfThreadData();
+        data.m_atomicStringTable = table;
+        data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
+
+        return table;
+    }
+
+    HashSet<StringImpl*>& table()
+    {
+        return m_table;
+    }
+
+private:
+    // Clears the atomic flag on every string still in the table, then
+    // deletes the table itself.
+    static void destroy(AtomicStringTable* table)
+    {
+        HashSet<StringImpl*>::iterator end = table->m_table.end();
+        for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
+            (*iter)->setIsAtomic(false);
+        delete table;
+    }
+
+    HashSet<StringImpl*> m_table;
+};
+
+// Returns the atomic string set, creating the table on first use.
+static inline HashSet<StringImpl*>& stringTable()
+{
+    // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
+    AtomicStringTable* table = wtfThreadData().atomicStringTable();
+    if (UNLIKELY(!table))
+        table = AtomicStringTable::create();
+    return table->table();
+}
+
+// Looks |value| up in the string table through HashTranslator and inserts a
+// newly translated StringImpl when no equal entry exists.
+// NOTE(review): the template parameter list and the explicit arguments on
+// add() were stripped in extraction; the <T, HashTranslator> form is a
+// reconstruction - confirm against the HashSet.h of this revision.
+template<typename T, typename HashTranslator>
+static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
+{
+    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<T, HashTranslator>(value);
+
+    // If the string is newly-translated, then we need to adopt it.
+    // The boolean in the pair tells us if that is so.
+    return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
+}
+
+// Translator for null-terminated LChar strings.
+struct CStringTranslator {
+    static unsigned hash(const LChar* c)
+    {
+        return StringHasher::computeHash(c);
+    }
+
+    static inline bool equal(StringImpl* r, const LChar* s)
+    {
+        return WTF::equal(r, s);
+    }
+
+    // Creates the StringImpl stored in the table slot; the reference is
+    // leaked here and adopted by addToStringTable().
+    static void translate(StringImpl*& location, const LChar* const& c, unsigned hash)
+    {
+        location = StringImpl::create(c).leakRef();
+        location->setHash(hash);
+        location->setIsAtomic(true);
+    }
+};
+
+// Returns 0 for a null pointer, the shared empty string for "", and the
+// table entry for |c| otherwise.
+PassRefPtr<StringImpl> AtomicString::add(const LChar* c)
+{
+    if (!c)
+        return 0;
+    if (!*c)
+        return StringImpl::empty();
+
+    return addToStringTable<const LChar*, CStringTranslator>(c);
+}
+
+// Pointer/length pair used as the lookup key for UChar data.
+struct UCharBuffer {
+    const UChar* s;
+    unsigned length;
+};
+
+// Translator for UCharBuffer keys; the hash is computed from the characters.
+struct UCharBufferTranslator {
+    static unsigned hash(const UCharBuffer& buf)
+    {
+        return StringHasher::computeHash(buf.s, buf.length);
+    }
+
+    static bool equal(StringImpl* const& str, const UCharBuffer& buf)
+    {
+        return WTF::equal(str, buf.s, buf.length);
+    }
+
+    static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
+    {
+        location = StringImpl::create(buf.s, buf.length).leakRef();
+        location->setHash(hash);
+        location->setIsAtomic(true);
+    }
+};
+
+// Lookup key for UChar data whose hash the caller has already computed.
+struct HashAndCharacters {
+    unsigned hash;
+    const UChar* characters;
+    unsigned length;
+};
+
+// Translator for HashAndCharacters keys; reuses the caller-supplied hash and
+// only asserts that it matches the characters.
+struct HashAndCharactersTranslator {
+    static unsigned hash(const HashAndCharacters& buffer)
+    {
+        ASSERT(buffer.hash == StringHasher::computeHash(buffer.characters, buffer.length));
+        return buffer.hash;
+    }
+
+    static bool equal(StringImpl* const& string, const HashAndCharacters& buffer)
+    {
+        return WTF::equal(string, buffer.characters, buffer.length);
+    }
+
+    static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash)
+    {
+        location = StringImpl::create(buffer.characters, buffer.length).leakRef();
+        location->setHash(hash);
+        location->setIsAtomic(true);
+    }
+};
+
+// Lookup key for UTF-8 data: |length| counts the UTF-8 bytes and
+// |utf16Length| the UTF-16 code units of the same text.
+struct HashAndUTF8Characters {
+    unsigned hash;
+    const char* characters;
+    unsigned length;
+    unsigned utf16Length;
+};
+
+// Translator for HashAndUTF8Characters keys: compares a stored UTF-16 string
+// against UTF-8 input and converts the input on insertion.
+struct HashAndUTF8CharactersTranslator {
+    static unsigned hash(const HashAndUTF8Characters& buffer)
+    {
+        return buffer.hash;
+    }
+
+    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
+    {
+        if (buffer.utf16Length != string->length())
+            return false;
+
+        const UChar* stringCharacters = string->characters();
+
+        // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
+        if (buffer.utf16Length != buffer.length)
+            return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length);
+
+        // Equal lengths: the input is ASCII, so compare unit by unit.
+        for (unsigned i = 0; i < buffer.length; ++i) {
+            ASSERT(isASCII(buffer.characters[i]));
+            if (stringCharacters[i] != buffer.characters[i])
+                return false;
+        }
+
+        return true;
+    }
+
+    // Allocates an uninitialized StringImpl of utf16Length units and fills
+    // it by converting the UTF-8 input; the reference is leaked here and
+    // adopted by addToStringTable().
+    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
+    {
+        UChar* target;
+        location = StringImpl::createUninitialized(buffer.utf16Length, target).leakRef();
+
+        const char* source = buffer.characters;
+        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
+            ASSERT_NOT_REACHED();
+
+        location->setHash(hash);
+        location->setIsAtomic(true);
+    }
+};
+
+// Returns 0 for a null pointer, the shared empty string for a zero length,
+// and the table entry for the |length| characters at |s| otherwise.
+// NOTE(review): the explicit template arguments on the addToStringTable()
+// calls in this section were stripped in extraction and are reconstructed
+// from the key/translator pairs defined for each key type - confirm upstream.
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
+{
+    if (!s)
+        return 0;
+
+    if (!length)
+        return StringImpl::empty();
+
+    UCharBuffer buffer = { s, length };
+    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
+}
+
+// Same as above, but reuses a hash the caller already computed. |s| must be
+// non-null and |existingHash| non-zero (both asserted).
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
+{
+    ASSERT(s);
+    ASSERT(existingHash);
+
+    if (!length)
+        return StringImpl::empty();
+
+    HashAndCharacters buffer = { existingHash, s, length };
+    return addToStringTable<HashAndCharacters, HashAndCharactersTranslator>(buffer);
+}
+
+// Adds a null-terminated UChar string. Returns 0 for a null pointer and the
+// shared empty string when the first unit is the terminator.
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
+{
+    if (!s)
+        return 0;
+
+    // Counted as unsigned to match UCharBuffer::length; a signed counter
+    // would be narrowed when the buffer is brace-initialized below.
+    unsigned length = 0;
+    while (s[length] != UChar(0))
+        ++length;
+
+    if (!length)
+        return StringImpl::empty();
+
+    UCharBuffer buffer = { s, length };
+    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
+}
+
+// Slow path of add(StringImpl*): returns |r| unchanged when it is null or
+// already atomic, the shared empty string when it is empty, and otherwise the
+// table entry equal to |r|. |r| is flagged atomic only when it became that
+// entry itself.
+PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
+{
+    if (!r || r->isAtomic())
+        return r;
+
+    if (!r->length())
+        return StringImpl::empty();
+
+    StringImpl* result = *stringTable().add(r).first;
+    if (result == r)
+        r->setIsAtomic(true);
+    return result;
+}
+
+// Looks up an existing atomic string without inserting. Returns 0 when no
+// entry matches. |s| must be non-null and |existingHash| non-zero (asserted).
+// NOTE(review): the cast targets and the explicit template arguments on
+// find() were stripped in extraction and are reconstructed from the return
+// type and the HashAndCharacters key - confirm against upstream.
+AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash)
+{
+    ASSERT(s);
+    ASSERT(existingHash);
+
+    if (!length)
+        return static_cast<AtomicStringImpl*>(StringImpl::empty());
+
+    HashAndCharacters buffer = { existingHash, s, length };
+    HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer);
+    if (iterator == stringTable().end())
+        return 0;
+    return static_cast<AtomicStringImpl*>(*iterator);
+}
+
+// Removes |r| from the string table.
+void AtomicString::remove(StringImpl* r)
+{
+    stringTable().remove(r);
+}
+
+// Returns the lower-cased string; returns *this when the string is null or
+// when StringImpl::lower() hands back the same impl.
+AtomicString AtomicString::lower() const
+{
+    // Note: This is a hot function in the Dromaeo benchmark.
+    StringImpl* impl = this->impl();
+    if (UNLIKELY(!impl))
+        return *this;
+    RefPtr<StringImpl> newImpl = impl->lower();
+    if (LIKELY(newImpl == impl))
+        return *this;
+    return AtomicString(newImpl);
+}
+
+// Hashes the UTF-8 range, then adds it to the string table. A zero hash
+// yields nullAtom (presumably the invalid-UTF-8 signal mentioned in
+// AtomicString.h - confirm in calculateStringHashAndLengthFromUTF8).
+AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
+{
+    HashAndUTF8Characters buffer;
+    buffer.characters = charactersStart;
+    buffer.hash = calculateStringHashAndLengthFromUTF8(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
+
+    if (!buffer.hash)
+        return nullAtom;
+
+    AtomicString atomicString;
+    atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
+    return atomicString;
+}
+
+#ifndef NDEBUG
+void AtomicString::show() const
+{
+    m_string.show();
+}
+#endif
+
+} // namespace WTF
diff --git a/Source/JavaScriptCore/wtf/text/AtomicString.h b/Source/JavaScriptCore/wtf/text/AtomicString.h
new file mode 100644
index 000000000..43b38d179
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/AtomicString.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2004, 2005, 2006, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef AtomicString_h
+#define AtomicString_h
+
+#include "AtomicStringImpl.h"
+#include "WTFString.h"
+
+// Define 'NO_IMPLICIT_ATOMICSTRING' before including this header,
+// to disallow (expensive) implicit String-->AtomicString conversions.
+#ifdef NO_IMPLICIT_ATOMICSTRING
+#define ATOMICSTRING_CONVERSION explicit
+#else
+#define ATOMICSTRING_CONVERSION
+#endif
+
+namespace WTF {
+
+struct AtomicStringHash;
+
+// A String wrapper whose constructors route the characters through the
+// add() overloads, so that equality between two AtomicStrings is a
+// comparison of impl pointers (see operator== below).
+// NOTE(review): template arguments and cast targets in this header were
+// stripped in extraction and are restored from the surrounding declarations.
+class AtomicString {
+public:
+    static void init();
+
+    AtomicString() { }
+    AtomicString(const LChar* s) : m_string(add(s)) { }
+    AtomicString(const char* s) : m_string(add(s)) { }
+    AtomicString(const UChar* s, unsigned length) : m_string(add(s, length)) { }
+    AtomicString(const UChar* s, unsigned length, unsigned existingHash) : m_string(add(s, length, existingHash)) { }
+    AtomicString(const UChar* s) : m_string(add(s)) { }
+    ATOMICSTRING_CONVERSION AtomicString(StringImpl* imp) : m_string(add(imp)) { }
+    AtomicString(AtomicStringImpl* imp) : m_string(imp) { }
+    ATOMICSTRING_CONVERSION AtomicString(const String& s) : m_string(add(s.impl())) { }
+
+    // Hash table deleted values, which are only constructed and never copied or destroyed.
+    AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
+    bool isHashTableDeletedValue() const { return m_string.isHashTableDeletedValue(); }
+
+    static AtomicStringImpl* find(const UChar* s, unsigned length, unsigned existingHash);
+
+    operator const String&() const { return m_string; }
+    const String& string() const { return m_string; }
+
+    AtomicStringImpl* impl() const { return static_cast<AtomicStringImpl*>(m_string.impl()); }
+
+    const UChar* characters() const { return m_string.characters(); }
+    unsigned length() const { return m_string.length(); }
+
+    UChar operator[](unsigned int i) const { return m_string[i]; }
+
+    bool contains(UChar c) const { return m_string.contains(c); }
+    bool contains(const LChar* s, bool caseSensitive = true) const
+        { return m_string.contains(s, caseSensitive); }
+    bool contains(const String& s, bool caseSensitive = true) const
+        { return m_string.contains(s, caseSensitive); }
+
+    size_t find(UChar c, size_t start = 0) const { return m_string.find(c, start); }
+    size_t find(const LChar* s, size_t start = 0, bool caseSensitive = true) const
+        { return m_string.find(s, start, caseSensitive); }
+    size_t find(const String& s, size_t start = 0, bool caseSensitive = true) const
+        { return m_string.find(s, start, caseSensitive); }
+
+    bool startsWith(const String& s, bool caseSensitive = true) const
+        { return m_string.startsWith(s, caseSensitive); }
+    bool endsWith(const String& s, bool caseSensitive = true) const
+        { return m_string.endsWith(s, caseSensitive); }
+
+    AtomicString lower() const;
+    // A null string is returned unchanged, as lower() does, instead of
+    // calling upper() through a null impl pointer.
+    AtomicString upper() const { return impl() ? AtomicString(impl()->upper()) : *this; }
+
+    int toInt(bool* ok = 0) const { return m_string.toInt(ok); }
+    double toDouble(bool* ok = 0) const { return m_string.toDouble(ok); }
+    float toFloat(bool* ok = 0) const { return m_string.toFloat(ok); }
+    bool percentage(int& p) const { return m_string.percentage(p); }
+
+    bool isNull() const { return m_string.isNull(); }
+    bool isEmpty() const { return m_string.isEmpty(); }
+
+    static void remove(StringImpl*);
+
+#if USE(CF)
+    AtomicString(CFStringRef s) : m_string(add(String(s).impl())) { }
+    CFStringRef createCFString() const { return m_string.createCFString(); }
+#endif
+#ifdef __OBJC__
+    AtomicString(NSString* s) : m_string(add(String(s).impl())) { }
+    operator NSString*() const { return m_string; }
+#endif
+#if PLATFORM(QT)
+    AtomicString(const QString& s) : m_string(add(String(s).impl())) { }
+    operator QString() const { return m_string; }
+#endif
+
+    // AtomicString::fromUTF8 will return a null string if
+    // the input data contains invalid UTF-8 sequences.
+    static AtomicString fromUTF8(const char*, size_t);
+    static AtomicString fromUTF8(const char*);
+
+#ifndef NDEBUG
+    void show() const;
+#endif
+private:
+    String m_string;
+
+    static PassRefPtr<StringImpl> add(const LChar*);
+    ALWAYS_INLINE static PassRefPtr<StringImpl> add(const char* s) { return add(reinterpret_cast<const LChar*>(s)); }
+    static PassRefPtr<StringImpl> add(const UChar*, unsigned length);
+    ALWAYS_INLINE static PassRefPtr<StringImpl> add(const char* s, unsigned length) { return add(reinterpret_cast<const char*>(s), length); }
+    static PassRefPtr<StringImpl> add(const UChar*, unsigned length, unsigned existingHash);
+    static PassRefPtr<StringImpl> add(const UChar*);
+    // Fast path: null or already-atomic impls are returned as-is; everything
+    // else goes through addSlowCase().
+    ALWAYS_INLINE PassRefPtr<StringImpl> add(StringImpl* r)
+    {
+        if (!r || r->isAtomic())
+            return r;
+        return addSlowCase(r);
+    }
+    static PassRefPtr<StringImpl> addSlowCase(StringImpl*);
+    static AtomicString fromUTF8Internal(const char*, const char*);
+};
+
+// Two AtomicStrings are equal exactly when they share the same impl pointer.
+inline bool operator==(const AtomicString& a, const AtomicString& b) { return a.impl() == b.impl(); }
+bool operator==(const AtomicString&, const LChar*);
+inline bool operator==(const AtomicString& a, const char* b) { return WTF::equal(a.impl(), reinterpret_cast<const LChar*>(b)); }
+inline bool operator==(const AtomicString& a, const Vector<UChar>& b) { return a.impl() && equal(a.impl(), b.data(), b.size()); }
+inline bool operator==(const AtomicString& a, const String& b) { return equal(a.impl(), b.impl()); }
+inline bool operator==(const LChar* a, const AtomicString& b) { return b == a; }
+inline bool operator==(const String& a, const AtomicString& b) { return equal(a.impl(), b.impl()); }
+inline bool operator==(const Vector<UChar>& a, const AtomicString& b) { return b == a; }
+
+inline bool operator!=(const AtomicString& a, const AtomicString& b) { return a.impl() != b.impl(); }
+inline bool operator!=(const AtomicString& a, const LChar* b) { return !(a == b); }
+inline bool operator!=(const AtomicString& a, const char* b) { return !(a == b); }
+inline bool operator!=(const AtomicString& a, const String& b) { return !equal(a.impl(), b.impl()); }
+inline bool operator!=(const AtomicString& a, const Vector<UChar>& b) { return !(a == b); }
+inline bool operator!=(const LChar* a, const AtomicString& b) { return !(b == a); }
+inline bool operator!=(const String& a, const AtomicString& b) { return !equal(a.impl(), b.impl()); }
+inline bool operator!=(const Vector<UChar>& a, const AtomicString& b) { return !(a == b); }
+
+inline bool equalIgnoringCase(const AtomicString& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); }
+inline bool equalIgnoringCase(const AtomicString& a, const LChar* b) { return equalIgnoringCase(a.impl(), b); }
+inline bool equalIgnoringCase(const AtomicString& a, const char* b) { return equalIgnoringCase(a.impl(), reinterpret_cast<const LChar*>(b)); }
+inline bool equalIgnoringCase(const AtomicString& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); }
+inline bool equalIgnoringCase(const LChar* a, const AtomicString& b) { return equalIgnoringCase(a, b.impl()); }
+inline bool equalIgnoringCase(const char* a, const AtomicString& b) { return equalIgnoringCase(reinterpret_cast<const LChar*>(a), b.impl()); }
+inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); }
+
+// Define external global variables for the commonly used atomic strings.
+// These are only usable from the main thread.
+#ifndef ATOMICSTRING_HIDE_GLOBALS
+extern const WTF_EXPORTDATA AtomicString nullAtom;
+extern const WTF_EXPORTDATA AtomicString emptyAtom;
+extern const WTF_EXPORTDATA AtomicString textAtom;
+extern const WTF_EXPORTDATA AtomicString commentAtom;
+extern const WTF_EXPORTDATA AtomicString starAtom;
+extern const WTF_EXPORTDATA AtomicString xmlAtom;
+extern const WTF_EXPORTDATA AtomicString xmlnsAtom;
+
+// Converts |length| bytes of UTF-8. A null pointer yields nullAtom and a
+// zero length yields emptyAtom; everything else goes to fromUTF8Internal().
+inline AtomicString AtomicString::fromUTF8(const char* characters, size_t length)
+{
+    if (!characters)
+        return nullAtom;
+    if (!length)
+        return emptyAtom;
+    return fromUTF8Internal(characters, characters + length);
+}
+
+// Converts a null-terminated UTF-8 string. A null pointer yields nullAtom
+// and "" yields emptyAtom. The end pointer is passed as 0 - presumably
+// fromUTF8Internal() treats that as "scan to the terminator"; confirm in
+// calculateStringHashAndLengthFromUTF8.
+inline AtomicString AtomicString::fromUTF8(const char* characters)
+{
+    if (!characters)
+        return nullAtom;
+    if (!*characters)
+        return emptyAtom;
+    return fromUTF8Internal(characters, 0);
+}
+#endif
+
+// AtomicStringHash is the default hash for AtomicString
+// NOTE(review): the template parameter list and the specialization argument
+// were stripped in extraction; restored from the comment above.
+template<typename T> struct DefaultHash;
+template<> struct DefaultHash<AtomicString> {
+    typedef AtomicStringHash Hash;
+};
+
+} // namespace WTF
+
+#ifndef ATOMICSTRING_HIDE_GLOBALS
+using WTF::AtomicString;
+using WTF::nullAtom;
+using WTF::emptyAtom;
+using WTF::textAtom;
+using WTF::commentAtom;
+using WTF::starAtom;
+using WTF::xmlAtom;
+using WTF::xmlnsAtom;
+#endif
+
+#include "StringConcatenate.h"
+#endif // AtomicString_h
diff --git a/Source/JavaScriptCore/wtf/text/AtomicStringHash.h b/Source/JavaScriptCore/wtf/text/AtomicStringHash.h
new file mode 100644
index 000000000..6130d9493
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/AtomicStringHash.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef AtomicStringHash_h
+#define AtomicStringHash_h
+
+// NOTE(review): both include targets were stripped when this patch was
+// extracted; they are reconstructed from the names this header uses
+// (AtomicString, HashTraits/GenericHashTraits) - confirm against upstream.
+#include <wtf/text/AtomicString.h>
+#include <wtf/HashTraits.h>
+
+namespace WTF {
+
+    // Hash functions for AtomicString keys: the hash is the one already
+    // stored on the impl, and equality is AtomicString's operator==.
+    struct AtomicStringHash {
+        static unsigned hash(const AtomicString& key)
+        {
+            return key.impl()->existingHash();
+        }
+
+        static bool equal(const AtomicString& a, const AtomicString& b)
+        {
+            return a == b;
+        }
+
+        static const bool safeToCompareToEmptyOrDeleted = false;
+    };
+
+    // Hash traits for AtomicString: the empty value is all-zero and the
+    // deleted value is an AtomicString built from HashTableDeletedValue.
+    // NOTE(review): the specialization arguments were stripped in extraction
+    // and are restored from the slot types used below.
+    template<> struct HashTraits<WTF::AtomicString> : GenericHashTraits<WTF::AtomicString> {
+        static const bool emptyValueIsZero = true;
+        static void constructDeletedValue(WTF::AtomicString& slot) { new (NotNull, &slot) WTF::AtomicString(HashTableDeletedValue); }
+        static bool isDeletedValue(const WTF::AtomicString& slot) { return slot.isHashTableDeletedValue(); }
+    };
+
+}
+
+using WTF::AtomicStringHash;
+
+#endif
diff --git a/Source/JavaScriptCore/wtf/text/AtomicStringImpl.h b/Source/JavaScriptCore/wtf/text/AtomicStringImpl.h
new file mode 100644
index 000000000..3f0c37606
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/AtomicStringImpl.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef AtomicStringImpl_h
+#define AtomicStringImpl_h
+
+#include "StringImpl.h"
+
+namespace WTF {
+
+// StringImpl subclass that adds no members of its own; it gives atomic
+// string impls a distinct static type.
+class AtomicStringImpl : public StringImpl
+{
+public:
+    AtomicStringImpl() : StringImpl(0) {}
+};
+
+}
+
+using WTF::AtomicStringImpl;
+
+#endif
diff --git a/Source/JavaScriptCore/wtf/text/CString.cpp b/Source/JavaScriptCore/wtf/text/CString.cpp
new file mode 100644
index 000000000..981d77a1d
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/CString.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2003, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "CString.h"
+
+using namespace std;
+
+namespace WTF {
+
+// Copies a null-terminated string; a null pointer leaves the CString null.
+CString::CString(const char* str)
+{
+    if (!str)
+        return;
+
+    init(str, strlen(str));
+}
+
+// Copies |length| bytes from |str|; a null pointer leaves the CString null.
+CString::CString(const char* str, size_t length)
+{
+    init(str, length);
+}
+
+// Allocates a buffer of length + 1 bytes, copies |length| bytes into it and
+// appends the terminating '\0'.
+void CString::init(const char* str, size_t length)
+{
+    if (!str)
+        return;
+
+    // We need to be sure we can add 1 to length without overflowing.
+    // Since the passed-in length is the length of an actual existing
+    // string, and we know the string doesn't occupy the entire address
+    // space, we can assert here and there's no need for a runtime check.
+    ASSERT(length < numeric_limits<size_t>::max());
+
+    m_buffer = CStringBuffer::create(length + 1);
+    memcpy(m_buffer->mutableData(), str, length);
+    m_buffer->mutableData()[length] = '\0';
+}
+
+// Returns a writable pointer to the characters, first giving this CString
+// its own copy of the buffer if the buffer is shared. Returns 0 when null.
+char* CString::mutableData()
+{
+    copyBufferIfNeeded();
+    if (!m_buffer)
+        return 0;
+    return m_buffer->mutableData();
+}
+
+// Creates a CString with room for |length| characters plus the terminator
+// (which is written here) and hands the writable storage back through
+// |characterBuffer|. Crashes if length + 1 would overflow.
+CString CString::newUninitialized(size_t length, char*& characterBuffer)
+{
+    if (length >= numeric_limits<size_t>::max())
+        CRASH();
+
+    CString result;
+    result.m_buffer = CStringBuffer::create(length + 1);
+    char* bytes = result.m_buffer->mutableData();
+    bytes[length] = '\0';
+    characterBuffer = bytes;
+    return result;
+}
+
+// Replaces a shared buffer with a private copy; does nothing when the
+// CString is null or already holds the only reference.
+void CString::copyBufferIfNeeded()
+{
+    if (!m_buffer || m_buffer->hasOneRef())
+        return;
+
+    RefPtr<CStringBuffer> buffer = m_buffer.release();
+    // CStringBuffer::length() is the full vector size, so the terminator
+    // is copied along with the characters.
+    size_t length = buffer->length();
+    m_buffer = CStringBuffer::create(length);
+    memcpy(m_buffer->mutableData(), buffer->data(), length);
+}
+
+// Two CStrings are equal when both are null, or when both are non-null with
+// the same length and the same bytes.
+bool operator==(const CString& a, const CString& b)
+{
+    if (a.isNull() != b.isNull())
+        return false;
+    // Both null: equal, and there is no buffer to hand to memcmp.
+    if (a.isNull())
+        return true;
+    if (a.length() != b.length())
+        return false;
+    // memcmp rather than strncmp: the length is explicit, so bytes after an
+    // embedded '\0' must take part in the comparison too.
+    return !memcmp(a.data(), b.data(), a.length());
+}
+
+} // namespace WTF
diff --git a/Source/JavaScriptCore/wtf/text/CString.h b/Source/JavaScriptCore/wtf/text/CString.h
new file mode 100644
index 000000000..343a7a525
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/CString.h
@@ -0,0
+1,87 @@ +/* + * Copyright (C) 2003, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef CString_h
+#define CString_h
+
+#include "PassRefPtr.h"
+#include "RefCounted.h"
+#include "Vector.h"
+
+namespace WTF {
+
+// Reference-counted storage behind CString: a Vector of chars whose size is
+// fixed at creation. Only CString may create one or write to it.
+class CStringBuffer : public RefCounted<CStringBuffer> {
+public:
+    const char* data() { return m_vector.data(); }
+    // Size of the underlying storage. For buffers created by CString this
+    // includes the terminating NUL.
+    size_t length() { return m_vector.size(); }
+
+private:
+    friend class CString;
+
+    static PassRefPtr<CStringBuffer> create(size_t length) { return adoptRef(new CStringBuffer(length)); }
+    CStringBuffer(size_t length) : m_vector(length) { }
+    char* mutableData() { return m_vector.data(); }
+
+    Vector<char> m_vector;
+};
+
+// A container for a null-terminated char array supporting copy-on-write
+// assignment.  The contained char array may be null.
+class CString {
+public:
+    // Creates a null CString (isNull() is true, data() is 0).
+    CString() { }
+    CString(const char*);
+    CString(const char*, size_t length);
+    // Shares the given buffer rather than copying it.
+    CString(CStringBuffer* buffer) : m_buffer(buffer) { }
+    static CString newUninitialized(size_t length, char*& characterBuffer);
+
+    // Read-only character data, or 0 when the CString is null.
+    const char* data() const
+    {
+        return m_buffer ? m_buffer->data() : 0;
+    }
+    char* mutableData();
+    // Number of characters, not counting the terminating NUL that the buffer
+    // also stores; 0 when the CString is null.
+    size_t length() const
+    {
+        return m_buffer ? m_buffer->length() - 1 : 0;
+    }
+
+    bool isNull() const { return !m_buffer; }
+
+    CStringBuffer* buffer() const { return m_buffer.get(); }
+
+private:
+    void copyBufferIfNeeded();
+    void init(const char*, size_t length);
+    RefPtr<CStringBuffer> m_buffer;
+};
+
+bool operator==(const CString& a, const CString& b);
+inline bool operator!=(const CString& a, const CString& b) { return !(a == b); }
+
+} // namespace WTF
+
+using WTF::CString;
+
+#endif // CString_h
diff --git a/Source/JavaScriptCore/wtf/text/StringBuffer.h b/Source/JavaScriptCore/wtf/text/StringBuffer.h
new file mode 100644
index 000000000..739260d27
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/StringBuffer.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2008, 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef StringBuffer_h
+#define StringBuffer_h
+
+#include
+#include
+#include
+
+namespace WTF {
+
+// A non-copyable, heap-allocated array of CharType obtained from fastMalloc.
+// The storage is freed on destruction unless ownership has been handed to
+// the caller with release().
+template <typename CharType>
+class StringBuffer {
+    WTF_MAKE_NONCOPYABLE(StringBuffer);
+public:
+    // Allocates room for 'length' characters; the contents are not initialized
+    // here. Crashes rather than letting the byte count overflow.
+    explicit StringBuffer(unsigned length)
+        : m_length(length)
+    {
+        if (m_length > std::numeric_limits<unsigned>::max() / sizeof(CharType))
+            CRASH();
+        m_data = static_cast<CharType*>(fastMalloc(m_length * sizeof(CharType)));
+    }
+
+    ~StringBuffer()
+    {
+        fastFree(m_data);
+    }
+
+    // Reduces the recorded length only; the allocation is left untouched.
+    void shrink(unsigned newLength)
+    {
+        ASSERT(newLength <= m_length);
+        m_length = newLength;
+    }
+
+    // Sets the length, reallocating when it grows beyond the current length.
+    void resize(unsigned newLength)
+    {
+        if (newLength > m_length) {
+            // Size in units of CharType, matching the constructor, so that the
+            // overflow check and byte count are right for every instantiation.
+            if (newLength > std::numeric_limits<unsigned>::max() / sizeof(CharType))
+                CRASH();
+            m_data = static_cast<CharType*>(fastRealloc(m_data, newLength * sizeof(CharType)));
+        }
+        m_length = newLength;
+    }
+
+    unsigned length() const { return m_length; }
+    CharType* characters() { return m_data; }
+
+    // Returns a reference to the stored element, whatever CharType is.
+    CharType& operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
+
+    // Hands the storage to the caller, who becomes responsible for freeing it;
+    // the destructor then has nothing left to free.
+    CharType* release() { CharType* data = m_data; m_data = 0; return data; }
+
+private:
+    unsigned m_length;
+    CharType* m_data;
+};
+
+} // namespace WTF
+
+using WTF::StringBuffer;
+
+#endif // StringBuffer_h
diff --git a/Source/JavaScriptCore/wtf/text/StringBuilder.cpp b/Source/JavaScriptCore/wtf/text/StringBuilder.cpp
new file mode 100644
index 000000000..6d3c310e6
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/StringBuilder.cpp
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "StringBuilder.h"
+
+#include "WTFString.h"
+
+namespace WTF {
+
+// Smallest capacity ever chosen when a buffer has to be grown.
+static const unsigned minimumCapacity = 16;
+
+// Makes m_string hold the builder's current contents, creating it from
+// m_buffer if it does not exist yet.
+void StringBuilder::reifyString()
+{
+    // Check if the string already exists.
+    if (!m_string.isNull()) {
+        ASSERT(m_string.length() == m_length);
+        return;
+    }
+
+    // Check for empty.
+    if (!m_length) {
+        m_string = StringImpl::empty();
+        return;
+    }
+
+    // Must be valid in the buffer, take a substring (unless string fills the buffer).
+    ASSERT(m_buffer && m_length <= m_buffer->length());
+    m_string = (m_length == m_buffer->length())
+        ? m_buffer.get()
+        : StringImpl::create(m_buffer, 0, m_length);
+
+    if (m_buffer->has16BitShadow() && m_valid16BitShadowLength < m_length)
+        m_buffer->upconvertCharacters(m_valid16BitShadowLength, m_length);
+
+    m_valid16BitShadowLength = m_length;
+}
+
+// Truncates the builder to newSize characters; newSize must not exceed the
+// current length.
+void StringBuilder::resize(unsigned newSize)
+{
+    // Check newSize < m_length, hence m_length > 0.
+    ASSERT(newSize <= m_length);
+    if (newSize == m_length)
+        return;
+    ASSERT(m_length);
+
+    // If there is a buffer, we only need to duplicate it if it has more than one ref.
+    if (m_buffer) {
+        if (!m_buffer->hasOneRef()) {
+            if (m_buffer->is8Bit())
+                allocateBuffer(m_buffer->characters8(), m_buffer->length());
+            else
+                allocateBuffer(m_buffer->characters16(), m_buffer->length());
+        }
+        m_length = newSize;
+        m_string = String();
+        return;
+    }
+
+    // Since m_length && !m_buffer, the string must be valid in m_string, and m_string.length() > 0.
+    ASSERT(!m_string.isEmpty());
+    ASSERT(m_length == m_string.length());
+    ASSERT(newSize < m_string.length());
+    m_length = newSize;
+    m_string = StringImpl::create(m_string.impl(), 0, newSize);
+}
+
+// Allocate a new 8 bit buffer, copying in currentCharacters (these may come from either m_string
+// or m_buffer, neither will be reassigned until the copy has completed).
+void StringBuilder::allocateBuffer(const LChar* currentCharacters, unsigned requiredLength)
+{
+    ASSERT(m_is8Bit);
+    // Copy the existing data into a new buffer, set result to point to the end of the existing data.
+    RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters8);
+    memcpy(m_bufferCharacters8, currentCharacters, static_cast<size_t>(m_length) * sizeof(LChar)); // This can't overflow.
+
+    // Update the builder state.
+    m_buffer = buffer.release();
+    m_string = String();
+}
+
+// Allocate a new 16 bit buffer, copying in currentCharacters (these may come from either m_string
+// or m_buffer, neither will be reassigned until the copy has completed).
+void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requiredLength)
+{
+    ASSERT(!m_is8Bit);
+    // Copy the existing data into a new buffer, set result to point to the end of the existing data.
+    RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
+    memcpy(m_bufferCharacters16, currentCharacters, static_cast<size_t>(m_length) * sizeof(UChar)); // This can't overflow.
+
+    // Update the builder state.
+    m_buffer = buffer.release();
+    m_string = String();
+}
+
+// Allocate a new 16 bit buffer, copying in currentCharacters (which is 8 bit and may come
+// from either m_string or m_buffer, neither will be reassigned until the copy has completed).
+void StringBuilder::allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength)
+{
+    ASSERT(m_is8Bit);
+    // Copy the existing data into a new buffer, set result to point to the end of the existing data.
+    RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
+    for (unsigned i = 0; i < m_length; i++)
+        m_bufferCharacters16[i] = currentCharacters[i];
+
+    // From here on the builder holds 16 bit characters.
+    m_is8Bit = false;
+
+    // Update the builder state.
+    m_buffer = buffer.release();
+    m_string = String();
+}
+
+// Grows (or shrinks) the existing 8 bit buffer to requiredLength characters.
+template <>
+void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength)
+{
+    // If the buffer has only one ref (by this StringBuilder), reallocate it,
+    // otherwise fall back to "allocate and copy" method.
+    m_string = String();
+
+    ASSERT(m_is8Bit);
+    ASSERT(m_buffer->is8Bit());
+
+    if (m_buffer->hasOneRef())
+        m_buffer = StringImpl::reallocate(m_buffer.release(), requiredLength, m_bufferCharacters8);
+    else
+        allocateBuffer(m_buffer->characters8(), requiredLength);
+}
+
+// Grows (or shrinks) the existing buffer to requiredLength 16 bit characters,
+// up-converting first if the buffer is still 8 bit.
+template <>
+void StringBuilder::reallocateBuffer<UChar>(unsigned requiredLength)
+{
+    // If the buffer has only one ref (by this StringBuilder), reallocate it,
+    // otherwise fall back to "allocate and copy" method.
+    m_string = String();
+
+    if (m_buffer->is8Bit())
+        allocateBufferUpConvert(m_buffer->characters8(), requiredLength);
+    else if (m_buffer->hasOneRef())
+        m_buffer = StringImpl::reallocate(m_buffer.release(), requiredLength, m_bufferCharacters16);
+    else
+        allocateBuffer(m_buffer->characters16(), requiredLength);
+}
+
+// Ensures the builder has a buffer of at least newCapacity characters.
+// Never shrinks an existing buffer.
+void StringBuilder::reserveCapacity(unsigned newCapacity)
+{
+    if (m_buffer) {
+        // If there is already a buffer, then grow if necessary.
+        if (newCapacity > m_buffer->length()) {
+            if (m_buffer->is8Bit())
+                reallocateBuffer<LChar>(newCapacity);
+            else
+                reallocateBuffer<UChar>(newCapacity);
+        }
+    } else {
+        // Grow the string, if necessary.
+        if (newCapacity > m_length) {
+            if (!m_length) {
+                // Nothing to copy, but the new buffer's width must still match
+                // m_is8Bit: the builder can be empty and 16 bit (resize(0) after
+                // appending a 16 bit string leaves m_is8Bit false), and the
+                // single-character append fast paths trust that flag.
+                if (m_is8Bit) {
+                    LChar* nullPlaceholder = 0;
+                    allocateBuffer(nullPlaceholder, newCapacity);
+                } else {
+                    UChar* nullPlaceholder = 0;
+                    allocateBuffer(nullPlaceholder, newCapacity);
+                }
+            } else if (m_string.is8Bit())
+                allocateBuffer(m_string.characters8(), newCapacity);
+            else
+                allocateBuffer(m_string.characters16(), newCapacity);
+        }
+    }
+}
+
+// Make 'length' additional capacity be available in m_buffer, update m_string & m_length,
+// return a pointer to the newly allocated storage.
+template <typename CharType>
+ALWAYS_INLINE CharType* StringBuilder::appendUninitialized(unsigned length)
+{
+    ASSERT(length);
+
+    // Calculate the new size of the builder after appending.
+    unsigned requiredLength = length + m_length;
+    if (requiredLength < length)
+        CRASH();
+
+    if ((m_buffer) && (requiredLength <= m_buffer->length())) {
+        // If the buffer is valid it must be at least as long as the current builder contents!
+        ASSERT(m_buffer->length() >= m_length);
+        unsigned currentLength = m_length;
+        m_string = String();
+        m_length = requiredLength;
+        return getBufferCharacters<CharType>() + currentLength;
+    }
+
+    return appendUninitializedSlow<CharType>(requiredLength);
+}
+
+// Make 'length' additional capacity be available in m_buffer, update m_string & m_length,
+// return a pointer to the newly allocated storage.
+template <typename CharType>
+CharType* StringBuilder::appendUninitializedSlow(unsigned requiredLength)
+{
+    ASSERT(requiredLength);
+
+    if (m_buffer) {
+        // If the buffer is valid it must be at least as long as the current builder contents!
+        ASSERT(m_buffer->length() >= m_length);
+
+        reallocateBuffer<CharType>(std::max(requiredLength, std::max(minimumCapacity, m_buffer->length() * 2)));
+    } else {
+        ASSERT(m_string.length() == m_length);
+        allocateBuffer(m_length ? m_string.getCharacters<CharType>() : 0, std::max(requiredLength, std::max(minimumCapacity, m_length * 2)));
+    }
+
+    CharType* result = getBufferCharacters<CharType>() + m_length;
+    m_length = requiredLength;
+    return result;
+}
+
+// Appends 'length' 16 bit characters. An 8 bit builder is converted to 16 bit
+// first; the appended characters are not inspected to see whether they would fit.
+void StringBuilder::append(const UChar* characters, unsigned length)
+{
+    if (!length)
+        return;
+
+    ASSERT(characters);
+
+    if (m_is8Bit) {
+        // Calculate the new size of the builder after appending.
+        unsigned requiredLength = length + m_length;
+        if (requiredLength < length)
+            CRASH();
+
+        if (m_buffer) {
+            // If the buffer is valid it must be at least as long as the current builder contents!
+            ASSERT(m_buffer->length() >= m_length);
+
+            // Grow with the same policy as the other growth paths in this file,
+            // so the next append does not immediately have to reallocate.
+            allocateBufferUpConvert(m_buffer->characters8(), std::max(requiredLength, std::max(minimumCapacity, m_buffer->length() * 2)));
+        } else {
+            ASSERT(m_string.length() == m_length);
+            allocateBufferUpConvert(m_string.isNull() ? 0 : m_string.characters8(), std::max(requiredLength, std::max(minimumCapacity, m_length * 2)));
+        }
+
+        memcpy(m_bufferCharacters16 + m_length, characters, static_cast<size_t>(length) * sizeof(UChar));
+        m_length = requiredLength;
+    } else
+        memcpy(appendUninitialized<UChar>(length), characters, static_cast<size_t>(length) * sizeof(UChar));
+}
+
+// Appends 'length' 8 bit characters, widening each one if the builder is 16 bit.
+void StringBuilder::append(const LChar* characters, unsigned length)
+{
+    if (!length)
+        return;
+    ASSERT(characters);
+
+    if (m_is8Bit) {
+        LChar* dest = appendUninitialized<LChar>(length);
+        // Short runs are copied with a plain loop instead of a memcpy call.
+        if (length > 8)
+            memcpy(dest, characters, static_cast<size_t>(length) * sizeof(LChar));
+        else {
+            const LChar* end = characters + length;
+            while (characters < end)
+                *(dest++) = *(characters++);
+        }
+    } else {
+        UChar* dest = appendUninitialized<UChar>(length);
+        const LChar* end = characters + length;
+        while (characters < end)
+            *(dest++) = *(characters++);
+    }
+}
+
+// Trims the buffer to the current length when enough space is being wasted,
+// then moves it into m_string and drops m_buffer.
+void StringBuilder::shrinkToFit()
+{
+    // If the buffer is at least 80% full, don't bother copying. Need to tune this heuristic!
+    if (m_buffer && m_buffer->length() > (m_length + (m_length >> 2))) {
+        if (m_is8Bit)
+            reallocateBuffer<LChar>(m_length);
+        else
+            reallocateBuffer<UChar>(m_length);
+        m_string = m_buffer;
+        m_buffer = 0;
+    }
+}
+
+} // namespace WTF
diff --git a/Source/JavaScriptCore/wtf/text/StringBuilder.h b/Source/JavaScriptCore/wtf/text/StringBuilder.h
new file mode 100644
index 000000000..da1e8320d
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/StringBuilder.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef StringBuilder_h
+#define StringBuilder_h
+
+#include
+#include
+
+namespace WTF {
+
+// Accumulates characters into a growable buffer and produces a String on demand.
+// Contents are held as 8 bit characters until 16 bit data is appended
+// (m_is8Bit records which). At any time the contents live in m_string,
+// in the first m_length characters of m_buffer, or both.
+class StringBuilder {
+public:
+    StringBuilder()
+        : m_length(0)
+        , m_is8Bit(true)
+        , m_valid16BitShadowLength(0)
+        , m_bufferCharacters8(0)
+    {
+    }
+
+    void append(const UChar*, unsigned);
+    void append(const LChar*, unsigned);
+
+    ALWAYS_INLINE void append(const char* characters, unsigned length) { append(reinterpret_cast<const LChar*>(characters), length); }
+
+    void append(const String& string)
+    {
+        if (!string.length())
+            return;
+
+        // If we're appending to an empty string, and there is not buffer
+        // (in case reserveCapacity has been called) then just retain the
+        // string.
+        if (!m_length && !m_buffer) {
+            m_string = string;
+            m_length = string.length();
+            m_is8Bit = m_string.is8Bit();
+            return;
+        }
+
+        if (string.is8Bit())
+            append(string.characters8(), string.length());
+        else
+            append(string.characters16(), string.length());
+    }
+
+    // Appends a NUL-terminated C string; a null pointer is ignored.
+    void append(const char* characters)
+    {
+        if (characters)
+            append(characters, strlen(characters));
+    }
+
+    // The single-character overloads below write straight into the buffer when
+    // it has spare room and no String has been created from it (m_string is
+    // null); otherwise they fall back to the general append.
+    void append(UChar c)
+    {
+        if (m_buffer && !m_is8Bit && m_length < m_buffer->length() && m_string.isNull())
+            m_bufferCharacters16[m_length++] = c;
+        else
+            append(&c, 1);
+    }
+
+    void append(LChar c)
+    {
+        if (m_buffer && m_length < m_buffer->length() && m_string.isNull()) {
+            if (m_is8Bit)
+                m_bufferCharacters8[m_length++] = c;
+            else
+                m_bufferCharacters16[m_length++] = c;
+        } else
+            append(&c, 1);
+    }
+
+    void append(char c)
+    {
+        if (m_buffer && m_length < m_buffer->length() && m_string.isNull()) {
+            if (m_is8Bit)
+                m_bufferCharacters8[m_length++] = (LChar)c;
+            else
+                m_bufferCharacters16[m_length++] = (LChar)c;
+        }
+        else
+            append(&c, 1);
+    }
+
+    // Returns the contents as a String, trimming unused buffer capacity first.
+    String toString()
+    {
+        if (m_string.isNull()) {
+            shrinkToFit();
+            reifyString();
+        }
+        return m_string;
+    }
+
+    // Returns the contents as a String without trimming the buffer.
+    String toStringPreserveCapacity()
+    {
+        if (m_string.isNull())
+            reifyString();
+        return m_string;
+    }
+
+    unsigned length() const
+    {
+        return m_length;
+    }
+
+    bool isEmpty() const { return !length(); }
+
+    void reserveCapacity(unsigned newCapacity);
+
+    void resize(unsigned newSize);
+
+    void shrinkToFit();
+
+    UChar operator[](unsigned i) const
+    {
+        ASSERT(i < m_length);
+        if (m_is8Bit)
+            return characters8()[i];
+        return characters16()[i];
+    }
+
+    // The character accessors return 0 for an empty builder; otherwise they
+    // read from m_string when it exists and from m_buffer when it does not.
+    const LChar* characters8() const
+    {
+        ASSERT(m_is8Bit);
+        if (!m_length)
+            return 0;
+        if (!m_string.isNull())
+            return m_string.characters8();
+        ASSERT(m_buffer);
+        return m_buffer->characters8();
+    }
+
+    const UChar* characters16() const
+    {
+        ASSERT(!m_is8Bit);
+        if (!m_length)
+            return 0;
+        if (!m_string.isNull())
+            return m_string.characters16();
+        ASSERT(m_buffer);
+        return m_buffer->characters16();
+    }
+
+    const UChar* characters() const
+    {
+        if (!m_length)
+            return 0;
+        if (!m_string.isNull())
+            return m_string.characters();
+        ASSERT(m_buffer);
+        // Bring the buffer's 16 bit shadow up to date with characters appended
+        // since it was last converted.
+        if (m_buffer->has16BitShadow() && m_valid16BitShadowLength < m_length)
+            m_buffer->upconvertCharacters(m_valid16BitShadowLength, m_length);
+
+        m_valid16BitShadowLength = m_length;
+
+        return m_buffer->characters();
+    }
+
+    // Returns the builder to its freshly constructed state.
+    void clear()
+    {
+        m_length = 0;
+        m_string = String();
+        m_buffer = 0;
+        m_bufferCharacters8 = 0;
+        m_is8Bit = true;
+        m_valid16BitShadowLength = 0;
+    }
+
+private:
+    void allocateBuffer(const LChar* currentCharacters, unsigned requiredLength);
+    void allocateBuffer(const UChar* currentCharacters, unsigned requiredLength);
+    void allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength);
+    template <typename CharType>
+    void reallocateBuffer(unsigned requiredLength);
+    template <typename CharType>
+    ALWAYS_INLINE CharType* appendUninitialized(unsigned length);
+    template <typename CharType>
+    CharType* appendUninitializedSlow(unsigned length);
+    template <typename CharType>
+    ALWAYS_INLINE CharType * getBufferCharacters();
+    void reifyString();
+
+    unsigned m_length;
+    String m_string;
+    RefPtr<StringImpl> m_buffer;
+    bool m_is8Bit;
+    mutable unsigned m_valid16BitShadowLength;
+    // Start of m_buffer's character storage; m_is8Bit selects the valid member.
+    union {
+        LChar* m_bufferCharacters8;
+        UChar* m_bufferCharacters16;
+    };
+};
+
+template <>
+ALWAYS_INLINE LChar* StringBuilder::getBufferCharacters<LChar>()
+{
+    ASSERT(m_is8Bit);
+    return m_bufferCharacters8;
+}
+
+template <>
+ALWAYS_INLINE UChar* StringBuilder::getBufferCharacters<UChar>()
+{
+    ASSERT(!m_is8Bit);
+    return m_bufferCharacters16;
+}
+
+} // namespace WTF
+
+using WTF::StringBuilder;
+
+#endif // StringBuilder_h
diff --git a/Source/JavaScriptCore/wtf/text/StringConcatenate.h b/Source/JavaScriptCore/wtf/text/StringConcatenate.h
new file mode 100644
index 000000000..08d67dcf0
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/StringConcatenate.h
@@ -0,0 +1,964 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#ifndef StringConcatenate_h +#define StringConcatenate_h + +#ifndef WTFString_h +#include "AtomicString.h" +#endif + +// This macro is helpful for testing how many intermediate Strings are created while evaluating an +// expression containing operator+. +#ifndef WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING +#define WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING() ((void)0) +#endif + +namespace WTF { + +template +class StringTypeAdapter { +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(char buffer) + : m_buffer(buffer) + { + } + + unsigned length() { return 1; } + + bool is8Bit() { return true; } + + void writeTo(LChar* destination) + { + *destination = m_buffer; + } + + void writeTo(UChar* destination) { *destination = m_buffer; } + +private: + unsigned char m_buffer; +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(LChar buffer) + : m_buffer(buffer) + { + } + + unsigned length() { return 1; } + + bool is8Bit() { return true; } + + void writeTo(LChar* destination) + { + *destination = m_buffer; + } + + void writeTo(UChar* destination) { *destination = m_buffer; } + +private: + LChar m_buffer; +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(UChar buffer) + : m_buffer(buffer) + { + } + + unsigned length() { return 1; } + + bool is8Bit() { return m_buffer <= 0xff; } + + void writeTo(LChar* destination) + { + ASSERT(is8Bit()); + *destination = static_cast(m_buffer); + } + + void writeTo(UChar* destination) { *destination = m_buffer; } + +private: + UChar m_buffer; +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(char* buffer) + : m_buffer(buffer) + , m_length(strlen(buffer)) + { + } + + unsigned length() { return m_length; } + + bool is8Bit() { return true; } + + void writeTo(LChar* destination) + { + for (unsigned i = 0; i < m_length; ++i) + destination[i] = static_cast(m_buffer[i]); + } + + void writeTo(UChar* destination) + { + for (unsigned i = 0; i < m_length; ++i) { 
+ unsigned char c = m_buffer[i]; + destination[i] = c; + } + } + +private: + const char* m_buffer; + unsigned m_length; +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(LChar* buffer) + : m_buffer(buffer) + , m_length(strlen(reinterpret_cast(buffer))) + { + } + + unsigned length() { return m_length; } + + bool is8Bit() { return true; } + + void writeTo(LChar* destination) + { + memcpy(destination, m_buffer, m_length * sizeof(LChar)); + } + + void writeTo(UChar* destination) + { + for (unsigned i = 0; i < m_length; ++i) + destination[i] = m_buffer[i]; + } + +private: + const LChar* m_buffer; + unsigned m_length; +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(const UChar* buffer) + : m_buffer(buffer) + { + size_t len = 0; + while (m_buffer[len] != UChar(0)) + len++; + + if (len > std::numeric_limits::max()) + CRASH(); + + m_length = len; + } + + unsigned length() { return m_length; } + + bool is8Bit() { return false; } + + NO_RETURN_DUE_TO_CRASH void writeTo(LChar*) + { + CRASH(); + } + + void writeTo(UChar* destination) + { + memcpy(destination, m_buffer, m_length * sizeof(UChar)); + } + +private: + const UChar* m_buffer; + unsigned m_length; +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(const char* buffer) + : m_buffer(buffer) + , m_length(strlen(buffer)) + { + } + + unsigned length() { return m_length; } + + bool is8Bit() { return true; } + + void writeTo(LChar* destination) + { + memcpy(destination, m_buffer, static_cast(m_length) * sizeof(LChar)); + } + + void writeTo(UChar* destination) + { + for (unsigned i = 0; i < m_length; ++i) { + unsigned char c = m_buffer[i]; + destination[i] = c; + } + } + +private: + const char* m_buffer; + unsigned m_length; +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(const LChar* buffer) + : m_buffer(buffer) + , m_length(strlen(reinterpret_cast(buffer))) + { + } + + unsigned length() { return m_length; } + + bool is8Bit() { 
return true; } + + void writeTo(LChar* destination) + { + memcpy(destination, m_buffer, static_cast(m_length) * sizeof(LChar)); + } + + void writeTo(UChar* destination) + { + for (unsigned i = 0; i < m_length; ++i) + destination[i] = m_buffer[i]; + } + +private: + const LChar* m_buffer; + unsigned m_length; +}; + +template<> +class StringTypeAdapter > { +public: + StringTypeAdapter >(const Vector& buffer) + : m_buffer(buffer) + { + } + + size_t length() { return m_buffer.size(); } + + bool is8Bit() { return true; } + + void writeTo(LChar* destination) + { + for (size_t i = 0; i < m_buffer.size(); ++i) + destination[i] = static_cast(m_buffer[i]); + } + + void writeTo(UChar* destination) + { + for (size_t i = 0; i < m_buffer.size(); ++i) + destination[i] = static_cast(m_buffer[i]); + } + +private: + const Vector& m_buffer; +}; + +template<> +class StringTypeAdapter > { +public: + StringTypeAdapter >(const Vector& buffer) + : m_buffer(buffer) + { + } + + size_t length() { return m_buffer.size(); } + + bool is8Bit() { return true; } + + void writeTo(LChar* destination) + { + for (size_t i = 0; i < m_buffer.size(); ++i) + destination[i] = m_buffer[i]; + } + + void writeTo(UChar* destination) + { + for (size_t i = 0; i < m_buffer.size(); ++i) + destination[i] = m_buffer[i]; + } + +private: + const Vector& m_buffer; +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(const String& string) + : m_buffer(string) + { + } + + unsigned length() { return m_buffer.length(); } + + bool is8Bit() { return m_buffer.isNull() || m_buffer.is8Bit(); } + + void writeTo(LChar* destination) + { + unsigned length = m_buffer.length(); + + ASSERT(is8Bit()); + const LChar* data = m_buffer.characters8(); + for (unsigned i = 0; i < length; ++i) + destination[i] = data[i]; + + WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING(); + } + + void writeTo(UChar* destination) + { + unsigned length = m_buffer.length(); + + if (is8Bit()) { + const LChar* data = m_buffer.characters8(); + for 
(unsigned i = 0; i < length; ++i) + destination[i] = data[i]; + } else { + const UChar* data = m_buffer.characters16(); + for (unsigned i = 0; i < length; ++i) + destination[i] = data[i]; + } + + WTF_STRINGTYPEADAPTER_COPIED_WTF_STRING(); + } + +private: + const String& m_buffer; +}; + +template<> +class StringTypeAdapter { +public: + StringTypeAdapter(const AtomicString& string) + : m_adapter(string.string()) + { + } + + unsigned length() { return m_adapter.length(); } + + bool is8Bit() { return m_adapter.is8Bit(); } + + void writeTo(LChar* destination) { m_adapter.writeTo(destination); } + void writeTo(UChar* destination) { m_adapter.writeTo(destination); } + +private: + StringTypeAdapter m_adapter; +}; + +inline void sumWithOverflow(unsigned& total, unsigned addend, bool& overflow) +{ + unsigned oldTotal = total; + total = oldTotal + addend; + if (total < oldTotal) + overflow = true; +} + +template +PassRefPtr tryMakeString(StringType1 string1, StringType2 string2) +{ + StringTypeAdapter adapter1(string1); + StringTypeAdapter adapter2(string2); + + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + if (overflow) + return 0; + + if (adapter1.is8Bit() && adapter2.is8Bit()) { + LChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + LChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + + return resultImpl.release(); + } + + UChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + + return resultImpl.release(); +} + +template +PassRefPtr tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3) +{ + StringTypeAdapter adapter1(string1); + StringTypeAdapter adapter2(string2); + 
StringTypeAdapter adapter3(string3); + + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + if (overflow) + return 0; + + if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit()) { + LChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + LChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + + return resultImpl.release(); + } + + UChar* buffer = 0; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + + return resultImpl.release(); +} + +template +PassRefPtr tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4) +{ + StringTypeAdapter adapter1(string1); + StringTypeAdapter adapter2(string2); + StringTypeAdapter adapter3(string3); + StringTypeAdapter adapter4(string4); + + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + sumWithOverflow(length, adapter4.length(), overflow); + if (overflow) + return 0; + + if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit()) { + LChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + LChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + + return 
resultImpl.release(); + } + + UChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + + return resultImpl.release(); +} + +template +PassRefPtr tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5) +{ + StringTypeAdapter adapter1(string1); + StringTypeAdapter adapter2(string2); + StringTypeAdapter adapter3(string3); + StringTypeAdapter adapter4(string4); + StringTypeAdapter adapter5(string5); + + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + sumWithOverflow(length, adapter4.length(), overflow); + sumWithOverflow(length, adapter5.length(), overflow); + if (overflow) + return 0; + + if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit()) { + LChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + LChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + + return resultImpl.release(); + } + + UChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); 
+ result += adapter4.length(); + adapter5.writeTo(result); + + return resultImpl.release(); +} + +template +PassRefPtr tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6) +{ + StringTypeAdapter adapter1(string1); + StringTypeAdapter adapter2(string2); + StringTypeAdapter adapter3(string3); + StringTypeAdapter adapter4(string4); + StringTypeAdapter adapter5(string5); + StringTypeAdapter adapter6(string6); + + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + sumWithOverflow(length, adapter4.length(), overflow); + sumWithOverflow(length, adapter5.length(), overflow); + sumWithOverflow(length, adapter6.length(), overflow); + if (overflow) + return 0; + + if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit()) { + LChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + LChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + result += adapter5.length(); + adapter6.writeTo(result); + + return resultImpl.release(); + } + + UChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + result += adapter5.length(); + adapter6.writeTo(result); + + return 
resultImpl.release();
+}
+
+// Concatenates seven string-like arguments into a single newly allocated string.
+//
+// Each argument is wrapped in a StringTypeAdapter, which supplies its length,
+// whether it is 8-bit, and a writeTo() for either character width.
+// Returns 0 when the summed length overflows `unsigned` or when
+// StringImpl::tryCreateUninitialized() fails.
+template
+PassRefPtr tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7)
+{
+    StringTypeAdapter adapter1(string1);
+    StringTypeAdapter adapter2(string2);
+    StringTypeAdapter adapter3(string3);
+    StringTypeAdapter adapter4(string4);
+    StringTypeAdapter adapter5(string5);
+    StringTypeAdapter adapter6(string6);
+    StringTypeAdapter adapter7(string7);
+
+    bool overflow = false;
+    unsigned length = adapter1.length();
+    sumWithOverflow(length, adapter2.length(), overflow);
+    sumWithOverflow(length, adapter3.length(), overflow);
+    sumWithOverflow(length, adapter4.length(), overflow);
+    sumWithOverflow(length, adapter5.length(), overflow);
+    sumWithOverflow(length, adapter6.length(), overflow);
+    sumWithOverflow(length, adapter7.length(), overflow);
+    if (overflow)
+        return 0;
+
+    // The LChar buffer may only be used when every piece is 8-bit, including the
+    // seventh: adapter7 is written into this buffer as well.
+    if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit() && adapter7.is8Bit()) {
+        LChar* buffer;
+        RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
+        if (!resultImpl)
+            return 0;
+
+        LChar* result = buffer;
+        adapter1.writeTo(result);
+        result += adapter1.length();
+        adapter2.writeTo(result);
+        result += adapter2.length();
+        adapter3.writeTo(result);
+        result += adapter3.length();
+        adapter4.writeTo(result);
+        result += adapter4.length();
+        adapter5.writeTo(result);
+        result += adapter5.length();
+        adapter6.writeTo(result);
+        result += adapter6.length();
+        adapter7.writeTo(result);
+
+        return resultImpl.release();
+    }
+
+    // At least one piece is 16-bit: build the result from UChar.
+    UChar* buffer;
+    RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
+    if (!resultImpl)
+        return 0;
+
+    UChar* result = buffer;
+    adapter1.writeTo(result);
+    result += adapter1.length();
+    adapter2.writeTo(result);
+    result += adapter2.length();
+    adapter3.writeTo(result);
+    result += adapter3.length();
+    adapter4.writeTo(result);
+    result += adapter4.length();
+    adapter5.writeTo(result);
+    result += adapter5.length();
+    adapter6.writeTo(result);
+    result += adapter6.length();
+    adapter7.writeTo(result);
+
+    return resultImpl.release();
+}
+
+template
+PassRefPtr tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8)
+{
+    StringTypeAdapter adapter1(string1);
+    StringTypeAdapter adapter2(string2);
+    StringTypeAdapter adapter3(string3);
+    StringTypeAdapter adapter4(string4);
+    StringTypeAdapter adapter5(string5);
+    StringTypeAdapter adapter6(string6);
+    StringTypeAdapter adapter7(string7);
+    StringTypeAdapter adapter8(string8);
+
+    bool overflow = false;
+    unsigned length = adapter1.length();
+    sumWithOverflow(length, adapter2.length(), overflow);
+    sumWithOverflow(length, adapter3.length(), overflow);
+    sumWithOverflow(length, adapter4.length(), overflow);
+    sumWithOverflow(length, adapter5.length(), overflow);
+    sumWithOverflow(length, adapter6.length(), overflow);
+    sumWithOverflow(length, adapter7.length(), overflow);
+    sumWithOverflow(length, adapter8.length(), overflow);
+    if (overflow)
+        return 0;
+
+    if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit() && adapter7.is8Bit() && adapter8.is8Bit()) {
+        LChar* buffer;
+        RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer);
+        if (!resultImpl)
+            return 0;
+
+        LChar* result = buffer;
+        adapter1.writeTo(result);
+        result += adapter1.length();
+        adapter2.writeTo(result);
+        result += adapter2.length();
+        adapter3.writeTo(result);
+        result += adapter3.length();
+        adapter4.writeTo(result);
+        result += adapter4.length();
+        adapter5.writeTo(result);
+        result += adapter5.length();
+        adapter6.writeTo(result);
+        result += adapter6.length();
+        adapter7.writeTo(result);
+        result += adapter7.length();
+        adapter8.writeTo(result);
+
+        return
resultImpl.release(); + } + + UChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + result += adapter5.length(); + adapter6.writeTo(result); + result += adapter6.length(); + adapter7.writeTo(result); + result += adapter7.length(); + adapter8.writeTo(result); + + return resultImpl.release(); +} + +template +PassRefPtr tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8, StringType9 string9) +{ + StringTypeAdapter adapter1(string1); + StringTypeAdapter adapter2(string2); + StringTypeAdapter adapter3(string3); + StringTypeAdapter adapter4(string4); + StringTypeAdapter adapter5(string5); + StringTypeAdapter adapter6(string6); + StringTypeAdapter adapter7(string7); + StringTypeAdapter adapter8(string8); + StringTypeAdapter adapter9(string9); + + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + sumWithOverflow(length, adapter4.length(), overflow); + sumWithOverflow(length, adapter5.length(), overflow); + sumWithOverflow(length, adapter6.length(), overflow); + sumWithOverflow(length, adapter7.length(), overflow); + sumWithOverflow(length, adapter8.length(), overflow); + sumWithOverflow(length, adapter9.length(), overflow); + if (overflow) + return 0; + + if (adapter1.is8Bit() && adapter2.is8Bit() && adapter3.is8Bit() && adapter4.is8Bit() && adapter5.is8Bit() && adapter6.is8Bit() && adapter7.is8Bit() && adapter8.is8Bit() && adapter9.is8Bit()) { + LChar* buffer; + 
RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + LChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + result += adapter5.length(); + adapter6.writeTo(result); + result += adapter6.length(); + adapter7.writeTo(result); + result += adapter7.length(); + adapter8.writeTo(result); + result += adapter8.length(); + adapter9.writeTo(result); + + return resultImpl.release(); + } + + UChar* buffer; + RefPtr resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + result += adapter5.length(); + adapter6.writeTo(result); + result += adapter6.length(); + adapter7.writeTo(result); + result += adapter7.length(); + adapter8.writeTo(result); + result += adapter8.length(); + adapter9.writeTo(result); + + return resultImpl.release(); +} + + +// Convenience only. 
+template +String makeString(StringType1 string1) +{ + return String(string1); +} + +template +String makeString(StringType1 string1, StringType2 string2) +{ + RefPtr resultImpl = tryMakeString(string1, string2); + if (!resultImpl) + CRASH(); + return resultImpl.release(); +} + +template +String makeString(StringType1 string1, StringType2 string2, StringType3 string3) +{ + RefPtr resultImpl = tryMakeString(string1, string2, string3); + if (!resultImpl) + CRASH(); + return resultImpl.release(); +} + +template +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4) +{ + RefPtr resultImpl = tryMakeString(string1, string2, string3, string4); + if (!resultImpl) + CRASH(); + return resultImpl.release(); +} + +template +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5) +{ + RefPtr resultImpl = tryMakeString(string1, string2, string3, string4, string5); + if (!resultImpl) + CRASH(); + return resultImpl.release(); +} + +template +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6) +{ + RefPtr resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6); + if (!resultImpl) + CRASH(); + return resultImpl.release(); +} + +template +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7) +{ + RefPtr resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7); + if (!resultImpl) + CRASH(); + return resultImpl.release(); +} + +template +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8) +{ + RefPtr resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, 
string7, string8); + if (!resultImpl) + CRASH(); + return resultImpl.release(); +} + +template +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8, StringType9 string9) +{ + RefPtr resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7, string8, string9); + if (!resultImpl) + CRASH(); + return resultImpl.release(); +} + +} // namespace WTF + +using WTF::makeString; + +#include "StringOperators.h" +#endif diff --git a/Source/JavaScriptCore/wtf/text/StringHash.h b/Source/JavaScriptCore/wtf/text/StringHash.h new file mode 100644 index 000000000..cde591c18 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringHash.h @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved + * Copyright (C) Research In Motion Limited 2009. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + +#ifndef StringHash_h +#define StringHash_h + +#include "AtomicString.h" +#include "WTFString.h" +#include +#include +#include +#include + +namespace WTF { + + // The hash() functions on StringHash and CaseFoldingHash do not support + // null strings. get(), contains(), and add() on HashMap + // cause a null-pointer dereference when passed null strings. + + // FIXME: We should really figure out a way to put the computeHash function that's + // currently a member function of StringImpl into this file so we can be a little + // closer to having all the nearly-identical hash functions in one place. + + struct StringHash { + static unsigned hash(StringImpl* key) { return key->hash(); } + static bool equal(const StringImpl* a, const StringImpl* b) + { + if (a == b) + return true; + if (!a || !b) + return false; + + unsigned aLength = a->length(); + unsigned bLength = b->length(); + if (aLength != bLength) + return false; + + if (a->is8Bit()) { + if (b->is8Bit()) { + // Both a & b are 8 bit. + return WTF::equal(a->characters8(), b->characters8(), aLength); + } + + // We know that a is 8 bit & b is 16 bit. + return WTF::equal(a->characters8(), b->characters16(), aLength); + } + + if (b->is8Bit()) { + // We know that a is 8 bit and b is 16 bit. 
+ return WTF::equal(a->characters16(), b->characters8(), aLength); + } + + return WTF::equal(a->characters16(), b->characters16(), aLength); + } + + static unsigned hash(const RefPtr& key) { return key->hash(); } + static bool equal(const RefPtr& a, const RefPtr& b) + { + return equal(a.get(), b.get()); + } + + static unsigned hash(const String& key) { return key.impl()->hash(); } + static bool equal(const String& a, const String& b) + { + return equal(a.impl(), b.impl()); + } + + static const bool safeToCompareToEmptyOrDeleted = false; + }; + + class CaseFoldingHash { + public: + template static inline UChar foldCase(T ch) + { + return WTF::Unicode::foldCase(ch); + } + + static unsigned hash(const UChar* data, unsigned length) + { + return StringHasher::computeHash >(data, length); + } + + static unsigned hash(StringImpl* str) + { + return hash(str->characters(), str->length()); + } + + static unsigned hash(const LChar* data, unsigned length) + { + return StringHasher::computeHash >(data, length); + } + + static inline unsigned hash(const char* data, unsigned length) + { + return CaseFoldingHash::hash(reinterpret_cast(data), length); + } + + static bool equal(const StringImpl* a, const StringImpl* b) + { + if (a == b) + return true; + if (!a || !b) + return false; + unsigned length = a->length(); + if (length != b->length()) + return false; + return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0; + } + + static unsigned hash(const RefPtr& key) + { + return hash(key.get()); + } + + static bool equal(const RefPtr& a, const RefPtr& b) + { + return equal(a.get(), b.get()); + } + + static unsigned hash(const String& key) + { + return hash(key.impl()); + } + static unsigned hash(const AtomicString& key) + { + return hash(key.impl()); + } + static bool equal(const String& a, const String& b) + { + return equal(a.impl(), b.impl()); + } + static bool equal(const AtomicString& a, const AtomicString& b) + { + return (a == b) || equal(a.impl(), 
b.impl()); + } + + static const bool safeToCompareToEmptyOrDeleted = false; + }; + + // This hash can be used in cases where the key is a hash of a string, but we don't + // want to store the string. It's not really specific to string hashing, but all our + // current uses of it are for strings. + struct AlreadyHashed : IntHash { + static unsigned hash(unsigned key) { return key; } + + // To use a hash value as a key for a hash table, we need to eliminate the + // "deleted" value, which is negative one. That could be done by changing + // the string hash function to never generate negative one, but this works + // and is still relatively efficient. + static unsigned avoidDeletedValue(unsigned hash) + { + ASSERT(hash); + unsigned newHash = hash | (!(hash + 1) << 31); + ASSERT(newHash); + ASSERT(newHash != 0xFFFFFFFF); + return newHash; + } + }; + +} + +using WTF::StringHash; +using WTF::CaseFoldingHash; +using WTF::AlreadyHashed; + +#endif diff --git a/Source/JavaScriptCore/wtf/text/StringImpl.cpp b/Source/JavaScriptCore/wtf/text/StringImpl.cpp new file mode 100644 index 000000000..aa5a8d56e --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringImpl.cpp @@ -0,0 +1,1541 @@ +/* + * Copyright (C) 1999 Lars Knoll (knoll@kde.org) + * (C) 1999 Antti Koivisto (koivisto@kde.org) + * (C) 2001 Dirk Mueller ( mueller@kde.org ) + * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. + * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include "config.h" +#include "StringImpl.h" + +#include "AtomicString.h" +#include "StringBuffer.h" +#include "StringHash.h" +#include +#include + +using namespace std; + +namespace WTF { + +using namespace Unicode; + +COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small); + +StringImpl::~StringImpl() +{ + ASSERT(!isStatic()); + + if (isAtomic()) + AtomicString::remove(this); +#if USE(JSC) + if (isIdentifier()) { + if (!wtfThreadData().currentIdentifierTable()->remove(this)) + CRASH(); + } +#endif + + BufferOwnership ownership = bufferOwnership(); + + if (has16BitShadow()) { + ASSERT(m_copyData16); + fastFree(m_copyData16); + } + + if (ownership == BufferInternal) + return; + if (ownership == BufferOwned) { + // We use m_data8, but since it is a union with m_data16 this works either way. + ASSERT(m_data8); + fastFree(const_cast(m_data8)); + return; + } + + ASSERT(ownership == BufferSubstring); + ASSERT(m_substringBuffer); + m_substringBuffer->deref(); +} + +PassRefPtr StringImpl::createUninitialized(unsigned length, LChar*& data) +{ + if (!length) { + data = 0; + return empty(); + } + + // Allocate a single buffer large enough to contain the StringImpl + // struct as well as the data which it contains. This removes one + // heap allocation from this call. 
+ if (length > ((std::numeric_limits::max() - sizeof(StringImpl)) / sizeof(LChar))) + CRASH(); + size_t size = sizeof(StringImpl) + length * sizeof(LChar); + StringImpl* string = static_cast(fastMalloc(size)); + + data = reinterpret_cast(string + 1); + return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructor)); +} + +PassRefPtr StringImpl::createUninitialized(unsigned length, UChar*& data) +{ + if (!length) { + data = 0; + return empty(); + } + + // Allocate a single buffer large enough to contain the StringImpl + // struct as well as the data which it contains. This removes one + // heap allocation from this call. + if (length > ((std::numeric_limits::max() - sizeof(StringImpl)) / sizeof(UChar))) + CRASH(); + size_t size = sizeof(StringImpl) + length * sizeof(UChar); + StringImpl* string = static_cast(fastMalloc(size)); + + data = reinterpret_cast(string + 1); + return adoptRef(new (NotNull, string) StringImpl(length)); +} + +PassRefPtr StringImpl::reallocate(PassRefPtr originalString, unsigned length, LChar*& data) +{ + ASSERT(originalString->is8Bit()); + ASSERT(originalString->hasOneRef()); + ASSERT(originalString->bufferOwnership() == BufferInternal); + + if (!length) { + data = 0; + return empty(); + } + + // Same as createUninitialized() except here we use fastRealloc. 
+ if (length > ((std::numeric_limits::max() - sizeof(StringImpl)) / sizeof(LChar))) + CRASH(); + size_t size = sizeof(StringImpl) + length * sizeof(LChar); + originalString->~StringImpl(); + StringImpl* string = static_cast(fastRealloc(originalString.leakRef(), size)); + + data = reinterpret_cast(string + 1); + return adoptRef(new (NotNull, string) StringImpl(length, Force8BitConstructor)); +} + +PassRefPtr StringImpl::reallocate(PassRefPtr originalString, unsigned length, UChar*& data) +{ + ASSERT(!originalString->is8Bit()); + ASSERT(originalString->hasOneRef()); + ASSERT(originalString->bufferOwnership() == BufferInternal); + + if (!length) { + data = 0; + return empty(); + } + + // Same as createUninitialized() except here we use fastRealloc. + if (length > ((std::numeric_limits::max() - sizeof(StringImpl)) / sizeof(UChar))) + CRASH(); + size_t size = sizeof(StringImpl) + length * sizeof(UChar); + originalString->~StringImpl(); + StringImpl* string = static_cast(fastRealloc(originalString.leakRef(), size)); + + data = reinterpret_cast(string + 1); + return adoptRef(new (NotNull, string) StringImpl(length)); +} + +PassRefPtr StringImpl::create(const UChar* characters, unsigned length) +{ + if (!characters || !length) + return empty(); + + UChar* data; + RefPtr string = createUninitialized(length, data); + memcpy(data, characters, length * sizeof(UChar)); + return string.release(); +} + +PassRefPtr StringImpl::create(const LChar* characters, unsigned length) +{ + if (!characters || !length) + return empty(); + + LChar* data; + RefPtr string = createUninitialized(length, data); + memcpy(data, characters, length * sizeof(LChar)); + return string.release(); +} + +PassRefPtr StringImpl::create(const LChar* string) +{ + if (!string) + return empty(); + size_t length = strlen(reinterpret_cast(string)); + if (length > numeric_limits::max()) + CRASH(); + return create(string, length); +} + +const UChar* StringImpl::getData16SlowCase() const +{ + if (has16BitShadow()) + 
return m_copyData16; + + if (bufferOwnership() == BufferSubstring) { + // If this is a substring, return a pointer into the parent string. + // TODO: Consider severing this string from the parent string + unsigned offset = m_data8 - m_substringBuffer->characters8(); + return m_substringBuffer->characters() + offset; + } + + unsigned len = length(); + if (hasTerminatingNullCharacter()) + len++; + + m_copyData16 = static_cast(fastMalloc(len * sizeof(UChar))); + + m_hashAndFlags |= s_hashFlagHas16BitShadow; + + upconvertCharacters(0, len); + + return m_copyData16; +} + +void StringImpl::upconvertCharacters(unsigned start, unsigned end) const +{ + ASSERT(is8Bit()); + ASSERT(has16BitShadow()); + + for (size_t i = start; i < end; i++) + m_copyData16[i] = m_data8[i]; +} + + +bool StringImpl::containsOnlyWhitespace() +{ + // FIXME: The definition of whitespace here includes a number of characters + // that are not whitespace from the point of view of RenderText; I wonder if + // that's a problem in practice. 
+ if (is8Bit()) { + for (unsigned i = 0; i < m_length; i++) { + UChar c = m_data8[i]; + if (!isASCIISpace(c)) + return false; + } + + return true; + } + + for (unsigned i = 0; i < m_length; i++) { + UChar c = m_data16[i]; + if (!isASCIISpace(c)) + return false; + } + return true; +} + +PassRefPtr StringImpl::substring(unsigned start, unsigned length) +{ + if (start >= m_length) + return empty(); + unsigned maxLength = m_length - start; + if (length >= maxLength) { + if (!start) + return this; + length = maxLength; + } + if (is8Bit()) + return create(m_data8 + start, length); + + return create(m_data16 + start, length); +} + +UChar32 StringImpl::characterStartingAt(unsigned i) +{ + if (is8Bit()) + return m_data8[i]; + if (U16_IS_SINGLE(m_data16[i])) + return m_data16[i]; + if (i + 1 < m_length && U16_IS_LEAD(m_data16[i]) && U16_IS_TRAIL(m_data16[i + 1])) + return U16_GET_SUPPLEMENTARY(m_data16[i], m_data16[i + 1]); + return 0; +} + +PassRefPtr StringImpl::lower() +{ + // Note: This is a hot function in the Dromaeo benchmark, specifically the + // no-op code path up through the first 'return' statement. + + // First scan the string for uppercase and non-ASCII characters: + bool noUpper = true; + UChar ored = 0; + if (is8Bit()) { + const LChar* end = m_data8 + m_length; + for (const LChar* chp = m_data8; chp != end; chp++) { + if (UNLIKELY(isASCIIUpper(*chp))) + noUpper = false; + ored |= *chp; + } + // Nothing to do if the string is all ASCII with no uppercase. + if (noUpper && !(ored & ~0x7F)) + return this; + + if (m_length > static_cast(numeric_limits::max())) + CRASH(); + int32_t length = m_length; + + LChar* data8; + RefPtr newImpl = createUninitialized(length, data8); + + if (!(ored & ~0x7F)) { + for (int32_t i = 0; i < length; i++) + data8[i] = toASCIILower(m_data8[i]); + + return newImpl.release(); + } + + // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 
+ for (int32_t i = 0; i < length; i++) + data8[i] = static_cast(Unicode::toLower(m_data8[i])); + + return newImpl.release(); + } + + const UChar *end = m_data16 + m_length; + for (const UChar* chp = m_data16; chp != end; chp++) { + if (UNLIKELY(isASCIIUpper(*chp))) + noUpper = false; + ored |= *chp; + } + // Nothing to do if the string is all ASCII with no uppercase. + if (noUpper && !(ored & ~0x7F)) + return this; + + if (m_length > static_cast(numeric_limits::max())) + CRASH(); + int32_t length = m_length; + + if (!(ored & ~0x7F)) { + UChar* data16; + RefPtr newImpl = createUninitialized(m_length, data16); + + for (int32_t i = 0; i < length; i++) { + UChar c = m_data16[i]; + data16[i] = toASCIILower(c); + } + return newImpl.release(); + } + + // Do a slower implementation for cases that include non-ASCII characters. + UChar* data16; + RefPtr newImpl = createUninitialized(m_length, data16); + + bool error; + int32_t realLength = Unicode::toLower(data16, length, m_data16, m_length, &error); + if (!error && realLength == length) + return newImpl.release(); + + newImpl = createUninitialized(realLength, data16); + Unicode::toLower(data16, realLength, m_data16, m_length, &error); + if (error) + return this; + return newImpl.release(); +} + +PassRefPtr StringImpl::upper() +{ + // This function could be optimized for no-op cases the way lower() is, + // but in empirical testing, few actual calls to upper() are no-ops, so + // it wouldn't be worth the extra time for pre-scanning. + + if (m_length > static_cast(numeric_limits::max())) + CRASH(); + int32_t length = m_length; + + if (is8Bit()) { + LChar* data8; + RefPtr newImpl = createUninitialized(m_length, data8); + + // Do a faster loop for the case where all the characters are ASCII. 
+ LChar ored = 0; + for (int i = 0; i < length; i++) { + LChar c = m_data8[i]; + ored |= c; + data8[i] = toASCIIUpper(c); + } + if (!(ored & ~0x7F)) + return newImpl.release(); + + // Do a slower implementation for cases that include non-ASCII Latin-1 characters. + for (int32_t i = 0; i < length; i++) + data8[i] = static_cast(Unicode::toUpper(m_data8[i])); + + return newImpl.release(); + } + + UChar* data16; + RefPtr newImpl = createUninitialized(m_length, data16); + + // Do a faster loop for the case where all the characters are ASCII. + UChar ored = 0; + for (int i = 0; i < length; i++) { + UChar c = m_data16[i]; + ored |= c; + data16[i] = toASCIIUpper(c); + } + if (!(ored & ~0x7F)) + return newImpl.release(); + + // Do a slower implementation for cases that include non-ASCII characters. + bool error; + newImpl = createUninitialized(m_length, data16); + int32_t realLength = Unicode::toUpper(data16, length, m_data16, m_length, &error); + if (!error && realLength == length) + return newImpl; + newImpl = createUninitialized(realLength, data16); + Unicode::toUpper(data16, realLength, m_data16, m_length, &error); + if (error) + return this; + return newImpl.release(); +} + +PassRefPtr StringImpl::fill(UChar character) +{ + if (!m_length) + return this; + + if (!(character & ~0x7F)) { + LChar* data; + RefPtr newImpl = createUninitialized(m_length, data); + for (unsigned i = 0; i < m_length; ++i) + data[i] = character; + return newImpl.release(); + } + UChar* data; + RefPtr newImpl = createUninitialized(m_length, data); + for (unsigned i = 0; i < m_length; ++i) + data[i] = character; + return newImpl.release(); +} + +PassRefPtr StringImpl::foldCase() +{ + if (m_length > static_cast(numeric_limits::max())) + CRASH(); + int32_t length = m_length; + + if (is8Bit()) { + // Do a faster loop for the case where all the characters are ASCII. 
+ LChar* data; + RefPtr newImpl = createUninitialized(m_length, data); + LChar ored = 0; + + for (int32_t i = 0; i < length; i++) { + LChar c = m_data8[i]; + data[i] = toASCIILower(c); + ored |= c; + } + + if (!(ored & ~0x7F)) + return newImpl.release(); + + // Do a slower implementation for cases that include non-ASCII Latin-1 characters. + for (int32_t i = 0; i < length; i++) + data[i] = static_cast(Unicode::toLower(m_data8[i])); + + return newImpl.release(); + } + + // Do a faster loop for the case where all the characters are ASCII. + UChar* data; + RefPtr newImpl = createUninitialized(m_length, data); + UChar ored = 0; + for (int32_t i = 0; i < length; i++) { + UChar c = m_data16[i]; + ored |= c; + data[i] = toASCIILower(c); + } + if (!(ored & ~0x7F)) + return newImpl.release(); + + // Do a slower implementation for cases that include non-ASCII characters. + bool error; + int32_t realLength = Unicode::foldCase(data, length, m_data16, m_length, &error); + if (!error && realLength == length) + return newImpl.release(); + newImpl = createUninitialized(realLength, data); + Unicode::foldCase(data, realLength, m_data16, m_length, &error); + if (error) + return this; + return newImpl.release(); +} + +template +inline PassRefPtr StringImpl::stripMatchedCharacters(UCharPredicate predicate) +{ + if (!m_length) + return empty(); + + unsigned start = 0; + unsigned end = m_length - 1; + + // skip white space from start + while (start <= end && predicate(is8Bit() ? m_data8[start] : m_data16[start])) + start++; + + // only white space + if (start > end) + return empty(); + + // skip white space from end + while (end && predicate(is8Bit() ? 
m_data8[end] : m_data16[end])) + end--; + + if (!start && end == m_length - 1) + return this; + if (is8Bit()) + return create(m_data8 + start, end + 1 - start); + return create(m_data16 + start, end + 1 - start); +} + +class UCharPredicate { +public: + inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { } + + inline bool operator()(UChar ch) const + { + return m_function(ch); + } + +private: + const CharacterMatchFunctionPtr m_function; +}; + +class SpaceOrNewlinePredicate { +public: + inline bool operator()(UChar ch) const + { + return isSpaceOrNewline(ch); + } +}; + +PassRefPtr StringImpl::stripWhiteSpace() +{ + return stripMatchedCharacters(SpaceOrNewlinePredicate()); +} + +PassRefPtr StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) +{ + return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); +} + +template +ALWAYS_INLINE PassRefPtr StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch) +{ + const CharType* from = characters; + const CharType* fromend = from + m_length; + + // Assume the common case will not remove any characters + while (from != fromend && !findMatch(*from)) + from++; + if (from == fromend) + return this; + + StringBuffer data(m_length); + CharType* to = data.characters(); + unsigned outc = from - characters; + + if (outc) + memcpy(to, characters, outc * sizeof(CharType)); + + while (true) { + while (from != fromend && findMatch(*from)) + from++; + while (from != fromend && !findMatch(*from)) + to[outc++] = *from++; + if (from == fromend) + break; + } + + data.shrink(outc); + + return adopt(data); +} + +PassRefPtr StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) +{ + if (is8Bit()) + return removeCharacters(characters8(), findMatch); + return removeCharacters(characters16(), findMatch); +} + +template +inline PassRefPtr StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate) +{ + StringBuffer data(m_length); + + const 
CharType* from = getCharacters(); + const CharType* fromend = from + m_length; + int outc = 0; + bool changedToSpace = false; + + CharType* to = data.characters(); + + while (true) { + while (from != fromend && predicate(*from)) { + if (*from != ' ') + changedToSpace = true; + from++; + } + while (from != fromend && !predicate(*from)) + to[outc++] = *from++; + if (from != fromend) + to[outc++] = ' '; + else + break; + } + + if (outc > 0 && to[outc - 1] == ' ') + outc--; + + if (static_cast(outc) == m_length && !changedToSpace) + return this; + + data.shrink(outc); + + return adopt(data); +} + +PassRefPtr StringImpl::simplifyWhiteSpace() +{ + if (is8Bit()) + return StringImpl::simplifyMatchedCharactersToSpace(SpaceOrNewlinePredicate()); + return StringImpl::simplifyMatchedCharactersToSpace(SpaceOrNewlinePredicate()); +} + +PassRefPtr StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) +{ + if (is8Bit()) + return StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate(isWhiteSpace)); + return StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate(isWhiteSpace)); +} + +int StringImpl::toIntStrict(bool* ok, int base) +{ + if (is8Bit()) + return charactersToIntStrict(characters8(), m_length, ok, base); + return charactersToIntStrict(characters16(), m_length, ok, base); +} + +unsigned StringImpl::toUIntStrict(bool* ok, int base) +{ + if (is8Bit()) + return charactersToUIntStrict(characters8(), m_length, ok, base); + return charactersToUIntStrict(characters16(), m_length, ok, base); +} + +int64_t StringImpl::toInt64Strict(bool* ok, int base) +{ + if (is8Bit()) + return charactersToInt64Strict(characters8(), m_length, ok, base); + return charactersToInt64Strict(characters16(), m_length, ok, base); +} + +uint64_t StringImpl::toUInt64Strict(bool* ok, int base) +{ + if (is8Bit()) + return charactersToUInt64Strict(characters8(), m_length, ok, base); + return charactersToUInt64Strict(characters16(), m_length, ok, base); +} + +intptr_t 
StringImpl::toIntPtrStrict(bool* ok, int base) +{ + if (is8Bit()) + return charactersToIntPtrStrict(characters8(), m_length, ok, base); + return charactersToIntPtrStrict(characters16(), m_length, ok, base); +} + +int StringImpl::toInt(bool* ok) +{ + if (is8Bit()) + return charactersToInt(characters8(), m_length, ok); + return charactersToInt(characters16(), m_length, ok); +} + +unsigned StringImpl::toUInt(bool* ok) +{ + if (is8Bit()) + return charactersToUInt(characters8(), m_length, ok); + return charactersToUInt(characters16(), m_length, ok); +} + +int64_t StringImpl::toInt64(bool* ok) +{ + if (is8Bit()) + return charactersToInt64(characters8(), m_length, ok); + return charactersToInt64(characters16(), m_length, ok); +} + +uint64_t StringImpl::toUInt64(bool* ok) +{ + if (is8Bit()) + return charactersToUInt64(characters8(), m_length, ok); + return charactersToUInt64(characters16(), m_length, ok); +} + +intptr_t StringImpl::toIntPtr(bool* ok) +{ + if (is8Bit()) + return charactersToIntPtr(characters8(), m_length, ok); + return charactersToIntPtr(characters16(), m_length, ok); +} + +double StringImpl::toDouble(bool* ok, bool* didReadNumber) +{ + if (is8Bit()) + return charactersToDouble(characters8(), m_length, ok, didReadNumber); + return charactersToDouble(characters16(), m_length, ok, didReadNumber); +} + +float StringImpl::toFloat(bool* ok, bool* didReadNumber) +{ + if (is8Bit()) + return charactersToFloat(characters8(), m_length, ok, didReadNumber); + return charactersToFloat(characters16(), m_length, ok, didReadNumber); +} + +bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) +{ + while (length--) { + LChar bc = *b++; + if (foldCase(*a++) != foldCase(bc)) + return false; + } + return true; +} + +static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) +{ + ASSERT(length >= 0); + return umemcasecmp(a, b, length) == 0; +} + +int codePointCompare(const StringImpl* s1, const StringImpl* s2) +{ + const unsigned l1 = s1 ? 
s1->length() : 0; + const unsigned l2 = s2 ? s2->length() : 0; + const unsigned lmin = l1 < l2 ? l1 : l2; + const UChar* c1 = s1 ? s1->characters() : 0; + const UChar* c2 = s2 ? s2->characters() : 0; + unsigned pos = 0; + while (pos < lmin && *c1 == *c2) { + c1++; + c2++; + pos++; + } + + if (pos < lmin) + return (c1[0] > c2[0]) ? 1 : -1; + + if (l1 == l2) + return 0; + + return (l1 > l2) ? 1 : -1; +} + +size_t StringImpl::find(UChar c, unsigned start) +{ + if (is8Bit()) + return WTF::find(characters8(), m_length, c, start); + return WTF::find(characters16(), m_length, c, start); +} + +size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) +{ + if (is8Bit()) + return WTF::find(characters8(), m_length, matchFunction, start); + return WTF::find(characters16(), m_length, matchFunction, start); +} + +// Returns the position of the first occurrence, at or after index, of the NUL-terminated +// 8-bit string matchString, or notFound if there is none (or matchString is null). +// An empty matchString matches at min(index, length()). +size_t StringImpl::find(const LChar* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + size_t matchStringLength = strlen(reinterpret_cast(matchString)); + if (matchStringLength > numeric_limits::max()) + CRASH(); + unsigned matchLength = matchStringLength; + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. + // Use the accessor that matches this string's storage width, as find(UChar, unsigned) + // does; characters16() must not be used on an 8-bit string. + if (matchLength == 1) { + if (is8Bit()) + return WTF::find(characters8(), length(), *matchString, index); + return WTF::find(characters16(), length(), *matchString, index); + } + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. 
+ unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchString[i]; + } + + unsigned i = 0; + // keep looping until we match + while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; +} + +size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + size_t matchStringLength = strlen(reinterpret_cast(matchString)); + if (matchStringLength > numeric_limits::max()) + CRASH(); + unsigned matchLength = matchStringLength; + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + + unsigned i = 0; + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + ++i; + } + return index + i; +} + +size_t StringImpl::find(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. 
+ if (matchLength == 1) { + if (is8Bit() && matchString->is8Bit()) + return WTF::find(characters8(), length(), matchString->characters8()[0], index); + return WTF::find(characters(), length(), matchString->characters()[0], index); + } + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const UChar* matchCharacters = matchString->characters(); + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchCharacters[i]; + } + + unsigned i = 0; + // keep looping until we match + while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; +} + +size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. 
+ unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const UChar* matchCharacters = matchString->characters(); + + unsigned i = 0; + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { + if (i == delta) + return notFound; + ++i; + } + return index + i; +} + +size_t StringImpl::reverseFind(UChar c, unsigned index) +{ + if (is8Bit()) + return WTF::reverseFind(characters8(), m_length, c, index); + return WTF::reverseFind(characters16(), m_length, c, index); +} + +size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. + if (matchLength == 1) { + if (is8Bit() && matchString->is8Bit()) + return WTF::reverseFind(characters8(), length(), matchString->characters8()[0], index); + return WTF::reverseFind(characters(), length(), matchString->characters()[0], index); + } + + // Check index & matchLength are in range. + if (matchLength > length()) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = min(index, length() - matchLength); + + const UChar *searchCharacters = characters(); + const UChar *matchCharacters = matchString->characters(); + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. 
+ unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[delta + i]; + matchHash += matchCharacters[i]; + } + + // keep looping until we match + while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) { + if (!delta) + return notFound; + delta--; + searchHash -= searchCharacters[delta + matchLength]; + searchHash += searchCharacters[delta]; + } + return delta; +} + +size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (matchLength > length()) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = min(index, length() - matchLength); + + const UChar *searchCharacters = characters(); + const UChar *matchCharacters = matchString->characters(); + + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { + if (!delta) + return notFound; + delta--; + } + return delta; +} + +bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive) +{ + ASSERT(matchString); + if (m_length >= matchString->m_length) { + unsigned start = m_length - matchString->m_length; + return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start; + } + return false; +} + +PassRefPtr StringImpl::replace(UChar oldC, UChar newC) +{ + if (oldC == newC) + return this; + unsigned i; + for (i = 0; i != m_length; ++i) { + UChar c = is8Bit() ? 
m_data8[i] : m_data16[i]; + if (c == oldC) + break; + } + if (i == m_length) + return this; + + if (is8Bit()) { + if (oldC > 0xff) + // Looking for a 16 bit char in an 8 bit string, we're done. + return this; + + if (newC <= 0xff) { + LChar* data; + LChar oldChar = static_cast(oldC); + LChar newChar = static_cast(newC); + + RefPtr newImpl = createUninitialized(m_length, data); + + for (i = 0; i != m_length; ++i) { + LChar ch = m_data8[i]; + if (ch == oldChar) + ch = newChar; + data[i] = ch; + } + return newImpl.release(); + } + + // There is the possibility we need to up convert from 8 to 16 bit, + // create a 16 bit string for the result. + UChar* data; + RefPtr newImpl = createUninitialized(m_length, data); + + for (i = 0; i != m_length; ++i) { + UChar ch = m_data8[i]; + if (ch == oldC) + ch = newC; + data[i] = ch; + } + + return newImpl.release(); + } + + UChar* data; + RefPtr newImpl = createUninitialized(m_length, data); + + for (i = 0; i != m_length; ++i) { + UChar ch = m_data16[i]; + if (ch == oldC) + ch = newC; + data[i] = ch; + } + return newImpl.release(); +} + +PassRefPtr StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) +{ + position = min(position, length()); + lengthToReplace = min(lengthToReplace, length() - position); + unsigned lengthToInsert = str ? 
str->length() : 0; + if (!lengthToReplace && !lengthToInsert) + return this; + + if ((length() - lengthToReplace) >= (numeric_limits::max() - lengthToInsert)) + CRASH(); + + if (is8Bit() && (!str || str->is8Bit())) { + LChar* data; + RefPtr newImpl = + createUninitialized(length() - lengthToReplace + lengthToInsert, data); + memcpy(data, m_data8, position * sizeof(LChar)); + if (str) + memcpy(data + position, str->m_data8, lengthToInsert * sizeof(LChar)); + memcpy(data + position + lengthToInsert, m_data8 + position + lengthToReplace, + (length() - position - lengthToReplace) * sizeof(LChar)); + return newImpl.release(); + } + UChar* data; + RefPtr newImpl = + createUninitialized(length() - lengthToReplace + lengthToInsert, data); + if (is8Bit()) + for (unsigned i = 0; i < position; i++) + data[i] = m_data8[i]; + else + memcpy(data, m_data16, position * sizeof(UChar)); + if (str) { + if (str->is8Bit()) + for (unsigned i = 0; i < lengthToInsert; i++) + data[i + position] = str->m_data8[i]; + else + memcpy(data + position, str->m_data16, lengthToInsert * sizeof(UChar)); + } + if (is8Bit()) { + for (unsigned i = 0; i < length() - position - lengthToReplace; i++) + data[i + position + lengthToInsert] = m_data8[i + position + lengthToReplace]; + } else { + memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace, + (length() - position - lengthToReplace) * sizeof(UChar)); + } + return newImpl.release(); +} + +PassRefPtr StringImpl::replace(UChar pattern, StringImpl* replacement) +{ + if (!replacement) + return this; + + unsigned repStrLength = replacement->length(); + size_t srcSegmentStart = 0; + unsigned matchCount = 0; + + // Count the matches. + while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { + ++matchCount; + ++srcSegmentStart; + } + + // If we have 0 matches then we don't have to do any more work. 
+ if (!matchCount) + return this; + + if (repStrLength && matchCount > numeric_limits::max() / repStrLength) + CRASH(); + + unsigned replaceSize = matchCount * repStrLength; + unsigned newSize = m_length - matchCount; + if (newSize >= (numeric_limits::max() - replaceSize)) + CRASH(); + + newSize += replaceSize; + + // Construct the new data. + size_t srcSegmentEnd; + unsigned srcSegmentLength; + srcSegmentStart = 0; + unsigned dstOffset = 0; + bool srcIs8Bit = is8Bit(); + bool replacementIs8Bit = replacement->is8Bit(); + + // There are 4 cases: + // 1. This and replacement are both 8 bit. + // 2. This and replacement are both 16 bit. + // 3. This is 8 bit and replacement is 16 bit. + // 4. This is 16 bit and replacement is 8 bit. + if (srcIs8Bit && replacementIs8Bit) { + // Case 1 + LChar* data; + RefPtr newImpl = createUninitialized(newSize, data); + + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { + srcSegmentLength = srcSegmentEnd - srcSegmentStart; + memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * sizeof(LChar)); + dstOffset += srcSegmentLength; + memcpy(data + dstOffset, replacement->m_data8, repStrLength * sizeof(LChar)); + dstOffset += repStrLength; + srcSegmentStart = srcSegmentEnd + 1; + } + + srcSegmentLength = m_length - srcSegmentStart; + memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * sizeof(LChar)); + + ASSERT(dstOffset + srcSegmentLength == newImpl->length()); + + return newImpl.release(); + } + + UChar* data; + RefPtr newImpl = createUninitialized(newSize, data); + + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { + srcSegmentLength = srcSegmentEnd - srcSegmentStart; + if (srcIs8Bit) { + // Case 3. + for (unsigned i = 0; i < srcSegmentLength; i++) + data[i + dstOffset] = m_data8[i + srcSegmentStart]; + } else { + // Cases 2 & 4. 
+ memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + } + dstOffset += srcSegmentLength; + if (replacementIs8Bit) { + // Case 4. + for (unsigned i = 0; i < repStrLength; i++) + data[i + dstOffset] = replacement->m_data8[i]; + } else { + // Cases 2 & 3. + memcpy(data + dstOffset, replacement->m_data16, repStrLength * sizeof(UChar)); + } + dstOffset += repStrLength; + srcSegmentStart = srcSegmentEnd + 1; + } + + srcSegmentLength = m_length - srcSegmentStart; + if (srcIs8Bit) { + // Case 3. + for (unsigned i = 0; i < srcSegmentLength; i++) + data[i + dstOffset] = m_data8[i + srcSegmentStart]; + } else { + // Cases 2 & 4. + memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + } + + ASSERT(dstOffset + srcSegmentLength == newImpl->length()); + + return newImpl.release(); +} + +PassRefPtr StringImpl::replace(StringImpl* pattern, StringImpl* replacement) +{ + if (!pattern || !replacement) + return this; + + unsigned patternLength = pattern->length(); + if (!patternLength) + return this; + + unsigned repStrLength = replacement->length(); + size_t srcSegmentStart = 0; + unsigned matchCount = 0; + + // Count the matches. + while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { + ++matchCount; + srcSegmentStart += patternLength; + } + + // If we have 0 matches, we don't have to do any more work + if (!matchCount) + return this; + + unsigned newSize = m_length - matchCount * patternLength; + if (repStrLength && matchCount > numeric_limits::max() / repStrLength) + CRASH(); + + if (newSize > (numeric_limits::max() - matchCount * repStrLength)) + CRASH(); + + newSize += matchCount * repStrLength; + + + // Construct the new data + size_t srcSegmentEnd; + unsigned srcSegmentLength; + srcSegmentStart = 0; + unsigned dstOffset = 0; + bool srcIs8Bit = is8Bit(); + bool replacementIs8Bit = replacement->is8Bit(); + + // There are 4 cases: + // 1. This and replacement are both 8 bit. + // 2. 
This and replacement are both 16 bit. + // 3. This is 8 bit and replacement is 16 bit. + // 4. This is 16 bit and replacement is 8 bit. + if (srcIs8Bit && replacementIs8Bit) { + // Case 1 + LChar* data; + RefPtr newImpl = createUninitialized(newSize, data); + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { + srcSegmentLength = srcSegmentEnd - srcSegmentStart; + memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * sizeof(LChar)); + dstOffset += srcSegmentLength; + memcpy(data + dstOffset, replacement->m_data8, repStrLength * sizeof(LChar)); + dstOffset += repStrLength; + srcSegmentStart = srcSegmentEnd + patternLength; + } + + srcSegmentLength = m_length - srcSegmentStart; + memcpy(data + dstOffset, m_data8 + srcSegmentStart, srcSegmentLength * sizeof(LChar)); + + ASSERT(dstOffset + srcSegmentLength == newImpl->length()); + + return newImpl.release(); + } + + UChar* data; + RefPtr newImpl = createUninitialized(newSize, data); + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { + srcSegmentLength = srcSegmentEnd - srcSegmentStart; + if (srcIs8Bit) { + // Case 3. + for (unsigned i = 0; i < srcSegmentLength; i++) + data[i + dstOffset] = m_data8[i + srcSegmentStart]; + } else { + // Cases 2 & 4. + memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + } + dstOffset += srcSegmentLength; + if (replacementIs8Bit) { + // Case 4. + for (unsigned i = 0; i < repStrLength; i++) + data[i + dstOffset] = replacement->m_data8[i]; + } else { + // Cases 2 & 3. + memcpy(data + dstOffset, replacement->m_data16, repStrLength * sizeof(UChar)); + } + dstOffset += repStrLength; + srcSegmentStart = srcSegmentEnd + patternLength; + } + + srcSegmentLength = m_length - srcSegmentStart; + if (srcIs8Bit) { + // Case 3. + for (unsigned i = 0; i < srcSegmentLength; i++) + data[i + dstOffset] = m_data8[i + srcSegmentStart]; + } else { + // Cases 2 & 4. 
+ memcpy(data + dstOffset, m_data16 + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + } + + ASSERT(dstOffset + srcSegmentLength == newImpl->length()); + + return newImpl.release(); +} + +bool equal(const StringImpl* a, const StringImpl* b) +{ + return StringHash::equal(a, b); +} + +bool equal(const StringImpl* a, const LChar* b, unsigned length) +{ + if (!a) + return !b; + if (!b) + return !a; + + if (length != a->length()) + return false; + + if (a->is8Bit()) + return equal(a->characters8(), b, length); + return equal(a->characters16(), b, length); +} + +bool equal(const StringImpl* a, const LChar* b) +{ + if (!a) + return !b; + if (!b) + return !a; + + unsigned length = a->length(); + + if (a->is8Bit()) { + const LChar* aPtr = a->characters8(); + for (unsigned i = 0; i != length; ++i) { + LChar bc = b[i]; + LChar ac = aPtr[i]; + if (!bc) + return false; + if (ac != bc) + return false; + } + + return !b[length]; + } + + const UChar* aPtr = a->characters16(); + for (unsigned i = 0; i != length; ++i) { + LChar bc = b[i]; + if (!bc) + return false; + if (aPtr[i] != bc) + return false; + } + + return !b[length]; +} + +bool equal(const StringImpl* a, const UChar* b, unsigned length) +{ + if (!a) + return !b; + if (!b) + return false; + + if (a->length() != length) + return false; + if (a->is8Bit()) + return equal(a->characters8(), b, length); + return equal(a->characters16(), b, length); +} + +bool equalIgnoringCase(StringImpl* a, StringImpl* b) +{ + return CaseFoldingHash::equal(a, b); +} + +bool equalIgnoringCase(StringImpl* a, const LChar* b) +{ + if (!a) + return !b; + if (!b) + return !a; + + unsigned length = a->length(); + + // Do a faster loop for the case where all the characters are ASCII. 
+ UChar ored = 0; + bool equal = true; + if (a->is8Bit()) { + const LChar* as = a->characters8(); + for (unsigned i = 0; i != length; ++i) { + LChar bc = b[i]; + if (!bc) + return false; + UChar ac = as[i]; + ored |= ac; + equal = equal && (toASCIILower(ac) == toASCIILower(bc)); + } + + // Do a slower implementation for cases that include non-ASCII characters. + if (ored & ~0x7F) { + equal = true; + for (unsigned i = 0; i != length; ++i) + equal = equal && (foldCase(as[i]) == foldCase(b[i])); + } + + return equal && !b[length]; + } + + const UChar* as = a->characters16(); + for (unsigned i = 0; i != length; ++i) { + LChar bc = b[i]; + if (!bc) + return false; + UChar ac = as[i]; + ored |= ac; + equal = equal && (toASCIILower(ac) == toASCIILower(bc)); + } + + // Do a slower implementation for cases that include non-ASCII characters. + if (ored & ~0x7F) { + equal = true; + for (unsigned i = 0; i != length; ++i) { + equal = equal && (foldCase(as[i]) == foldCase(b[i])); + } + } + + return equal && !b[length]; +} + +bool equalIgnoringNullity(StringImpl* a, StringImpl* b) +{ + if (StringHash::equal(a, b)) + return true; + if (!a && b && !b->length()) + return true; + if (!b && a && !a->length()) + return true; + + return false; +} + +WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirectionality) +{ + for (unsigned i = 0; i < m_length; ++i) { + WTF::Unicode::Direction charDirection = WTF::Unicode::direction(is8Bit() ? 
m_data8[i] : m_data16[i]); + if (charDirection == WTF::Unicode::LeftToRight) { + if (hasStrongDirectionality) + *hasStrongDirectionality = true; + return WTF::Unicode::LeftToRight; + } + if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) { + if (hasStrongDirectionality) + *hasStrongDirectionality = true; + return WTF::Unicode::RightToLeft; + } + } + if (hasStrongDirectionality) + *hasStrongDirectionality = false; + return WTF::Unicode::LeftToRight; +} + +PassRefPtr StringImpl::adopt(StringBuffer& buffer) +{ +unsigned length = buffer.length(); +if (!length) + return empty(); +return adoptRef(new StringImpl(buffer.release(), length)); +} + +PassRefPtr StringImpl::adopt(StringBuffer& buffer) +{ + unsigned length = buffer.length(); + if (!length) + return empty(); + return adoptRef(new StringImpl(buffer.release(), length)); +} + +PassRefPtr StringImpl::createWithTerminatingNullCharacter(const StringImpl& string) +{ + // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer + // get allocated in a single memory block. 
+ unsigned length = string.m_length; + if (length >= numeric_limits::max()) + CRASH(); + RefPtr terminatedString; + if (string.is8Bit()) { + LChar* data; + terminatedString = createUninitialized(length + 1, data); + memcpy(data, string.m_data8, length * sizeof(LChar)); + data[length] = 0; + } else { + UChar* data; + terminatedString = createUninitialized(length + 1, data); + memcpy(data, string.m_data16, length * sizeof(UChar)); + data[length] = 0; + } + terminatedString->m_length--; + terminatedString->m_hashAndFlags = (string.m_hashAndFlags & (~s_flagMask | s_hashFlag8BitBuffer)) | s_hashFlagHasTerminatingNullCharacter; + return terminatedString.release(); +} + +} // namespace WTF diff --git a/Source/JavaScriptCore/wtf/text/StringImpl.h b/Source/JavaScriptCore/wtf/text/StringImpl.h new file mode 100644 index 000000000..a3008e1d3 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringImpl.h @@ -0,0 +1,774 @@ +/* + * Copyright (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. + * Copyright (C) 2009 Google Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + +#ifndef StringImpl_h +#define StringImpl_h + +#include +#include +#include +#include +#include +#include +#include +#include + +#if USE(CF) +typedef const struct __CFString * CFStringRef; +#endif + +#ifdef __OBJC__ +@class NSString; +#endif + +// FIXME: This is a temporary layering violation while we move string code to WTF. +// Landing the file moves in one patch, will follow on with patches to change the namespaces. +namespace JSC { +struct IdentifierCStringTranslator; +template struct IdentifierCharBufferTranslator; +struct IdentifierLCharFromUCharTranslator; +} + +namespace WTF { + +struct CStringTranslator; +struct HashAndCharactersTranslator; +struct HashAndUTF8CharactersTranslator; +struct UCharBufferTranslator; + +enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; + +typedef bool (*CharacterMatchFunctionPtr)(UChar); +typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); + +class StringImpl { + WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED; + friend struct JSC::IdentifierCStringTranslator; + friend struct JSC::IdentifierCharBufferTranslator; + friend struct JSC::IdentifierCharBufferTranslator; + friend struct JSC::IdentifierLCharFromUCharTranslator; + friend struct WTF::CStringTranslator; + friend struct WTF::HashAndCharactersTranslator; + friend struct WTF::HashAndUTF8CharactersTranslator; + friend struct WTF::UCharBufferTranslator; + friend class AtomicStringImpl; + +private: + enum BufferOwnership { + BufferInternal, + BufferOwned, + BufferSubstring, + }; + + // Used to construct static strings, which have a special refCount that can never hit zero. + // This means that the static string will never be destroyed, which is important because + // static strings will be shared across threads & ref-counted in a non-threadsafe manner. 
+ enum ConstructStaticStringTag { ConstructStaticString }; + StringImpl(const UChar* characters, unsigned length, ConstructStaticStringTag) + : m_refCount(s_refCountFlagIsStaticString) + , m_length(length) + , m_data16(characters) + , m_buffer(0) + , m_hashAndFlags(s_hashFlagIsIdentifier | BufferOwned) + { + // Ensure that the hash is computed so that AtomicStringHash can call existingHash() + // with impunity. The empty string is special because it is never entered into + // AtomicString's HashKey, but still needs to compare correctly. + hash(); + } + + // Used to construct static strings, which have a special refCount that can never hit zero. + // This means that the static string will never be destroyed, which is important because + // static strings will be shared across threads & ref-counted in a non-threadsafe manner. + // This is the 8-bit (LChar) variant: it stores the characters in m_data8 and additionally + // sets s_hashFlag8BitBuffer. + StringImpl(const LChar* characters, unsigned length, ConstructStaticStringTag) + : m_refCount(s_refCountFlagIsStaticString) + , m_length(length) + , m_data8(characters) + , m_buffer(0) + , m_hashAndFlags(s_hashFlag8BitBuffer | s_hashFlagIsIdentifier | BufferOwned) + { + // Ensure that the hash is computed so that AtomicStringHash can call existingHash() + // with impunity. The empty string is special because it is never entered into + // AtomicString's HashKey, but still needs to compare correctly. + hash(); + } + + // FIXME: there has to be a less hacky way to do this. 
+ enum Force8Bit { Force8BitConstructor }; + // Create a normal 8-bit string with internal storage (BufferInternal) + StringImpl(unsigned length, Force8Bit) + : m_refCount(s_refCountIncrement) + , m_length(length) + , m_data8(reinterpret_cast(this + 1)) + , m_buffer(0) + , m_hashAndFlags(s_hashFlag8BitBuffer | BufferInternal) + { + ASSERT(m_data8); + ASSERT(m_length); + } + + // Create a normal 16-bit string with internal storage (BufferInternal) + StringImpl(unsigned length) + : m_refCount(s_refCountIncrement) + , m_length(length) + , m_data16(reinterpret_cast(this + 1)) + , m_buffer(0) + , m_hashAndFlags(BufferInternal) + { + ASSERT(m_data16); + ASSERT(m_length); + } + + // Create a StringImpl adopting ownership of the provided buffer (BufferOwned) + StringImpl(const LChar* characters, unsigned length) + : m_refCount(s_refCountIncrement) + , m_length(length) + , m_data8(characters) + , m_buffer(0) + , m_hashAndFlags(s_hashFlag8BitBuffer | BufferOwned) + { + ASSERT(m_data8); + ASSERT(m_length); + } + + // Create a StringImpl adopting ownership of the provided buffer (BufferOwned) + StringImpl(const UChar* characters, unsigned length) + : m_refCount(s_refCountIncrement) + , m_length(length) + , m_data16(characters) + , m_buffer(0) + , m_hashAndFlags(BufferOwned) + { + ASSERT(m_data16); + ASSERT(m_length); + } + + // Used to create new strings that are a substring of an existing 8-bit StringImpl (BufferSubstring) + StringImpl(const LChar* characters, unsigned length, PassRefPtr base) + : m_refCount(s_refCountIncrement) + , m_length(length) + , m_data8(characters) + , m_substringBuffer(base.leakRef()) + , m_hashAndFlags(s_hashFlag8BitBuffer | BufferSubstring) + { + ASSERT(is8Bit()); + ASSERT(m_data8); + ASSERT(m_length); + ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); + } + + // Used to create new strings that are a substring of an existing 16-bit StringImpl (BufferSubstring) + StringImpl(const UChar* characters, unsigned length, PassRefPtr base) + 
: m_refCount(s_refCountIncrement) + , m_length(length) + , m_data16(characters) + , m_substringBuffer(base.leakRef()) + , m_hashAndFlags(BufferSubstring) + { + ASSERT(!is8Bit()); + ASSERT(m_data16); + ASSERT(m_length); + ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); + } + +public: + ~StringImpl(); + + static PassRefPtr create(const UChar*, unsigned length); + static PassRefPtr create(const LChar*, unsigned length); + ALWAYS_INLINE static PassRefPtr create(const char* s, unsigned length) { return create(reinterpret_cast(s), length); } + static PassRefPtr create(const LChar*); + ALWAYS_INLINE static PassRefPtr create(const char* s) { return create(reinterpret_cast(s)); } + + static ALWAYS_INLINE PassRefPtr create8(PassRefPtr rep, unsigned offset, unsigned length) + { + ASSERT(rep); + ASSERT(length <= rep->length()); + + if (!length) + return empty(); + + ASSERT(rep->is8Bit()); + StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get(); + return adoptRef(new StringImpl(rep->m_data8 + offset, length, ownerRep)); + } + + static ALWAYS_INLINE PassRefPtr create(PassRefPtr rep, unsigned offset, unsigned length) + { + ASSERT(rep); + ASSERT(length <= rep->length()); + + if (!length) + return empty(); + + StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? 
rep->m_substringBuffer : rep.get(); + if (rep->is8Bit()) + return adoptRef(new StringImpl(rep->m_data8 + offset, length, ownerRep)); + return adoptRef(new StringImpl(rep->m_data16 + offset, length, ownerRep)); + } + + static PassRefPtr createUninitialized(unsigned length, LChar*& data); + static PassRefPtr createUninitialized(unsigned length, UChar*& data); + template static ALWAYS_INLINE PassRefPtr tryCreateUninitialized(unsigned length, T*& output) + { + if (!length) { + output = 0; + return empty(); + } + + if (length > ((std::numeric_limits::max() - sizeof(StringImpl)) / sizeof(T))) { + output = 0; + return 0; + } + StringImpl* resultImpl; + if (!tryFastMalloc(sizeof(T) * length + sizeof(StringImpl)).getValue(resultImpl)) { + output = 0; + return 0; + } + output = reinterpret_cast(resultImpl + 1); + + if (sizeof(T) == sizeof(char)) + return adoptRef(new (NotNull, resultImpl) StringImpl(length, Force8BitConstructor)); + + return adoptRef(new (NotNull, resultImpl) StringImpl(length)); + } + + // Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr, + // and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(), + // the originalString can't be used after this function. 
+ static PassRefPtr reallocate(PassRefPtr originalString, unsigned length, LChar*& data); + static PassRefPtr reallocate(PassRefPtr originalString, unsigned length, UChar*& data); + + static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); } + static unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; } + static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); } + static PassRefPtr createWithTerminatingNullCharacter(const StringImpl&); + + template + static PassRefPtr adopt(Vector& vector) + { + if (size_t size = vector.size()) { + ASSERT(vector.data()); + if (size > std::numeric_limits::max()) + CRASH(); + return adoptRef(new StringImpl(vector.releaseBuffer(), size)); + } + return empty(); + } + + static PassRefPtr adopt(StringBuffer& buffer); + static PassRefPtr adopt(StringBuffer& buffer); + + unsigned length() const { return m_length; } + bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; } + + // FIXME: Remove all unnecessary usages of characters() + ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; } + ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; } + ALWAYS_INLINE const UChar* characters() const + { + if (!is8Bit()) + return m_data16; + + return getData16SlowCase(); + } + + template + ALWAYS_INLINE const CharType * getCharacters() const; + + size_t cost() + { + // For substrings, return the cost of the base string. 
+ if (bufferOwnership() == BufferSubstring) + return m_substringBuffer->cost(); + + if (m_hashAndFlags & s_hashFlagDidReportCost) + return 0; + + m_hashAndFlags |= s_hashFlagDidReportCost; + return m_length; + } + + bool has16BitShadow() const { return m_hashAndFlags & s_hashFlagHas16BitShadow; } + void upconvertCharacters(unsigned, unsigned) const; + bool isIdentifier() const { return m_hashAndFlags & s_hashFlagIsIdentifier; } + void setIsIdentifier(bool isIdentifier) + { + ASSERT(!isStatic()); + if (isIdentifier) + m_hashAndFlags |= s_hashFlagIsIdentifier; + else + m_hashAndFlags &= ~s_hashFlagIsIdentifier; + } + + bool hasTerminatingNullCharacter() const { return m_hashAndFlags & s_hashFlagHasTerminatingNullCharacter; } + + bool isAtomic() const { return m_hashAndFlags & s_hashFlagIsAtomic; } + void setIsAtomic(bool isIdentifier) + { + ASSERT(!isStatic()); + if (isIdentifier) + m_hashAndFlags |= s_hashFlagIsAtomic; + else + m_hashAndFlags &= ~s_hashFlagIsAtomic; + } + +private: + // The high bits of 'hash' are always empty, but we prefer to store our flags + // in the low bits because it makes them slightly more efficient to access. + // So, we shift left and right when setting and getting our hash code. + void setHash(unsigned hash) const + { + ASSERT(!hasHash()); + // Multiple clients assume that StringHasher is the canonical string hash function. + ASSERT(hash == (is8Bit() ? StringHasher::computeHash(m_data8, m_length) : StringHasher::computeHash(m_data16, m_length))); + ASSERT(!(hash & (s_flagMask << (8 * sizeof(hash) - s_flagCount)))); // Verify that enough high bits are empty. + + hash <<= s_flagCount; + ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are empty after shift. + ASSERT(hash); // Verify that 0 is a valid sentinel hash value. + + m_hashAndFlags |= hash; // Store hash with flags in low bits. 
+ } + + unsigned rawHash() const + { + return m_hashAndFlags >> s_flagCount; + } + +public: + bool hasHash() const + { + return rawHash() != 0; + } + + unsigned existingHash() const + { + ASSERT(hasHash()); + return rawHash(); + } + + unsigned hash() const + { + if (hasHash()) + return existingHash(); + return hashSlowCase(); + } + + inline bool hasOneRef() const + { + return m_refCount == s_refCountIncrement; + } + + inline void ref() + { + m_refCount += s_refCountIncrement; + } + + inline void deref() + { + if (m_refCount == s_refCountIncrement) { + delete this; + return; + } + + m_refCount -= s_refCountIncrement; + } + + static StringImpl* empty(); + + // FIXME: Does this really belong in StringImpl? + template static void copyChars(T* destination, const T* source, unsigned numCharacters) + { + if (numCharacters == 1) { + *destination = *source; + return; + } + + if (numCharacters <= s_copyCharsInlineCutOff) { + unsigned i = 0; +#if (CPU(X86) || CPU(X86_64)) + const unsigned charsPerInt = sizeof(uint32_t) / sizeof(T); + + if (numCharacters > charsPerInt) { + unsigned stopCount = numCharacters & ~(charsPerInt - 1); + + const uint32_t* srcCharacters = reinterpret_cast(source); + uint32_t* destCharacters = reinterpret_cast(destination); + for (unsigned j = 0; i < stopCount; i += charsPerInt, ++j) + destCharacters[j] = srcCharacters[j]; + } +#endif + for (; i < numCharacters; ++i) + destination[i] = source[i]; + } else + memcpy(destination, source, numCharacters * sizeof(T)); + } + + // Some string features, like refcounting and the atomicity flag, are not + // thread-safe. We achieve thread safety by isolation, giving each thread + // its own copy of the string. 
+ PassRefPtr isolatedCopy() const; + + PassRefPtr substring(unsigned pos, unsigned len = UINT_MAX); + + UChar operator[](unsigned i) const + { + ASSERT(i < m_length); + if (is8Bit()) + return m_data8[i]; + return m_data16[i]; + } + UChar32 characterStartingAt(unsigned); + + bool containsOnlyWhitespace(); + + int toIntStrict(bool* ok = 0, int base = 10); + unsigned toUIntStrict(bool* ok = 0, int base = 10); + int64_t toInt64Strict(bool* ok = 0, int base = 10); + uint64_t toUInt64Strict(bool* ok = 0, int base = 10); + intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); + + int toInt(bool* ok = 0); // ignores trailing garbage + unsigned toUInt(bool* ok = 0); // ignores trailing garbage + int64_t toInt64(bool* ok = 0); // ignores trailing garbage + uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage + intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage + + double toDouble(bool* ok = 0, bool* didReadNumber = 0); + float toFloat(bool* ok = 0, bool* didReadNumber = 0); + + PassRefPtr lower(); + PassRefPtr upper(); + + PassRefPtr fill(UChar); + // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII? 
+ PassRefPtr foldCase(); + + PassRefPtr stripWhiteSpace(); + PassRefPtr stripWhiteSpace(IsWhiteSpaceFunctionPtr); + PassRefPtr simplifyWhiteSpace(); + PassRefPtr simplifyWhiteSpace(IsWhiteSpaceFunctionPtr); + + PassRefPtr removeCharacters(CharacterMatchFunctionPtr); + template + ALWAYS_INLINE PassRefPtr removeCharacters(const CharType* characters, CharacterMatchFunctionPtr); + + size_t find(UChar, unsigned index = 0); + size_t find(CharacterMatchFunctionPtr, unsigned index = 0); + size_t find(const LChar*, unsigned index = 0); + ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(reinterpret_cast(s), index); }; + size_t find(StringImpl*, unsigned index = 0); + size_t findIgnoringCase(const LChar*, unsigned index = 0); + ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast(s), index); }; + size_t findIgnoringCase(StringImpl*, unsigned index = 0); + + size_t reverseFind(UChar, unsigned index = UINT_MAX); + size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); + size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); + + bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; } + bool endsWith(StringImpl*, bool caseSensitive = true); + + PassRefPtr replace(UChar, UChar); + PassRefPtr replace(UChar, StringImpl*); + PassRefPtr replace(StringImpl*, StringImpl*); + PassRefPtr replace(unsigned index, unsigned len, StringImpl*); + + WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0); + +#if USE(CF) + CFStringRef createCFString(); +#endif +#ifdef __OBJC__ + operator NSString*(); +#endif + +private: + // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. 
+ static const unsigned s_copyCharsInlineCutOff = 20; + + BufferOwnership bufferOwnership() const { return static_cast(m_hashAndFlags & s_hashMaskBufferOwnership); } + bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; } + template PassRefPtr stripMatchedCharacters(UCharPredicate); + template PassRefPtr simplifyMatchedCharactersToSpace(UCharPredicate); + NEVER_INLINE const UChar* getData16SlowCase() const; + NEVER_INLINE unsigned hashSlowCase() const; + + // The bottom bit in the ref count indicates a static (immortal) string. + static const unsigned s_refCountFlagIsStaticString = 0x1; + static const unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag. + + // The bottom 8 bits in the hash are flags. + static const unsigned s_flagCount = 8; + static const unsigned s_flagMask = (1u << s_flagCount) - 1; + COMPILE_ASSERT(s_flagCount == StringHasher::flagCount, StringHasher_reserves_enough_bits_for_StringImpl_flags); + + static const unsigned s_hashFlagHas16BitShadow = 1u << 7; + static const unsigned s_hashFlag8BitBuffer = 1u << 6; + static const unsigned s_hashFlagHasTerminatingNullCharacter = 1u << 5; + static const unsigned s_hashFlagIsAtomic = 1u << 4; + static const unsigned s_hashFlagDidReportCost = 1u << 3; + static const unsigned s_hashFlagIsIdentifier = 1u << 2; + static const unsigned s_hashMaskBufferOwnership = 1u | (1u << 1); + + unsigned m_refCount; + unsigned m_length; + union { + const LChar* m_data8; + const UChar* m_data16; + }; + union { + void* m_buffer; + StringImpl* m_substringBuffer; + mutable UChar* m_copyData16; + }; + mutable unsigned m_hashAndFlags; +}; + +template <> +ALWAYS_INLINE const LChar* StringImpl::getCharacters() const { return characters8(); } + +template <> +ALWAYS_INLINE const UChar* StringImpl::getCharacters() const { return characters16(); } + +bool equal(const StringImpl*, const StringImpl*); +bool equal(const StringImpl*, const LChar*); +inline 
bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast(b)); } +bool equal(const StringImpl*, const LChar*, unsigned); +inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast(b), length); } +inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } +inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast(a)); } +bool equal(const StringImpl*, const UChar*, unsigned); + +// Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. +#if CPU(X86_64) +ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) +{ + unsigned dwordLength = length >> 3; + + if (dwordLength) { + const uint64_t* aDWordCharacters = reinterpret_cast(a); + const uint64_t* bDWordCharacters = reinterpret_cast(b); + + for (unsigned i = 0; i != dwordLength; ++i) { + if (*aDWordCharacters++ != *bDWordCharacters++) + return false; + } + + a = reinterpret_cast(aDWordCharacters); + b = reinterpret_cast(bDWordCharacters); + } + + if (length & 4) { + if (*reinterpret_cast(a) != *reinterpret_cast(b)) + return false; + + a += 4; + b += 4; + } + + if (length & 2) { + if (*reinterpret_cast(a) != *reinterpret_cast(b)) + return false; + + a += 2; + b += 2; + } + + if (length & 1 && (*a != *b)) + return false; + + return true; +} + +ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) +{ + unsigned dwordLength = length >> 2; + + if (dwordLength) { + const uint64_t* aDWordCharacters = reinterpret_cast(a); + const uint64_t* bDWordCharacters = reinterpret_cast(b); + + for (unsigned i = 0; i != dwordLength; ++i) { + if (*aDWordCharacters++ != *bDWordCharacters++) + return false; + } + + a = reinterpret_cast(aDWordCharacters); + b = reinterpret_cast(bDWordCharacters); + } + + if (length & 2) { + if (*reinterpret_cast(a) != *reinterpret_cast(b)) + return false; + + a += 2; + b += 2; + } + + if (length & 1 && (*a != *b)) + return false; + + return true; 
+} +#elif CPU(X86) +ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) +{ + const uint32_t* aCharacters = reinterpret_cast(a); + const uint32_t* bCharacters = reinterpret_cast(b); + + unsigned wordLength = length >> 2; + for (unsigned i = 0; i != wordLength; ++i) { + if (*aCharacters++ != *bCharacters++) + return false; + } + + length &= 3; + + if (length) { + const LChar* aRemainder = reinterpret_cast(aCharacters); + const LChar* bRemainder = reinterpret_cast(bCharacters); + + for (unsigned i = 0; i < length; ++i) { + if (aRemainder[i] != bRemainder[i]) + return false; + } + } + + return true; +} + +ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) +{ + const uint32_t* aCharacters = reinterpret_cast(a); + const uint32_t* bCharacters = reinterpret_cast(b); + + unsigned wordLength = length >> 1; + for (unsigned i = 0; i != wordLength; ++i) { + if (*aCharacters++ != *bCharacters++) + return false; + } + + if (length & 1 && *reinterpret_cast(aCharacters) != *reinterpret_cast(bCharacters)) + return false; + + return true; +} +#else +ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) +{ + for (unsigned i = 0; i != length; ++i) { + if (a[i] != b[i]) + return false; + } + + return true; +} + +ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) +{ + for (unsigned i = 0; i != length; ++i) { + if (a[i] != b[i]) + return false; + } + + return true; +} +#endif + +ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) +{ + for (unsigned i = 0; i != length; ++i) { + if (a[i] != b[i]) + return false; + } + + return true; +} + +ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) +{ + for (unsigned i = 0; i != length; ++i) { + if (a[i] != b[i]) + return false; + } + + return true; +} + +bool equalIgnoringCase(StringImpl*, StringImpl*); +bool equalIgnoringCase(StringImpl*, const LChar*); +inline bool equalIgnoringCase(const LChar* a, StringImpl* b) { 
return equalIgnoringCase(b, a); } +bool equalIgnoringCase(const UChar*, const LChar*, unsigned); +inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast(b), length); } +inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } +inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast(a), length); } + +bool equalIgnoringNullity(StringImpl*, StringImpl*); + +template +bool equalIgnoringNullity(const Vector& a, StringImpl* b) +{ + if (!b) + return !a.size(); + if (a.size() != b->length()) + return false; + return !memcmp(a.data(), b->characters(), b->length()); +} + +int codePointCompare(const StringImpl*, const StringImpl*); + +static inline bool isSpaceOrNewline(UChar c) +{ + // Use isASCIISpace() for basic Latin-1. + // This will include newlines, which aren't included in Unicode DirWS. + return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral; +} + +inline PassRefPtr StringImpl::isolatedCopy() const +{ + if (is8Bit()) + return create(m_data8, m_length); + return create(m_data16, m_length); +} + +struct StringHash; + +// StringHash is the default hash for StringImpl* and RefPtr +template struct DefaultHash; +template<> struct DefaultHash { + typedef StringHash Hash; +}; +template<> struct DefaultHash > { + typedef StringHash Hash; +}; + +} + +using WTF::StringImpl; +using WTF::equal; +using WTF::TextCaseSensitivity; +using WTF::TextCaseSensitive; +using WTF::TextCaseInsensitive; + +#endif diff --git a/Source/JavaScriptCore/wtf/text/StringOperators.h b/Source/JavaScriptCore/wtf/text/StringOperators.h new file mode 100644 index 000000000..9e1637be1 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringOperators.h @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. 
All rights reserved. + * Copyright (C) Research In Motion Limited 2011. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef StringOperators_h +#define StringOperators_h + +namespace WTF { + +template +class StringAppend { +public: + StringAppend(StringType1 string1, StringType2 string2) + : m_string1(string1) + , m_string2(string2) + { + } + + operator String() const + { + RefPtr resultImpl = tryMakeString(m_string1, m_string2); + if (!resultImpl) + CRASH(); + return resultImpl.release(); + } + + operator AtomicString() const + { + return operator String(); + } + + bool is8Bit() + { + StringTypeAdapter adapter1(m_string1); + StringTypeAdapter adapter2(m_string2); + return adapter1.is8Bit() && adapter2.is8Bit(); + } + + void writeTo(LChar* destination) + { + ASSERT(is8Bit()); + StringTypeAdapter adapter1(m_string1); + StringTypeAdapter adapter2(m_string2); + adapter1.writeTo(destination); + adapter2.writeTo(destination + adapter1.length()); + } + + void writeTo(UChar* destination) + { + StringTypeAdapter adapter1(m_string1); + StringTypeAdapter adapter2(m_string2); + adapter1.writeTo(destination); + adapter2.writeTo(destination + adapter1.length()); + } + + unsigned length() + { + StringTypeAdapter 
adapter1(m_string1); + StringTypeAdapter adapter2(m_string2); + return adapter1.length() + adapter2.length(); + } + +private: + StringType1 m_string1; + StringType2 m_string2; +}; + +template +class StringTypeAdapter > { +public: + StringTypeAdapter >(StringAppend& buffer) + : m_buffer(buffer) + { + } + + unsigned length() { return m_buffer.length(); } + + bool is8Bit() { return m_buffer.is8Bit(); } + + void writeTo(LChar* destination) { m_buffer.writeTo(destination); } + void writeTo(UChar* destination) { m_buffer.writeTo(destination); } + +private: + StringAppend& m_buffer; +}; + +inline StringAppend operator+(const char* string1, const String& string2) +{ + return StringAppend(string1, string2); +} + +inline StringAppend operator+(const char* string1, const AtomicString& string2) +{ + return StringAppend(string1, string2); +} + +template +StringAppend > operator+(const char* string1, const StringAppend& string2) +{ + return StringAppend >(string1, string2); +} + +inline StringAppend operator+(const UChar* string1, const String& string2) +{ + return StringAppend(string1, string2); +} + +inline StringAppend operator+(const UChar* string1, const AtomicString& string2) +{ + return StringAppend(string1, string2); +} + +template +StringAppend > operator+(const UChar* string1, const StringAppend& string2) +{ + return StringAppend >(string1, string2); +} + +template +StringAppend operator+(const String& string1, T string2) +{ + return StringAppend(string1, string2); +} + +template +StringAppend, W> operator+(const StringAppend& string1, W string2) +{ + return StringAppend, W>(string1, string2); +} + +} // namespace WTF + +#endif // StringOperators_h diff --git a/Source/JavaScriptCore/wtf/text/StringStatics.cpp b/Source/JavaScriptCore/wtf/text/StringStatics.cpp new file mode 100644 index 000000000..1a80f6d48 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringStatics.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2010 Apple Inc. All Rights Reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#ifdef SKIP_STATIC_CONSTRUCTORS_ON_GCC +#define ATOMICSTRING_HIDE_GLOBALS 1 +#endif + +#include "AtomicString.h" +#include "DynamicAnnotations.h" +#include "MainThread.h" +#include "StaticConstructors.h" +#include "StringImpl.h" + +namespace WTF { + +StringImpl* StringImpl::empty() +{ + // FIXME: This works around a bug in our port of PCRE, that a regular expression + // run on the empty string may still perform a read from the first element, and + // as such we need this to be a valid pointer. No code should ever be reading + // from a zero length string, so this should be able to be a non-null pointer + // into the zero-page. 
+ // Replace this with 'reinterpret_cast(static_cast(1))' once + // PCRE goes away. + static LChar emptyLCharData = 0; + DEFINE_STATIC_LOCAL(StringImpl, emptyString, (&emptyLCharData, 0, ConstructStaticString)); + WTF_ANNOTATE_BENIGN_RACE(&emptyString, "Benign race on StringImpl::emptyString reference counter"); + return &emptyString; +} + +WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, nullAtom) +WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, emptyAtom, "") +WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, textAtom, "#text") +WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, commentAtom, "#comment") +WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, starAtom, "*") +WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlAtom, "xml") +WTF_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlnsAtom, "xmlns") + +NEVER_INLINE unsigned StringImpl::hashSlowCase() const +{ + if (is8Bit()) + setHash(StringHasher::computeHash(m_data8, m_length)); + else + setHash(StringHasher::computeHash(m_data16, m_length)); + return existingHash(); +} + +void AtomicString::init() +{ + static bool initialized; + if (!initialized) { + // Initialization is not thread safe, so this function must be called from the main thread first. + ASSERT(isMainThread()); + + // Use placement new to initialize the globals. + new (NotNull, (void*)&nullAtom) AtomicString; + new (NotNull, (void*)&emptyAtom) AtomicString(""); + new (NotNull, (void*)&textAtom) AtomicString("#text"); + new (NotNull, (void*)&commentAtom) AtomicString("#comment"); + new (NotNull, (void*)&starAtom) AtomicString("*"); + new (NotNull, (void*)&xmlAtom) AtomicString("xml"); + new (NotNull, (void*)&xmlnsAtom) AtomicString("xmlns"); + + initialized = true; + } +} + +} diff --git a/Source/JavaScriptCore/wtf/text/TextPosition.h b/Source/JavaScriptCore/wtf/text/TextPosition.h new file mode 100644 index 000000000..be49c157a --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/TextPosition.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2010, Google Inc. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TextPosition_h +#define TextPosition_h + +#include + +namespace WTF { + +// An abstract number of element in a sequence. The sequence has a first element. +// This type should be used instead of integer because 2 contradicting traditions can +// call a first element '0' or '1' which makes integer type ambiguous. 
+class OrdinalNumber {
+public:
+    // Named factories make the caller state which numbering convention an int uses;
+    // the value is always stored zero-based.
+    static OrdinalNumber fromZeroBasedInt(int zeroBasedInt) { return OrdinalNumber(zeroBasedInt); }
+    static OrdinalNumber fromOneBasedInt(int oneBasedInt) { return OrdinalNumber(oneBasedInt - 1); }
+    OrdinalNumber() : m_zeroBasedValue(0) { }
+
+    int zeroBasedInt() const { return m_zeroBasedValue; }
+    int oneBasedInt() const { return m_zeroBasedValue + 1; }
+
+    // Comparisons never modify the object, so they are const-qualified and can be used
+    // on const values and through const references.
+    bool operator==(OrdinalNumber other) const { return m_zeroBasedValue == other.m_zeroBasedValue; }
+    bool operator!=(OrdinalNumber other) const { return !((*this) == other); }
+
+    static OrdinalNumber first() { return OrdinalNumber(0); }
+    static OrdinalNumber beforeFirst() { return OrdinalNumber(-1); }
+
+private:
+    // Private so that outside code has to go through the named factories above.
+    OrdinalNumber(int zeroBasedInt) : m_zeroBasedValue(zeroBasedInt) { }
+    int m_zeroBasedValue;
+};
+
+
+// TextPosition structure specifies coordinates within a text resource. It is used mostly
+// for saving script source position.
+class TextPosition {
+public:
+    TextPosition(OrdinalNumber line, OrdinalNumber column)
+        : m_line(line)
+        , m_column(column)
+    {
+    }
+    // Both members are default-constructed, i.e. zero-based line 0 and column 0.
+    TextPosition() { }
+    // Const-qualified: two positions are equal when both line and column are equal.
+    bool operator==(const TextPosition& other) const { return m_line == other.m_line && m_column == other.m_column; }
+    bool operator!=(const TextPosition& other) const { return !((*this) == other); }
+
+    // A 'minimum' value of position, used as a default value.
+    static TextPosition minimumPosition() { return TextPosition(OrdinalNumber::first(), OrdinalNumber::first()); }
+
+    // A value with line value less than a minimum; used as an impossible position.
+ static TextPosition belowRangePosition() { return TextPosition(OrdinalNumber::beforeFirst(), OrdinalNumber::beforeFirst()); } + + OrdinalNumber m_line; + OrdinalNumber m_column; +}; + +} + +using WTF::OrdinalNumber; + +using WTF::TextPosition; + +#endif // TextPosition_h diff --git a/Source/JavaScriptCore/wtf/text/WTFString.cpp b/Source/JavaScriptCore/wtf/text/WTFString.cpp new file mode 100644 index 000000000..4c42ed6a2 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/WTFString.cpp @@ -0,0 +1,1126 @@ +/* + * (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved. + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "config.h" +#include "WTFString.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace WTF { + +using namespace Unicode; +using namespace std; + +// Construct a string with UTF-16 data. +String::String(const UChar* characters, unsigned length) + : m_impl(characters ? StringImpl::create(characters, length) : 0) +{ +} + +// Construct a string with UTF-16 data, from a null-terminated source. 
+String::String(const UChar* str)
+{
+    // A null pointer leaves m_impl unset, i.e. produces a null string.
+    if (!str)
+        return;
+
+    size_t len = 0;
+    while (str[len] != UChar(0))
+        len++;
+
+    // StringImpl lengths are unsigned; refuse anything that would not fit.
+    if (len > numeric_limits<unsigned>::max())
+        CRASH();
+
+    m_impl = StringImpl::create(str, len);
+}
+
+// Construct a string with latin1 data.
+String::String(const LChar* characters, unsigned length)
+    : m_impl(characters ? StringImpl::create(characters, length) : 0)
+{
+}
+
+String::String(const char* characters, unsigned length)
+    : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters), length) : 0)
+{
+}
+
+// Construct a string with latin1 data, from a null-terminated source.
+String::String(const LChar* characters)
+    : m_impl(characters ? StringImpl::create(characters) : 0)
+{
+}
+
+String::String(const char* characters)
+    : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters)) : 0)
+{
+}
+
+// Appends str by building a new StringImpl; a null receiver simply adopts str's impl.
+void String::append(const String& str)
+{
+    if (str.isEmpty())
+        return;
+
+    // FIXME: This is extremely inefficient. So much so that we might want to take this
+    // out of String's API. We can make it better by optimizing the case where exactly
+    // one String is pointing at this StringImpl, but even then it's going to require a
+    // call to fastMalloc every single time.
+    if (str.m_impl) {
+        if (m_impl) {
+            UChar* data;
+            // Guard the combined length against unsigned overflow.
+            if (str.length() > numeric_limits<unsigned>::max() - m_impl->length())
+                CRASH();
+            RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data);
+            memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
+            memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar));
+            m_impl = newImpl.release();
+        } else
+            m_impl = str.m_impl;
+    }
+}
+
+// Appends a single latin1 character.
+void String::append(LChar c)
+{
+    // FIXME: This is extremely inefficient. So much so that we might want to take this
+    // out of String's API. We can make it better by optimizing the case where exactly
+    // one String is pointing at this StringImpl, but even then it's going to require a
+    // call to fastMalloc every single time.
+    if (m_impl) {
+        UChar* data;
+        if (m_impl->length() >= numeric_limits<unsigned>::max())
+            CRASH();
+        RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
+        memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
+        data[m_impl->length()] = c;
+        m_impl = newImpl.release();
+    } else
+        m_impl = StringImpl::create(&c, 1);
+}
+
+// Appends a single UTF-16 code unit.
+void String::append(UChar c)
+{
+    // FIXME: This is extremely inefficient. So much so that we might want to take this
+    // out of String's API. We can make it better by optimizing the case where exactly
+    // one String is pointing at this StringImpl, but even then it's going to require a
+    // call to fastMalloc every single time.
+    if (m_impl) {
+        UChar* data;
+        if (m_impl->length() >= numeric_limits<unsigned>::max())
+            CRASH();
+        RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
+        memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
+        data[m_impl->length()] = c;
+        m_impl = newImpl.release();
+    } else
+        m_impl = StringImpl::create(&c, 1);
+}
+
+int codePointCompare(const String& a, const String& b)
+{
+    return codePointCompare(a.impl(), b.impl());
+}
+
+// Inserts str at pos. Inserting an empty (non-null) string into a null string
+// makes the receiver empty rather than null.
+void String::insert(const String& str, unsigned pos)
+{
+    if (str.isEmpty()) {
+        if (str.isNull())
+            return;
+        if (isNull())
+            m_impl = str.impl();
+        return;
+    }
+    insert(str.characters(), str.length(), pos);
+}
+
+// Appends lengthToAppend UTF-16 code units. A null receiver is replaced by a new
+// string unless charactersToAppend is itself null.
+void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
+{
+    if (!m_impl) {
+        if (!charactersToAppend)
+            return;
+        m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
+        return;
+    }
+
+    if (!lengthToAppend)
+        return;
+
+    ASSERT(charactersToAppend);
+    UChar* data;
+    if (lengthToAppend > numeric_limits<unsigned>::max() - length())
+        CRASH();
+    RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
+    memcpy(data, characters(), length() * sizeof(UChar));
+    memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar));
+    m_impl = newImpl.release();
+}
+
+// Inserts lengthToInsert UTF-16 code units at position; a position at or past the
+// end degrades to append().
+void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
+{
+    if (position >= length()) {
+        append(charactersToInsert, lengthToInsert);
+        return;
+    }
+
+    ASSERT(m_impl);
+
+    if (!lengthToInsert)
+        return;
+
+    ASSERT(charactersToInsert);
+    UChar* data;
+    if (lengthToInsert > numeric_limits<unsigned>::max() - length())
+        CRASH();
+    RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data);
+    memcpy(data, characters(), position * sizeof(UChar));
+    memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));
+    memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));
+    m_impl = newImpl.release();
+}
+
+// Returns 0 for a null string or an out-of-range index.
+UChar32 String::characterStartingAt(unsigned i) const
+{
+    if (!m_impl || i >= m_impl->length())
+        return 0;
+    return m_impl->characterStartingAt(i);
+}
+
+// Shortens the string to position code units; no-op if it is already that short.
+void String::truncate(unsigned position)
+{
+    if (position >= length())
+        return;
+    UChar* data;
+    RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
+    memcpy(data, characters(), position * sizeof(UChar));
+    m_impl = newImpl.release();
+}
+
+// Removes up to lengthToRemove code units starting at position; the count is
+// clamped to the end of the string.
+void String::remove(unsigned position, int lengthToRemove)
+{
+    if (lengthToRemove <= 0)
+        return;
+    if (position >= length())
+        return;
+    if (static_cast<unsigned>(lengthToRemove) > length() - position)
+        lengthToRemove = length() - position;
+    UChar* data;
+    RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
+    memcpy(data, characters(), position * sizeof(UChar));
+    memcpy(data + position, characters() + position + lengthToRemove,
+        (length() - lengthToRemove - position) * sizeof(UChar));
+    m_impl = newImpl.release();
+}
+
+String String::substring(unsigned pos, unsigned len) const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->substring(pos, len);
+}
+
+// Like substring(), but offset and length are clamped to the string and the result
+// is created from the existing impl via StringImpl::create(impl, offset, length).
+String String::substringSharingImpl(unsigned offset, unsigned length) const
+{
+    // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
+
+    unsigned stringLength = this->length();
+    offset = min(offset, stringLength);
+    length = min(length, stringLength - offset);
+
+    if (!offset && length == stringLength)
+        return *this;
+    return String(StringImpl::create(m_impl, offset, length));
+}
+
+// The following transformations forward to StringImpl and return a null string
+// when the receiver is null.
+String String::lower() const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->lower();
+}
+
+String String::upper() const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->upper();
+}
+
+String String::stripWhiteSpace() const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->stripWhiteSpace();
+}
+
+String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->stripWhiteSpace(isWhiteSpace);
+}
+
+String String::simplifyWhiteSpace() const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->simplifyWhiteSpace();
+}
+
+String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->simplifyWhiteSpace(isWhiteSpace);
+}
+
+String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->removeCharacters(findMatch);
+}
+
+String String::foldCase() const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->foldCase();
+}
+
+// Returns true when the string ends in '%'; result receives the strict integer
+// parse of everything before the '%'. The parse status itself is not checked here.
+bool String::percentage(int& result) const
+{
+    if (!m_impl || !m_impl->length())
+        return false;
+
+    if ((*m_impl)[m_impl->length() - 1] != '%')
+        return false;
+
+    result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1);
+    return true;
+}
+
+// Returns the characters with a terminating null, replacing m_impl with a
+// null-terminated copy when the current impl does not already have one.
+const UChar* String::charactersWithNullTermination()
+{
+    if (!m_impl)
+        return 0;
+    if (m_impl->hasTerminatingNullCharacter())
+        return m_impl->characters();
+    m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);
+    return m_impl->characters();
+}
+
+// printf-style formatting into a String.
+String String::format(const char *format, ...)
+{
+#if PLATFORM(QT)
+    // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
+    // https://bugs.webkit.org/show_bug.cgi?id=18994
+    va_list args;
+    va_start(args, format);
+
+    QString buffer;
+    buffer.vsprintf(format, args);
+
+    va_end(args);
+
+    QByteArray ba = buffer.toUtf8();
+    return StringImpl::create(reinterpret_cast<const LChar*>(ba.constData()), ba.length());
+
+#elif OS(WINCE)
+    va_list args;
+    va_start(args, format);
+
+    Vector<char, 256> buffer;
+
+    // Keep doubling the buffer until vsnprintf reports success.
+    int bufferSize = 256;
+    buffer.resize(bufferSize);
+    for (;;) {
+        int written = vsnprintf(buffer.data(), bufferSize, format, args);
+        va_end(args);
+
+        if (written == 0)
+            return String("");
+        if (written > 0)
+            return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), written);
+
+        bufferSize <<= 1;
+        buffer.resize(bufferSize);
+        va_start(args, format);
+    }
+
+#else
+    va_list args;
+    va_start(args, format);
+
+    Vector<char, 256> buffer;
+
+    // Do the format once to get the length.
+#if COMPILER(MSVC)
+    int result = _vscprintf(format, args);
+#else
+    char ch;
+    int result = vsnprintf(&ch, 1, format, args);
+    // We need to call va_end() and then va_start() again here, as the
+    // contents of args is undefined after the call to vsnprintf
+    // according to http://man.cx/snprintf(3)
+    //
+    // Not calling va_end/va_start here happens to work on lots of
+    // systems, but fails e.g. on 64bit Linux.
+    va_end(args);
+    va_start(args, format);
+#endif
+
+    // Every va_start must be paired with va_end, including on the early returns.
+    if (result == 0) {
+        va_end(args);
+        return String("");
+    }
+    if (result < 0) {
+        va_end(args);
+        return String();
+    }
+    unsigned len = result;
+    buffer.grow(len + 1);
+
+    // Now do the formatting again, guaranteed to fit.
+    vsnprintf(buffer.data(), buffer.size(), format, args);
+
+    va_end(args);
+
+    return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len);
+#endif
+}
+
+// Integer-to-string conversions, implemented on top of String::format.
+String String::number(short n)
+{
+    return String::format("%hd", n);
+}
+
+String String::number(unsigned short n)
+{
+    return String::format("%hu", n);
+}
+
+String String::number(int n)
+{
+    return String::format("%d", n);
+}
+
+String String::number(unsigned n)
+{
+    return String::format("%u", n);
+}
+
+String String::number(long n)
+{
+    return String::format("%ld", n);
+}
+
+String String::number(unsigned long n)
+{
+    return String::format("%lu", n);
+}
+
+String String::number(long long n)
+{
+#if OS(WINDOWS) && !PLATFORM(QT)
+    return String::format("%I64i", n);
+#else
+    return String::format("%lli", n);
+#endif
+}
+
+String String::number(unsigned long long n)
+{
+#if OS(WINDOWS) && !PLATFORM(QT)
+    return String::format("%I64u", n);
+#else
+    return String::format("%llu", n);
+#endif
+}
+
+String String::number(double number, unsigned flags, unsigned precision)
+{
+    NumberToStringBuffer buffer;
+
+    // Mimic String::format("%.[precision]g", ...), but use dtoas rounding facilities.
+    if (flags & ShouldRoundSignificantFigures)
+        return String(numberToFixedPrecisionString(number, precision, buffer, flags & ShouldTruncateTrailingZeros));
+
+    // Mimic String::format("%.[precision]f", ...), but use dtoas rounding facilities.
+    return String(numberToFixedWidthString(number, precision, buffer));
+}
+
+// String-to-number conversions. Each forwards to StringImpl; for a null string
+// they report failure through ok (when provided) and return zero.
+int String::toIntStrict(bool* ok, int base) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toIntStrict(ok, base);
+}
+
+unsigned String::toUIntStrict(bool* ok, int base) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toUIntStrict(ok, base);
+}
+
+int64_t String::toInt64Strict(bool* ok, int base) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toInt64Strict(ok, base);
+}
+
+uint64_t String::toUInt64Strict(bool* ok, int base) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toUInt64Strict(ok, base);
+}
+
+intptr_t String::toIntPtrStrict(bool* ok, int base) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toIntPtrStrict(ok, base);
+}
+
+
+int String::toInt(bool* ok) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toInt(ok);
+}
+
+unsigned String::toUInt(bool* ok) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toUInt(ok);
+}
+
+int64_t String::toInt64(bool* ok) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toInt64(ok);
+}
+
+uint64_t String::toUInt64(bool* ok) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toUInt64(ok);
+}
+
+intptr_t String::toIntPtr(bool* ok) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        return 0;
+    }
+    return m_impl->toIntPtr(ok);
+}
+
+double String::toDouble(bool* ok, bool* didReadNumber) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        if (didReadNumber)
+            *didReadNumber = false;
+        return 0.0;
+    }
+    return m_impl->toDouble(ok, didReadNumber);
+}
+
+float String::toFloat(bool* ok, bool* didReadNumber) const
+{
+    if (!m_impl) {
+        if (ok)
+            *ok = false;
+        if (didReadNumber)
+            *didReadNumber = false;
+        return 0.0f;
+    }
+    return m_impl->toFloat(ok, didReadNumber);
+}
+
+String String::isolatedCopy() const
+{
+    if (!m_impl)
+        return String();
+    return m_impl->isolatedCopy();
+}
+
+// Splits on a string separator. result is cleared first; empty pieces are kept
+// only when allowEmptyEntries is true.
+void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
+{
+    result.clear();
+
+    unsigned startPos = 0;
+    size_t endPos;
+    while ((endPos = find(separator, startPos)) != notFound) {
+        if (allowEmptyEntries || startPos != endPos)
+            result.append(substring(startPos, endPos - startPos));
+        startPos = endPos + separator.length();
+    }
+    if (allowEmptyEntries || startPos != length())
+        result.append(substring(startPos));
+}
+
+void String::split(const String& separator, Vector<String>& result) const
+{
+    split(separator, false, result);
+}
+
+// Splits on a single UTF-16 code unit; same contract as the String overload.
+void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
+{
+    result.clear();
+
+    unsigned startPos = 0;
+    size_t endPos;
+    while ((endPos = find(separator, startPos)) != notFound) {
+        if (allowEmptyEntries || startPos != endPos)
+            result.append(substring(startPos, endPos - startPos));
+        startPos = endPos + 1;
+    }
+    if (allowEmptyEntries || startPos != length())
+        result.append(substring(startPos));
+}
+
+void String::split(UChar separator, Vector<String>& result) const
+{
+    // Use the UChar overload directly instead of allocating a one-character String.
+    split(separator, false, result);
+}
+
+CString String::ascii() const
+{
+    // Printable ASCII characters 32..127 and the null character are
+    // preserved, characters outside of this range are converted to '?'.
+
+    unsigned length = this->length();
+
+    // Checked before is8Bit(), which dereferences m_impl and so must not run on a null string.
+    if (!length) {
+        char* characterBuffer;
+        return CString::newUninitialized(length, characterBuffer);
+    }
+
+    if (this->is8Bit()) {
+        const LChar* characters = this->characters8();
+
+        char* characterBuffer;
+        CString result = CString::newUninitialized(length, characterBuffer);
+
+        for (unsigned i = 0; i < length; ++i) {
+            LChar ch = characters[i];
+            characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
+        }
+
+        return result;
+    }
+
+    const UChar* characters = this->characters16();
+
+    char* characterBuffer;
+    CString result = CString::newUninitialized(length, characterBuffer);
+
+    for (unsigned i = 0; i < length; ++i) {
+        UChar ch = characters[i];
+        characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
+    }
+
+    return result;
+}
+
+CString String::latin1() const
+{
+    // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
+    // preserved, characters outside of this range are converted to '?'.
+
+    unsigned length = this->length();
+    const UChar* characters = this->characters();
+
+    char* characterBuffer;
+    CString result = CString::newUninitialized(length, characterBuffer);
+
+    for (unsigned i = 0; i < length; ++i) {
+        UChar ch = characters[i];
+        characterBuffer[i] = ch > 0xff ? '?' : ch;
+    }
+
+    return result;
+}
+
+// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
+static inline void putUTF8Triple(char*& buffer, UChar ch)
+{
+    ASSERT(ch >= 0x0800);
+    *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
+    *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
+    *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
+}
+
+// Converts to UTF-8. Returns a null CString when the string is too long to
+// convert, or when strict is true and the conversion reports illegal or
+// truncated input.
+CString String::utf8(bool strict) const
+{
+    unsigned length = this->length();
+
+    if (!length)
+        return CString("", 0);
+
+    // Allocate a buffer big enough to hold all the characters
+    // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
+    // Optimization ideas, if we find this function is hot:
+    //  * We could speculatively create a CStringBuffer to contain 'length'
+    //    characters, and resize if necessary (i.e. if the buffer contains
+    //    non-ascii characters). (Alternatively, scan the buffer first for
+    //    ascii characters, so we know this will be sufficient).
+    //  * We could allocate a CStringBuffer with an appropriate size to
+    //    have a good chance of being able to write the string into the
+    //    buffer without reallocing (say, 1.5 x length).
+    if (length > numeric_limits<unsigned>::max() / 3)
+        return CString();
+    Vector<char, 1024> bufferVector(length * 3);
+
+    char* buffer = bufferVector.data();
+
+    if (is8Bit()) {
+        const LChar* characters = this->characters8();
+
+        ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
+        ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
+    } else {
+        const UChar* characters = this->characters16();
+
+        ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
+        ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
+
+        // Only produced from strict conversion.
+        if (result == sourceIllegal)
+            return CString();
+
+        // Check for an unconverted high surrogate.
+        if (result == sourceExhausted) {
+            if (strict)
+                return CString();
+            // This should be one unpaired high surrogate. Treat it the same
+            // way as an unpaired high surrogate would have been handled in
+            // the middle of a string with non-strict conversion - which is
+            // to say, simply encode it to UTF-8.
+            ASSERT((characters + 1) == (this->characters() + length));
+            ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
+            // There should be room left, since one UChar hasn't been converted.
+            // buffer has already been advanced, so compare against the real end of the vector.
+            ASSERT((buffer + 3) <= (bufferVector.data() + bufferVector.size()));
+            putUTF8Triple(buffer, *characters);
+        }
+    }
+
+    return CString(bufferVector.data(), buffer - bufferVector.data());
+}
+
+// Decodes length bytes of UTF-8. Returns a null string for a null input pointer
+// or when the conversion does not report conversionOK.
+String String::fromUTF8(const LChar* stringStart, size_t length)
+{
+    if (length > numeric_limits<unsigned>::max())
+        CRASH();
+
+    if (!stringStart)
+        return String();
+
+    // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be
+    // the right length, if there are any multi-byte sequences this buffer will be too large.
+    UChar* buffer;
+    String stringBuffer(StringImpl::createUninitialized(length, buffer));
+    UChar* bufferEnd = buffer + length;
+
+    // Try converting into the buffer.
+    const char* stringCurrent = reinterpret_cast<const char*>(stringStart);
+    if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char*>(stringStart + length), &buffer, bufferEnd) != conversionOK)
+        return String();
+
+    // stringBuffer is full (the input must have been all ascii) so just return it!
+    if (buffer == bufferEnd)
+        return stringBuffer;
+
+    // stringBuffer served its purpose as a buffer, copy the contents out into a new string.
+    unsigned utf16Length = buffer - stringBuffer.characters();
+    ASSERT(utf16Length < length);
+    return String(stringBuffer.characters(), utf16Length);
+}
+
+String String::fromUTF8(const LChar* string)
+{
+    if (!string)
+        return String();
+    return fromUTF8(string, strlen(reinterpret_cast<const char*>(string)));
+}
+
+// Tries UTF-8 first; if that yields a null string, reinterprets the bytes as latin1.
+String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size)
+{
+    String utf8 = fromUTF8(string, size);
+    if (!utf8)
+        return String(string, size);
+    return utf8;
+}
+
+// String Operations
+
+// Returns true if c is a valid digit in the given base (bases above 36 are treated as 36).
+static bool isCharacterAllowedInBase(UChar c, int base)
+{
+    if (c > 0x7F)
+        return false;
+    if (isASCIIDigit(c))
+        return c - '0' < base;
+    if (isASCIIAlpha(c)) {
+        if (base > 36)
+            base = 36;
+        return (c >= 'a' && c < 'a' + base - 10)
+            || (c >= 'A' && c < 'A' + base - 10);
+    }
+    return false;
+}
+
+// Strict integer parser: optional leading whitespace, optional sign ('-' only for
+// signed types), one or more digits valid in base, optional trailing whitespace.
+// Anything else, or a value that does not fit IntegralType, sets *ok to false and
+// returns 0.
+template <typename IntegralType, typename CharType>
+static inline IntegralType toIntegralType(const CharType* data, size_t length, bool* ok, int base)
+{
+    static const IntegralType integralMax = numeric_limits<IntegralType>::max();
+    static const bool isSigned = numeric_limits<IntegralType>::is_signed;
+    const IntegralType maxMultiplier = integralMax / base;
+
+    IntegralType value = 0;
+    bool isOk = false;
+    bool isNegative = false;
+
+    if (!data)
+        goto bye;
+
+    // skip leading whitespace
+    while (length && isSpaceOrNewline(*data)) {
+        length--;
+        data++;
+    }
+
+    if (isSigned && length && *data == '-') {
+        length--;
+        data++;
+        isNegative = true;
+    } else if (length && *data == '+') {
+        length--;
+        data++;
+    }
+
+    if (!length || !isCharacterAllowedInBase(*data, base))
+        goto bye;
+
+    while (length && isCharacterAllowedInBase(*data, base)) {
+        length--;
+        IntegralType digitValue;
+        CharType c = *data;
+        if (isASCIIDigit(c))
+            digitValue = c - '0';
+        else if (c >= 'a')
+            digitValue = c - 'a' + 10;
+        else
+            digitValue = c - 'A' + 10;
+
+        // Overflow check; a negative number is allowed one extra unit of magnitude.
+        if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
+            goto bye;
+
+        value = base * value + digitValue;
+        data++;
+    }
+
+#if COMPILER(MSVC)
+#pragma warning(push, 0)
+#pragma warning(disable:4146)
+#endif
+
+    if (isNegative)
+        value = -value;
+
+#if COMPILER(MSVC)
+#pragma warning(pop)
+#endif
+
+    // skip trailing space
+    while (length && isSpaceOrNewline(*data)) {
+        length--;
+        data++;
+    }
+
+    if (!length)
+        isOk = true;
+bye:
+    if (ok)
+        *ok = isOk;
+    return isOk ? value : 0;
+}
+
+// Returns the length of the longest prefix of data that looks like a decimal
+// integer: leading whitespace, an optional sign, then ASCII digits.
+template <typename CharType>
+static unsigned lengthOfCharactersAsInteger(const CharType* data, size_t length)
+{
+    size_t i = 0;
+
+    // Allow leading spaces.
+    for (; i != length; ++i) {
+        if (!isSpaceOrNewline(data[i]))
+            break;
+    }
+
+    // Allow sign.
+    if (i != length && (data[i] == '+' || data[i] == '-'))
+        ++i;
+
+    // Allow digits.
+    for (; i != length; ++i) {
+        if (!isASCIIDigit(data[i]))
+            break;
+    }
+
+    return i;
+}
+
+// Strict conversions: the whole input must be a number in the given base.
+int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<int, LChar>(data, length, ok, base);
+}
+
+int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<int, UChar>(data, length, ok, base);
+}
+
+unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<unsigned, LChar>(data, length, ok, base);
+}
+
+unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<unsigned, UChar>(data, length, ok, base);
+}
+
+int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<int64_t, LChar>(data, length, ok, base);
+}
+
+int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<int64_t, UChar>(data, length, ok, base);
+}
+
+uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<uint64_t, LChar>(data, length, ok, base);
+}
+
+uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<uint64_t, UChar>(data, length, ok, base);
+}
+
+intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<intptr_t, LChar>(data, length, ok, base);
+}
+
+intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+    return toIntegralType<intptr_t, UChar>(data, length, ok, base);
+}
+
+// Lenient conversions: only the leading integer-looking prefix is parsed (base 10),
+// so trailing garbage is ignored.
+int charactersToInt(const LChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
+}
+
+int charactersToInt(const UChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
+}
+
+unsigned charactersToUInt(const LChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
+}
+
+unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
+}
+
+int64_t charactersToInt64(const LChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
+}
+
+int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
+}
+
+uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
+}
+
+uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
+}
+
+intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
+}
+
+intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
+{
+    return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
+}
+
+// Parses a double by copying the characters into a null-terminated char buffer
+// (characters at or above 0x7F become '?') and calling WTF::strtod on it.
+// *ok is true when the whole buffer was consumed; *didReadNumber is true when at
+// least one character was consumed.
+template <typename CharType>
+static inline double toDoubleType(const CharType* data, size_t length, bool* ok, bool* didReadNumber)
+{
+    if (!length) {
+        if (ok)
+            *ok = false;
+        if (didReadNumber)
+            *didReadNumber = false;
+        return 0.0;
+    }
+
+    Vector<char, 256> bytes(length + 1);
+    for (unsigned i = 0; i < length; ++i)
+        bytes[i] = data[i] < 0x7F ? data[i] : '?';
+    bytes[length] = '\0';
+    char* start = bytes.data();
+    char* end;
+    double val = WTF::strtod(start, &end);
+    if (ok)
+        *ok = (end == 0 || *end == '\0');
+    if (didReadNumber)
+        *didReadNumber = end != start;
+    return val;
+}
+
+double charactersToDouble(const LChar* data, size_t length, bool* ok, bool* didReadNumber)
+{
+    return toDoubleType<LChar>(data, length, ok, didReadNumber);
+}
+
+double charactersToDouble(const UChar* data, size_t length, bool* ok, bool* didReadNumber)
+{
+    return toDoubleType<UChar>(data, length, ok, didReadNumber);
+}
+
+float charactersToFloat(const LChar* data, size_t length, bool* ok, bool* didReadNumber)
+{
+    // FIXME: This will return ok even when the string fits into a double but not a float.
+    return static_cast<float>(toDoubleType<LChar>(data, length, ok, didReadNumber));
+}
+
+float charactersToFloat(const UChar* data, size_t length, bool* ok, bool* didReadNumber)
+{
+    // FIXME: This will return ok even when the string fits into a double but not a float.
+    return static_cast<float>(toDoubleType<UChar>(data, length, ok, didReadNumber));
+}
+
+// Returns a shared String constructed from StringImpl::empty().
+const String& emptyString()
+{
+    DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty()));
+    return emptyString;
+}
+
+} // namespace WTF
+
+#ifndef NDEBUG
+// For use in the debugger
+String* string(const char*);
+Vector<char> asciiDebug(StringImpl* impl);
+Vector<char> asciiDebug(String& string);
+
+// Prints the string to stderr using the asciiDebug() representation.
+void String::show() const
+{
+    fprintf(stderr, "%s\n", asciiDebug(impl()).data());
+}
+
+String* string(const char* s)
+{
+    // leaks memory!
+    return new String(s);
+}
+
+// Returns a null-terminated char buffer for impl; non-null characters outside
+// 0x20..0x7f are replaced with '?'. A null impl is rendered as "[null]".
+Vector<char> asciiDebug(StringImpl* impl)
+{
+    if (!impl)
+        return asciiDebug(String("[null]").impl());
+
+    Vector<char> buffer;
+    unsigned length = impl->length();
+    const UChar* characters = impl->characters();
+
+    buffer.resize(length + 1);
+    for (unsigned i = 0; i < length; ++i) {
+        UChar ch = characters[i];
+        buffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
+    }
+    buffer[length] = '\0';
+
+    return buffer;
+}
+
+Vector<char> asciiDebug(String& string)
+{
+    return asciiDebug(string.impl());
+}
+
+#endif
diff --git a/Source/JavaScriptCore/wtf/text/WTFString.h b/Source/JavaScriptCore/wtf/text/WTFString.h
new file mode 100644
index 000000000..3cecc0afd
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/WTFString.h
@@ -0,0 +1,648 @@
+/*
+ * (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef WTFString_h
+#define WTFString_h
+
+// This file would be called String.h, but that conflicts with <string.h>
+// on systems without case-sensitive file systems.
+ +#include "ASCIIFastPath.h" +#include "StringImpl.h" + +#ifdef __OBJC__ +#include +#endif + +#if USE(CF) +typedef const struct __CFString * CFStringRef; +#endif + +#if PLATFORM(QT) +QT_BEGIN_NAMESPACE +class QString; +QT_END_NAMESPACE +#include +#endif + +#if PLATFORM(WX) +class wxString; +#endif + +#if PLATFORM(BLACKBERRY) +namespace BlackBerry { +namespace WebKit { + class WebString; +} +} +#endif + +namespace WTF { + +class CString; +struct StringHash; + +// Declarations of string operations + +WTF_EXPORT_PRIVATE int charactersToIntStrict(const LChar*, size_t, bool* ok = 0, int base = 10); +WTF_EXPORT_PRIVATE int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); +WTF_EXPORT_PRIVATE unsigned charactersToUIntStrict(const LChar*, size_t, bool* ok = 0, int base = 10); +WTF_EXPORT_PRIVATE unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); +int64_t charactersToInt64Strict(const LChar*, size_t, bool* ok = 0, int base = 10); +int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); +uint64_t charactersToUInt64Strict(const LChar*, size_t, bool* ok = 0, int base = 10); +uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); +intptr_t charactersToIntPtrStrict(const LChar*, size_t, bool* ok = 0, int base = 10); +intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10); + +int charactersToInt(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage +int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +unsigned charactersToUInt(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage +unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +int64_t charactersToInt64(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage +int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +uint64_t 
charactersToUInt64(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage +uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +intptr_t charactersToIntPtr(const LChar*, size_t, bool* ok = 0); // ignores trailing garbage +intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage + +WTF_EXPORT_PRIVATE double charactersToDouble(const LChar*, size_t, bool* ok = 0, bool* didReadNumber = 0); +WTF_EXPORT_PRIVATE double charactersToDouble(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0); +float charactersToFloat(const LChar*, size_t, bool* ok = 0, bool* didReadNumber = 0); +float charactersToFloat(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0); + +enum FloatConversionFlags { + ShouldRoundSignificantFigures = 1 << 0, + ShouldRoundDecimalPlaces = 1 << 1, + ShouldTruncateTrailingZeros = 1 << 2 +}; + +template bool isAllSpecialCharacters(const UChar*, size_t); + +class String { +public: + // Construct a null string, distinguishable from an empty string. + String() { } + + // Construct a string with UTF-16 data. + WTF_EXPORT_PRIVATE String(const UChar* characters, unsigned length); + + // Construct a string by copying the contents of a vector. To avoid + // copying, consider using String::adopt instead. + template + explicit String(const Vector&); + + // Construct a string with UTF-16 data, from a null-terminated source. + WTF_EXPORT_PRIVATE String(const UChar*); + + // Construct a string with latin1 data. + WTF_EXPORT_PRIVATE String(const LChar* characters, unsigned length); + WTF_EXPORT_PRIVATE String(const char* characters, unsigned length); + + // Construct a string with latin1 data, from a null-terminated source. + WTF_EXPORT_PRIVATE String(const LChar* characters); + WTF_EXPORT_PRIVATE String(const char* characters); + + // Construct a string referencing an existing StringImpl. 
+ String(StringImpl* impl) : m_impl(impl) { } + String(PassRefPtr impl) : m_impl(impl) { } + String(RefPtr impl) : m_impl(impl) { } + + // Inline the destructor. + ALWAYS_INLINE ~String() { } + + void swap(String& o) { m_impl.swap(o.m_impl); } + + static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); } + static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); } + template + static String adopt(Vector& vector) { return StringImpl::adopt(vector); } + + bool isNull() const { return !m_impl; } + bool isEmpty() const { return !m_impl || !m_impl->length(); } + + StringImpl* impl() const { return m_impl.get(); } + + unsigned length() const + { + if (!m_impl) + return 0; + return m_impl->length(); + } + + const UChar* characters() const + { + if (!m_impl) + return 0; + return m_impl->characters(); + } + + const LChar* characters8() const + { + if (!m_impl) + return 0; + ASSERT(m_impl->is8Bit()); + return m_impl->characters8(); + } + + const UChar* characters16() const + { + if (!m_impl) + return 0; + ASSERT(!m_impl->is8Bit()); + return m_impl->characters16(); + } + + template + inline const CharType* getCharacters() const; + + bool is8Bit() const { return m_impl->is8Bit(); } + + WTF_EXPORT_PRIVATE CString ascii() const; + WTF_EXPORT_PRIVATE CString latin1() const; + WTF_EXPORT_PRIVATE CString utf8(bool strict = false) const; + + UChar operator[](unsigned index) const + { + if (!m_impl || index >= m_impl->length()) + return 0; + return m_impl->characters()[index]; + } + + WTF_EXPORT_PRIVATE static String number(short); + WTF_EXPORT_PRIVATE static String number(unsigned short); + WTF_EXPORT_PRIVATE static String number(int); + WTF_EXPORT_PRIVATE static String number(unsigned); + WTF_EXPORT_PRIVATE static String number(long); + WTF_EXPORT_PRIVATE static String number(unsigned long); + WTF_EXPORT_PRIVATE static String number(long long); + WTF_EXPORT_PRIVATE static String number(unsigned long long); + WTF_EXPORT_PRIVATE static 
String number(double, unsigned = ShouldRoundSignificantFigures | ShouldTruncateTrailingZeros, unsigned precision = 6); + + // Find a single character or string, also with match function & latin1 forms. + size_t find(UChar c, unsigned start = 0) const + { return m_impl ? m_impl->find(c, start) : notFound; } + size_t find(const String& str, unsigned start = 0) const + { return m_impl ? m_impl->find(str.impl(), start) : notFound; } + size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const + { return m_impl ? m_impl->find(matchFunction, start) : notFound; } + size_t find(const LChar* str, unsigned start = 0) const + { return m_impl ? m_impl->find(str, start) : notFound; } + + // Find the last instance of a single character or string. + size_t reverseFind(UChar c, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFind(c, start) : notFound; } + size_t reverseFind(const String& str, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; } + + // Case insensitive string matching. + size_t findIgnoringCase(const LChar* str, unsigned start = 0) const + { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; } + size_t findIgnoringCase(const String& str, unsigned start = 0) const + { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; } + size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; } + + // Wrappers for find & reverseFind adding dynamic sensitivity check. + size_t find(const LChar* str, unsigned start, bool caseSensitive) const + { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); } + size_t find(const String& str, unsigned start, bool caseSensitive) const + { return caseSensitive ? 
find(str, start) : findIgnoringCase(str, start); } + size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const + { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); } + + WTF_EXPORT_PRIVATE const UChar* charactersWithNullTermination(); + + WTF_EXPORT_PRIVATE UChar32 characterStartingAt(unsigned) const; // Ditto. + + bool contains(UChar c) const { return find(c) != notFound; } + bool contains(const LChar* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } + bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } + + bool startsWith(const String& s, bool caseSensitive = true) const + { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); } + bool endsWith(const String& s, bool caseSensitive = true) const + { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); } + + WTF_EXPORT_PRIVATE void append(const String&); + WTF_EXPORT_PRIVATE void append(LChar); + void append(char c) { append(static_cast(c)); }; + WTF_EXPORT_PRIVATE void append(UChar); + WTF_EXPORT_PRIVATE void append(const UChar*, unsigned length); + WTF_EXPORT_PRIVATE void insert(const String&, unsigned pos); + void insert(const UChar*, unsigned length, unsigned pos); + + String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; } + String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; } + String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; } + String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; } + + void makeLower() { if (m_impl) m_impl = m_impl->lower(); } + void makeUpper() { if (m_impl) m_impl = m_impl->upper(); } + void fill(UChar c) { if (m_impl) m_impl = 
m_impl->fill(c); } + + WTF_EXPORT_PRIVATE void truncate(unsigned len); + WTF_EXPORT_PRIVATE void remove(unsigned pos, int len = 1); + + WTF_EXPORT_PRIVATE String substring(unsigned pos, unsigned len = UINT_MAX) const; + String substringSharingImpl(unsigned pos, unsigned len = UINT_MAX) const; + String left(unsigned len) const { return substring(0, len); } + String right(unsigned len) const { return substring(length() - len, len); } + + // Returns a lowercase/uppercase version of the string + WTF_EXPORT_PRIVATE String lower() const; + WTF_EXPORT_PRIVATE String upper() const; + + WTF_EXPORT_PRIVATE String stripWhiteSpace() const; + WTF_EXPORT_PRIVATE String stripWhiteSpace(IsWhiteSpaceFunctionPtr) const; + WTF_EXPORT_PRIVATE String simplifyWhiteSpace() const; + WTF_EXPORT_PRIVATE String simplifyWhiteSpace(IsWhiteSpaceFunctionPtr) const; + + WTF_EXPORT_PRIVATE String removeCharacters(CharacterMatchFunctionPtr) const; + template bool isAllSpecialCharacters() const; + + // Return the string with case folded for case insensitive comparison. + WTF_EXPORT_PRIVATE String foldCase() const; + +#if !PLATFORM(QT) + WTF_EXPORT_PRIVATE static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2); +#else + WTF_EXPORT_PRIVATE static String format(const char *, ...); +#endif + + // Returns an uninitialized string. The characters needs to be written + // into the buffer returned in data before the returned string is used. + // Failure to do this will have unpredictable results. 
+ static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); } + + WTF_EXPORT_PRIVATE void split(const String& separator, Vector& result) const; + WTF_EXPORT_PRIVATE void split(const String& separator, bool allowEmptyEntries, Vector& result) const; + WTF_EXPORT_PRIVATE void split(UChar separator, Vector& result) const; + WTF_EXPORT_PRIVATE void split(UChar separator, bool allowEmptyEntries, Vector& result) const; + + WTF_EXPORT_PRIVATE int toIntStrict(bool* ok = 0, int base = 10) const; + WTF_EXPORT_PRIVATE unsigned toUIntStrict(bool* ok = 0, int base = 10) const; + WTF_EXPORT_PRIVATE int64_t toInt64Strict(bool* ok = 0, int base = 10) const; + WTF_EXPORT_PRIVATE uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const; + WTF_EXPORT_PRIVATE intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const; + + WTF_EXPORT_PRIVATE int toInt(bool* ok = 0) const; + WTF_EXPORT_PRIVATE unsigned toUInt(bool* ok = 0) const; + int64_t toInt64(bool* ok = 0) const; + WTF_EXPORT_PRIVATE uint64_t toUInt64(bool* ok = 0) const; + WTF_EXPORT_PRIVATE intptr_t toIntPtr(bool* ok = 0) const; + WTF_EXPORT_PRIVATE double toDouble(bool* ok = 0, bool* didReadNumber = 0) const; + WTF_EXPORT_PRIVATE float toFloat(bool* ok = 0, bool* didReadNumber = 0) const; + + bool percentage(int& percentage) const; + + WTF_EXPORT_PRIVATE String isolatedCopy() const; + + // Prevent Strings from being implicitly convertable to bool as it will be ambiguous on any platform that + // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*). 
+ typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA* (String::*UnspecifiedBoolTypeA); + typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB* (String::*UnspecifiedBoolTypeB); + operator UnspecifiedBoolTypeA() const; + operator UnspecifiedBoolTypeB() const; + +#if USE(CF) + String(CFStringRef); + CFStringRef createCFString() const; +#endif + +#ifdef __OBJC__ + String(NSString*); + + // This conversion maps NULL to "", which loses the meaning of NULL, but we + // need this mapping because AppKit crashes when passed nil NSStrings. + operator NSString*() const { if (!m_impl) return @""; return *m_impl; } +#endif + +#if PLATFORM(QT) + String(const QString&); + String(const QStringRef&); + operator QString() const; +#endif + +#if PLATFORM(WX) + WTF_EXPORT_PRIVATE String(const wxString&); + WTF_EXPORT_PRIVATE operator wxString() const; +#endif + +#if PLATFORM(BLACKBERRY) + String(const BlackBerry::WebKit::WebString&); + operator BlackBerry::WebKit::WebString() const; +#endif + + // String::fromUTF8 will return a null string if + // the input data contains invalid UTF-8 sequences. + WTF_EXPORT_PRIVATE static String fromUTF8(const LChar*, size_t); + WTF_EXPORT_PRIVATE static String fromUTF8(const LChar*); + static String fromUTF8(const char* s, size_t length) { return fromUTF8(reinterpret_cast(s), length); }; + static String fromUTF8(const char* s) { return fromUTF8(reinterpret_cast(s)); }; + + // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8. + WTF_EXPORT_PRIVATE static String fromUTF8WithLatin1Fallback(const LChar*, size_t); + static String fromUTF8WithLatin1Fallback(const char* s, size_t length) { return fromUTF8WithLatin1Fallback(reinterpret_cast(s), length); }; + + // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3. 
+ WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0) const + { + if (m_impl) + return m_impl->defaultWritingDirection(hasStrongDirectionality); + if (hasStrongDirectionality) + *hasStrongDirectionality = false; + return WTF::Unicode::LeftToRight; + } + + bool containsOnlyASCII() const; + bool containsOnlyLatin1() const; + bool containsOnlyWhitespace() const { return !m_impl || m_impl->containsOnlyWhitespace(); } + + // Hash table deleted values, which are only constructed and never copied or destroyed. + String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { } + bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); } + +#ifndef NDEBUG + void show() const; +#endif + +private: + RefPtr m_impl; +}; + +#if PLATFORM(QT) +QDataStream& operator<<(QDataStream& stream, const String& str); +QDataStream& operator>>(QDataStream& stream, String& str); +#endif + +inline String& operator+=(String& a, const String& b) { a.append(b); return a; } + +inline bool operator==(const String& a, const String& b) { return equal(a.impl(), b.impl()); } +inline bool operator==(const String& a, const LChar* b) { return equal(a.impl(), b); } +inline bool operator==(const String& a, const char* b) { return equal(a.impl(), reinterpret_cast(b)); } +inline bool operator==(const LChar* a, const String& b) { return equal(a, b.impl()); } +inline bool operator==(const char* a, const String& b) { return equal(reinterpret_cast(a), b.impl()); } +template +inline bool operator==(const Vector& a, const String& b) { return equal(b.impl(), a.data(), a.size()); } +template +inline bool operator==(const String& a, const Vector& b) { return b == a; } + + +inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); } +inline bool operator!=(const String& a, const LChar* b) { return !equal(a.impl(), b); } +inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), 
reinterpret_cast(b)); } +inline bool operator!=(const LChar* a, const String& b) { return !equal(a, b.impl()); } +inline bool operator!=(const char* a, const String& b) { return !equal(reinterpret_cast(a), b.impl()); } +template +inline bool operator!=(const Vector& a, const String& b) { return !(a == b); } +template +inline bool operator!=(const String& a, const Vector& b) { return b != a; } + +inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); } +inline bool equalIgnoringCase(const String& a, const LChar* b) { return equalIgnoringCase(a.impl(), b); } +inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), reinterpret_cast(b)); } +inline bool equalIgnoringCase(const LChar* a, const String& b) { return equalIgnoringCase(a, b.impl()); } +inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(reinterpret_cast(a), b.impl()); } + +inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase) +{ + return ignoreCase ? equalIgnoringCase(a, b) : (a == b); +} + +inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); } + +template +inline bool equalIgnoringNullity(const Vector& a, const String& b) { return equalIgnoringNullity(a, b.impl()); } + +inline bool operator!(const String& str) { return str.isNull(); } + +inline void swap(String& a, String& b) { a.swap(b); } + +// Definitions of string operations + +template +String::String(const Vector& vector) + : m_impl(vector.size() ? 
StringImpl::create(vector.data(), vector.size()) : 0) +{ +} + +template<> +inline const LChar* String::getCharacters() const +{ + ASSERT(is8Bit()); + return characters8(); +} + +template<> +inline const UChar* String::getCharacters() const +{ + ASSERT(!is8Bit()); + return characters16(); +} + +inline bool String::containsOnlyLatin1() const +{ + if (isEmpty()) + return true; + + if (is8Bit()) + return true; + + const UChar* characters = characters16(); + UChar ored = 0; + for (size_t i = 0; i < m_impl->length(); ++i) + ored |= characters[i]; + return !(ored & 0xFF00); +} + + +#ifdef __OBJC__ +// This is for situations in WebKit where the long standing behavior has been +// "nil if empty", so we try to maintain longstanding behavior for the sake of +// entrenched clients +inline NSString* nsStringNilIfEmpty(const String& str) { return str.isEmpty() ? nil : (NSString*)str; } +#endif + +inline bool String::containsOnlyASCII() const +{ + if (isEmpty()) + return true; + + if (is8Bit()) + return charactersAreAllASCII(characters8(), m_impl->length()); + + return charactersAreAllASCII(characters16(), m_impl->length()); +} + +WTF_EXPORT_PRIVATE int codePointCompare(const String&, const String&); + +inline bool codePointCompareLessThan(const String& a, const String& b) +{ + return codePointCompare(a.impl(), b.impl()) < 0; +} + +inline size_t find(const LChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) +{ + while (index < length) { + if (characters[index] == matchCharacter) + return index; + ++index; + } + return notFound; +} + +inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) +{ + while (index < length) { + if (characters[index] == matchCharacter) + return index; + ++index; + } + return notFound; +} + +inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) +{ + while (index < length) { + if (matchFunction(characters[index])) + 
return index; + ++index; + } + return notFound; +} + +inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) +{ + while (index < length) { + if (matchFunction(characters[index])) + return index; + ++index; + } + return notFound; +} + +inline size_t reverseFind(const LChar* characters, unsigned length, LChar matchCharacter, unsigned index = UINT_MAX) +{ + if (!length) + return notFound; + if (index >= length) + index = length - 1; + while (characters[index] != matchCharacter) { + if (!index--) + return notFound; + } + return index; +} + +inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX) +{ + if (!length) + return notFound; + if (index >= length) + index = length - 1; + while (characters[index] != matchCharacter) { + if (!index--) + return notFound; + } + return index; +} + +inline void append(Vector& vector, const String& string) +{ + vector.append(string.characters(), string.length()); +} + +inline void appendNumber(Vector& vector, unsigned char number) +{ + int numberLength = number > 99 ? 3 : (number > 9 ? 
2 : 1); + size_t vectorSize = vector.size(); + vector.grow(vectorSize + numberLength); + + switch (numberLength) { + case 3: + vector[vectorSize + 2] = number % 10 + '0'; + number /= 10; + + case 2: + vector[vectorSize + 1] = number % 10 + '0'; + number /= 10; + + case 1: + vector[vectorSize] = number % 10 + '0'; + } +} + +template inline bool isAllSpecialCharacters(const UChar* characters, size_t length) +{ + for (size_t i = 0; i < length; ++i) { + if (!isSpecialCharacter(characters[i])) + return false; + } + return true; +} + +template inline bool String::isAllSpecialCharacters() const +{ + return WTF::isAllSpecialCharacters(characters(), length()); +} + +// StringHash is the default hash for String +template struct DefaultHash; +template<> struct DefaultHash { + typedef StringHash Hash; +}; + +template <> struct VectorTraits : SimpleClassVectorTraits { }; + +// Shared global empty string. +WTF_EXPORT_PRIVATE const String& emptyString(); + +} + +using WTF::CString; +using WTF::String; +using WTF::emptyString; +using WTF::append; +using WTF::appendNumber; +using WTF::charactersAreAllASCII; +using WTF::charactersToIntStrict; +using WTF::charactersToUIntStrict; +using WTF::charactersToInt64Strict; +using WTF::charactersToUInt64Strict; +using WTF::charactersToIntPtrStrict; +using WTF::charactersToInt; +using WTF::charactersToUInt; +using WTF::charactersToInt64; +using WTF::charactersToUInt64; +using WTF::charactersToIntPtr; +using WTF::charactersToDouble; +using WTF::charactersToFloat; +using WTF::equal; +using WTF::equalIgnoringCase; +using WTF::find; +using WTF::isAllSpecialCharacters; +using WTF::isSpaceOrNewline; +using WTF::reverseFind; +using WTF::ShouldRoundDecimalPlaces; + +#include "AtomicString.h" +#endif -- cgit v1.2.1