summaryrefslogtreecommitdiff
path: root/Source/JavaScriptCore/KeywordLookupGenerator.py
diff options
context:
space:
mode:
Diffstat (limited to 'Source/JavaScriptCore/KeywordLookupGenerator.py')
-rw-r--r--Source/JavaScriptCore/KeywordLookupGenerator.py310
1 files changed, 310 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/KeywordLookupGenerator.py b/Source/JavaScriptCore/KeywordLookupGenerator.py
new file mode 100644
index 000000000..d13daba61
--- /dev/null
+++ b/Source/JavaScriptCore/KeywordLookupGenerator.py
@@ -0,0 +1,310 @@
+# Copyright (C) 2011 Apple Inc. All rights reserved.
+# Copyright (C) 2012 Sony Network Entertainment. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import string
+import operator
+
+keywordsText = open(sys.argv[1]).read()
+
+# A second argument signifies that the output
+# should be redirected to a file
+redirect_to_file = len(sys.argv) > 2
+
+# Change stdout to point to the file if requested
+if redirect_to_file:
+ file_output = open(sys.argv[-1], "w")
+ sys.stdout = file_output
+
+# Observed weights of the most common keywords, rounded to 2.s.d
+keyWordWeights = {
+ "catch": 0.01,
+ "try": 0.01,
+ "while": 0.01,
+ "case": 0.01,
+ "break": 0.01,
+ "new": 0.01,
+ "in": 0.01,
+ "typeof": 0.02,
+ "true": 0.02,
+ "false": 0.02,
+ "for": 0.03,
+ "null": 0.03,
+ "else": 0.03,
+ "return": 0.13,
+ "var": 0.13,
+ "if": 0.16,
+ "function": 0.18,
+ "this": 0.18,
+}
+
+
+def allWhitespace(str):
+ for c in str:
+ if not(c in string.whitespace):
+ return False
+ return True
+
+
+def parseKeywords(keywordsText):
+
+ if sys.platform == "cygwin":
+ keywordsText = keywordsText.replace("\r\n", "\n")
+
+ lines = keywordsText.split("\n")
+ lines = [line.split("#")[0] for line in lines]
+ lines = [line for line in lines if (not allWhitespace(line))]
+ name = lines[0].split()
+ terminator = lines[-1]
+
+ if not name[0] == "@begin":
+ raise Exception("expected description beginning with @begin")
+ if not terminator == "@end":
+ raise Exception("expected description ending with @end")
+
+ lines = lines[1:-1] # trim off the old heading
+ return [line.split() for line in lines]
+
+
+def makePadding(size):
+ str = ""
+ for i in range(size):
+ str = str + " "
+ return str
+
+
+class Trie:
+ def __init__(self, prefix):
+ self.prefix = prefix
+ self.keys = {}
+ self.value = None
+
+ def insert(self, key, value):
+ if len(key) == 0:
+ self.value = value
+ return
+ if not (key[0] in self.keys):
+ self.keys[key[0]] = Trie(key[0])
+ self.keys[key[0]].insert(key[1:], value)
+
+ def coalesce(self):
+ keys = {}
+ for k, v in self.keys.items():
+ t = v.coalesce()
+ keys[t.prefix] = t
+ self.keys = keys
+ if self.value != None:
+ return self
+ if len(self.keys) != 1:
+ return self
+ # Python 3: for() loop for compatibility. Use next() when Python 2.6 is the baseline.
+ for (prefix, suffix) in self.keys.items():
+ res = Trie(self.prefix + prefix)
+ res.value = suffix.value
+ res.keys = suffix.keys
+ return res
+
+ def fillOut(self, prefix=""):
+ self.fullPrefix = prefix + self.prefix
+ weight = 0
+ if self.fullPrefix in keyWordWeights:
+ weight = weight + keyWordWeights[self.fullPrefix]
+ self.selfWeight = weight
+ for trie in self.keys.values():
+ trie.fillOut(self.fullPrefix)
+ weight = weight + trie.weight
+ self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)]
+ self.weight = weight
+
+ def printSubTreeAsC(self, typeName, indent):
+ str = makePadding(indent)
+
+ if self.value != None:
+ print(str + "if (!isIdentPartIncludingEscape(code+%d, m_codeEnd)) {" % (len(self.fullPrefix)))
+ print(str + " internalShift<%d>();" % len(self.fullPrefix))
+ print(str + " if (shouldCreateIdentifier)")
+ print(str + (" data->ident = &m_vm->propertyNames->%sKeyword;" % self.fullPrefix))
+ print(str + " return " + self.value + ";")
+ print(str + "}")
+ rootIndex = len(self.fullPrefix)
+ itemCount = 0
+ for k, trie in self.keys:
+ baseIndex = rootIndex
+ if (baseIndex > 0) and (len(k) == 3):
+ baseIndex = baseIndex - 1
+ k = trie.fullPrefix[baseIndex] + k
+ test = [("'%s'" % c) for c in k]
+ if len(test) == 1:
+ comparison = "code[%d] == %s" % (baseIndex, test[0])
+ else:
+ base = "code"
+ if baseIndex > 0:
+ base = "code + %d" % baseIndex
+ comparison = ("COMPARE_%d%sS(%s, " % (len(test), typeName, base)) + ", ".join(test) + ")"
+ if itemCount == 0:
+ print(str + "if (" + comparison + ") {")
+ else:
+ print(str + "} else if (" + comparison + ") {")
+
+ trie.printSubTreeAsC(typeName, indent + 4)
+ itemCount = itemCount + 1
+
+ if itemCount == len(self.keys):
+ print(str + "}")
+
+ def maxLength(self):
+ max = len(self.fullPrefix)
+ for (_, trie) in self.keys:
+ l = trie.maxLength()
+ if l > max:
+ max = l
+ return max
+
+ def printAsC(self):
+ print("namespace JSC {")
+ print("")
+ print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd);")
+ print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd);")
+ # max length + 1 so we don't need to do any bounds checking at all
+ print("static const int maxTokenLength = %d;" % (self.maxLength() + 1))
+ print("")
+ print("template <>")
+ print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseKeyword(JSTokenData* data)")
+ print("{")
+ print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);")
+ print("")
+ print(" const UChar* code = m_code;")
+ self.printSubTreeAsC("UCHAR", 4)
+ print(" return IDENT;")
+ print("}")
+ print("")
+ print("template <>")
+ print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseKeyword(JSTokenData* data)")
+ print("{")
+ print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);")
+ print("")
+ print(" const LChar* code = m_code;")
+ self.printSubTreeAsC("CHAR", 4)
+ print(" return IDENT;")
+ print("}")
+ print("")
+ print("} // namespace JSC")
+
+keywords = parseKeywords(keywordsText)
+trie = Trie("")
+for k, v in keywords:
+ trie.insert(k, v)
+trie.coalesce()
+trie.fillOut()
+print("// This file was generated by KeywordLookupGenerator.py. Do not edit.")
+print("""
+#if CPU(NEEDS_ALIGNED_ACCESS)
+
+#define COMPARE_2CHARS(address, char1, char2) \\
+ (((address)[0] == char1) && ((address)[1] == char2))
+#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
+ (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4))
+#define COMPARE_2UCHARS(address, char1, char2) \\
+ (((address)[0] == char1) && ((address)[1] == char2))
+#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
+ (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4))
+
+#else // CPU(NEEDS_ALIGNED_ACCESS)
+
+#if CPU(BIG_ENDIAN)
+
+#define CHARPAIR_TOUINT16(a, b) \\
+ ((((uint16_t)(a)) << 8) + (uint16_t)(b))
+#define CHARQUAD_TOUINT32(a, b, c, d) \\
+ ((((uint32_t)(CHARPAIR_TOUINT16(a, b))) << 16) + CHARPAIR_TOUINT16(c, d))
+#define UCHARPAIR_TOUINT32(a, b) \\
+ ((((uint32_t)(a)) << 16) + (uint32_t)(b))
+#define UCHARQUAD_TOUINT64(a, b, c, d) \\
+ ((((uint64_t)(UCHARQUAD_TOUINT64(a, b))) << 32) + UCHARPAIR_TOUINT32(c, d))
+
+#else // CPU(BIG_ENDIAN)
+
+#define CHARPAIR_TOUINT16(a, b) \\
+ ((((uint16_t)(b)) << 8) + (uint16_t)(a))
+#define CHARQUAD_TOUINT32(a, b, c, d) \\
+ ((((uint32_t)(CHARPAIR_TOUINT16(c, d))) << 16) + CHARPAIR_TOUINT16(a, b))
+#define UCHARPAIR_TOUINT32(a, b) \\
+ ((((uint32_t)(b)) << 16) + (uint32_t)(a))
+#define UCHARQUAD_TOUINT64(a, b, c, d) \\
+ ((((uint64_t)(UCHARPAIR_TOUINT32(c, d))) << 32) + UCHARPAIR_TOUINT32(a, b))
+
+#endif // CPU(BIG_ENDIAN)
+
+
+#define COMPARE_2CHARS(address, char1, char2) \\
+ ((reinterpret_cast<const uint16_t*>(address))[0] == CHARPAIR_TOUINT16(char1, char2))
+#define COMPARE_2UCHARS(address, char1, char2) \\
+ ((reinterpret_cast<const uint32_t*>(address))[0] == UCHARPAIR_TOUINT32(char1, char2))
+
+#if CPU(X86_64)
+
+#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
+ ((reinterpret_cast<const uint32_t*>(address))[0] == CHARQUAD_TOUINT32(char1, char2, char3, char4))
+#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
+ ((reinterpret_cast<const uint64_t*>(address))[0] == UCHARQUAD_TOUINT64(char1, char2, char3, char4))
+
+#else // CPU(X86_64)
+
+#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
+ (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4))
+#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
+ (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4))
+
+#endif // CPU(X86_64)
+
+#endif // CPU(NEEDS_ALIGNED_ACCESS)
+
+#define COMPARE_3CHARS(address, char1, char2, char3) \\
+ (COMPARE_2CHARS(address, char1, char2) && ((address)[2] == (char3)))
+#define COMPARE_3UCHARS(address, char1, char2, char3) \\
+ (COMPARE_2UCHARS(address, char1, char2) && ((address)[2] == (char3)))
+#define COMPARE_5CHARS(address, char1, char2, char3, char4, char5) \\
+ (COMPARE_4CHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
+#define COMPARE_5UCHARS(address, char1, char2, char3, char4, char5) \\
+ (COMPARE_4UCHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
+#define COMPARE_6CHARS(address, char1, char2, char3, char4, char5, char6) \\
+ (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_2CHARS(address + 4, char5, char6))
+#define COMPARE_6UCHARS(address, char1, char2, char3, char4, char5, char6) \\
+ (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_2UCHARS(address + 4, char5, char6))
+#define COMPARE_7CHARS(address, char1, char2, char3, char4, char5, char6, char7) \\
+ (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 3, char4, char5, char6, char7))
+#define COMPARE_7UCHARS(address, char1, char2, char3, char4, char5, char6, char7) \\
+ (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 3, char4, char5, char6, char7))
+#define COMPARE_8CHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\
+ (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 4, char5, char6, char7, char8))
+#define COMPARE_8UCHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\
+ (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 4, char5, char6, char7, char8))
+""")
+
+trie.printAsC()
+
+# Close the redirected file if requested
+if (redirect_to_file):
+ file_output.close()
+ sys.stdout = sys.__stdout__