// Copyright 2012 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef V8_PREPARSER_H #define V8_PREPARSER_H #include "hashmap.h" #include "token.h" #include "scanner.h" namespace v8 { namespace internal { // Common base class shared between parser and pre-parser. class ParserBase { public: ParserBase(Scanner* scanner, uintptr_t stack_limit) : scanner_(scanner), stack_limit_(stack_limit), stack_overflow_(false), allow_lazy_(false), allow_natives_syntax_(false), allow_generators_(false), allow_for_of_(false) { } // TODO(mstarzinger): Only virtual until message reporting has been unified. virtual ~ParserBase() { } // Getters that indicate whether certain syntactical constructs are // allowed to be parsed by this instance of the parser. bool allow_lazy() const { return allow_lazy_; } bool allow_natives_syntax() const { return allow_natives_syntax_; } bool allow_generators() const { return allow_generators_; } bool allow_for_of() const { return allow_for_of_; } bool allow_modules() const { return scanner()->HarmonyModules(); } bool allow_harmony_scoping() const { return scanner()->HarmonyScoping(); } bool allow_harmony_numeric_literals() const { return scanner()->HarmonyNumericLiterals(); } // Setters that determine whether certain syntactical constructs are // allowed to be parsed by this instance of the parser. void set_allow_lazy(bool allow) { allow_lazy_ = allow; } void set_allow_natives_syntax(bool allow) { allow_natives_syntax_ = allow; } void set_allow_generators(bool allow) { allow_generators_ = allow; } void set_allow_for_of(bool allow) { allow_for_of_ = allow; } void set_allow_modules(bool allow) { scanner()->SetHarmonyModules(allow); } void set_allow_harmony_scoping(bool allow) { scanner()->SetHarmonyScoping(allow); } void set_allow_harmony_numeric_literals(bool allow) { scanner()->SetHarmonyNumericLiterals(allow); } protected: Scanner* scanner() const { return scanner_; } int position() { return scanner_->location().beg_pos; } int peek_position() { return scanner_->peek_location().beg_pos; } bool stack_overflow() const { return stack_overflow_; } void set_stack_overflow() { stack_overflow_ = true; } INLINE(Token::Value peek()) { if (stack_overflow_) return Token::ILLEGAL; return scanner()->peek(); } INLINE(Token::Value Next()) { if (stack_overflow_) return Token::ILLEGAL; { int marker; if (reinterpret_cast(&marker) < stack_limit_) { // Any further calls to Next or peek will return the illegal token. // The current call must return the next token, which might already // have been peek'ed. stack_overflow_ = true; } } return scanner()->Next(); } void Consume(Token::Value token) { Token::Value next = Next(); USE(next); USE(token); ASSERT(next == token); } bool Check(Token::Value token) { Token::Value next = peek(); if (next == token) { Consume(next); return true; } return false; } void Expect(Token::Value token, bool* ok) { Token::Value next = Next(); if (next != token) { ReportUnexpectedToken(next); *ok = false; } } bool peek_any_identifier(); void ExpectSemicolon(bool* ok); bool CheckContextualKeyword(Vector keyword); void ExpectContextualKeyword(Vector keyword, bool* ok); // Strict mode octal literal validation. void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok); // Determine precedence of given token. static int Precedence(Token::Value token, bool accept_IN); // Report syntax errors. virtual void ReportUnexpectedToken(Token::Value token) = 0; virtual void ReportMessageAt(Scanner::Location loc, const char* type) = 0; // Used to detect duplicates in object literals. Each of the values // kGetterProperty, kSetterProperty and kValueProperty represents // a type of object literal property. When parsing a property, its // type value is stored in the DuplicateFinder for the property name. // Values are chosen so that having intersection bits means the there is // an incompatibility. // I.e., you can add a getter to a property that already has a setter, since // kGetterProperty and kSetterProperty doesn't intersect, but not if it // already has a getter or a value. Adding the getter to an existing // setter will store the value (kGetterProperty | kSetterProperty), which // is incompatible with adding any further properties. enum PropertyKind { kNone = 0, // Bit patterns representing different object literal property types. kGetterProperty = 1, kSetterProperty = 2, kValueProperty = 7, // Helper constants. kValueFlag = 4 }; // Validation per ECMA 262 - 11.1.5 "Object Initialiser". class ObjectLiteralChecker { public: ObjectLiteralChecker(ParserBase* parser, LanguageMode mode) : parser_(parser), finder_(scanner()->unicode_cache()), language_mode_(mode) { } void CheckProperty(Token::Value property, PropertyKind type, bool* ok); private: ParserBase* parser() const { return parser_; } Scanner* scanner() const { return parser_->scanner(); } // Checks the type of conflict based on values coming from PropertyType. bool HasConflict(PropertyKind type1, PropertyKind type2) { return (type1 & type2) != 0; } bool IsDataDataConflict(PropertyKind type1, PropertyKind type2) { return ((type1 & type2) & kValueFlag) != 0; } bool IsDataAccessorConflict(PropertyKind type1, PropertyKind type2) { return ((type1 ^ type2) & kValueFlag) != 0; } bool IsAccessorAccessorConflict(PropertyKind type1, PropertyKind type2) { return ((type1 | type2) & kValueFlag) == 0; } ParserBase* parser_; DuplicateFinder finder_; LanguageMode language_mode_; }; private: Scanner* scanner_; uintptr_t stack_limit_; bool stack_overflow_; bool allow_lazy_; bool allow_natives_syntax_; bool allow_generators_; bool allow_for_of_; }; // Preparsing checks a JavaScript program and emits preparse-data that helps // a later parsing to be faster. // See preparse-data-format.h for the data format. // The PreParser checks that the syntax follows the grammar for JavaScript, // and collects some information about the program along the way. // The grammar check is only performed in order to understand the program // sufficiently to deduce some information about it, that can be used // to speed up later parsing. Finding errors is not the goal of pre-parsing, // rather it is to speed up properly written and correct programs. // That means that contextual checks (like a label being declared where // it is used) are generally omitted. class PreParser : public ParserBase { public: enum PreParseResult { kPreParseStackOverflow, kPreParseSuccess }; PreParser(Scanner* scanner, ParserRecorder* log, uintptr_t stack_limit) : ParserBase(scanner, stack_limit), log_(log), scope_(NULL), strict_mode_violation_location_(Scanner::Location::invalid()), strict_mode_violation_type_(NULL), parenthesized_function_(false) { } ~PreParser() {} // Pre-parse the program from the character stream; returns true on // success (even if parsing failed, the pre-parse data successfully // captured the syntax error), and false if a stack-overflow happened // during parsing. PreParseResult PreParseProgram() { Scope top_scope(&scope_, kTopLevelScope); bool ok = true; int start_position = scanner()->peek_location().beg_pos; ParseSourceElements(Token::EOS, &ok); if (stack_overflow()) return kPreParseStackOverflow; if (!ok) { ReportUnexpectedToken(scanner()->current_token()); } else if (!scope_->is_classic_mode()) { CheckOctalLiteral(start_position, scanner()->location().end_pos, &ok); } return kPreParseSuccess; } // Parses a single function literal, from the opening parentheses before // parameters to the closing brace after the body. // Returns a FunctionEntry describing the body of the function in enough // detail that it can be lazily compiled. // The scanner is expected to have matched the "function" or "function*" // keyword and parameters, and have consumed the initial '{'. // At return, unless an error occurred, the scanner is positioned before the // the final '}'. PreParseResult PreParseLazyFunction(LanguageMode mode, bool is_generator, ParserRecorder* log); private: // These types form an algebra over syntactic categories that is just // rich enough to let us recognize and propagate the constructs that // are either being counted in the preparser data, or is important // to throw the correct syntax error exceptions. enum ScopeType { kTopLevelScope, kFunctionScope }; enum VariableDeclarationContext { kSourceElement, kStatement, kForStatement }; // If a list of variable declarations includes any initializers. enum VariableDeclarationProperties { kHasInitializers, kHasNoInitializers }; class Expression; class Identifier { public: static Identifier Default() { return Identifier(kUnknownIdentifier); } static Identifier Eval() { return Identifier(kEvalIdentifier); } static Identifier Arguments() { return Identifier(kArgumentsIdentifier); } static Identifier FutureReserved() { return Identifier(kFutureReservedIdentifier); } static Identifier FutureStrictReserved() { return Identifier(kFutureStrictReservedIdentifier); } static Identifier Yield() { return Identifier(kYieldIdentifier); } bool IsEval() { return type_ == kEvalIdentifier; } bool IsArguments() { return type_ == kArgumentsIdentifier; } bool IsEvalOrArguments() { return type_ >= kEvalIdentifier; } bool IsYield() { return type_ == kYieldIdentifier; } bool IsFutureReserved() { return type_ == kFutureReservedIdentifier; } bool IsFutureStrictReserved() { return type_ == kFutureStrictReservedIdentifier; } bool IsValidStrictVariable() { return type_ == kUnknownIdentifier; } private: enum Type { kUnknownIdentifier, kFutureReservedIdentifier, kFutureStrictReservedIdentifier, kYieldIdentifier, kEvalIdentifier, kArgumentsIdentifier }; explicit Identifier(Type type) : type_(type) { } Type type_; friend class Expression; }; // Bits 0 and 1 are used to identify the type of expression: // If bit 0 is set, it's an identifier. // if bit 1 is set, it's a string literal. // If neither is set, it's no particular type, and both set isn't // use yet. // Bit 2 is used to mark the expression as being parenthesized, // so "(foo)" isn't recognized as a pure identifier (and possible label). class Expression { public: static Expression Default() { return Expression(kUnknownExpression); } static Expression FromIdentifier(Identifier id) { return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift)); } static Expression StringLiteral() { return Expression(kUnknownStringLiteral); } static Expression UseStrictStringLiteral() { return Expression(kUseStrictString); } static Expression This() { return Expression(kThisExpression); } static Expression ThisProperty() { return Expression(kThisPropertyExpression); } static Expression StrictFunction() { return Expression(kStrictFunctionExpression); } bool IsIdentifier() { return (code_ & kIdentifierFlag) != 0; } // Only works corretly if it is actually an identifier expression. PreParser::Identifier AsIdentifier() { return PreParser::Identifier( static_cast(code_ >> kIdentifierShift)); } bool IsParenthesized() { // If bit 0 or 1 is set, we interpret bit 2 as meaning parenthesized. return (code_ & 7) > 4; } bool IsRawIdentifier() { return !IsParenthesized() && IsIdentifier(); } bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; } bool IsRawStringLiteral() { return !IsParenthesized() && IsStringLiteral(); } bool IsUseStrictLiteral() { return (code_ & kStringLiteralMask) == kUseStrictString; } bool IsThis() { return code_ == kThisExpression; } bool IsThisProperty() { return code_ == kThisPropertyExpression; } bool IsStrictFunction() { return code_ == kStrictFunctionExpression; } Expression Parenthesize() { int type = code_ & 3; if (type != 0) { // Identifiers and string literals can be parenthesized. // They no longer work as labels or directive prologues, // but are still recognized in other contexts. return Expression(code_ | kParenthesizedExpressionFlag); } // For other types of expressions, it's not important to remember // the parentheses. return *this; } private: // First two/three bits are used as flags. // Bit 0 and 1 represent identifiers or strings literals, and are // mutually exclusive, but can both be absent. // If bit 0 or 1 are set, bit 2 marks that the expression has // been wrapped in parentheses (a string literal can no longer // be a directive prologue, and an identifier can no longer be // a label. enum { kUnknownExpression = 0, // Identifiers kIdentifierFlag = 1, // Used to detect labels. kIdentifierShift = 3, kStringLiteralFlag = 2, // Used to detect directive prologue. kUnknownStringLiteral = kStringLiteralFlag, kUseStrictString = kStringLiteralFlag | 8, kStringLiteralMask = kUseStrictString, // Only if identifier or string literal. kParenthesizedExpressionFlag = 4, // Below here applies if neither identifier nor string literal. kThisExpression = 4, kThisPropertyExpression = 8, kStrictFunctionExpression = 12 }; explicit Expression(int expression_code) : code_(expression_code) { } int code_; }; class Statement { public: static Statement Default() { return Statement(kUnknownStatement); } static Statement FunctionDeclaration() { return Statement(kFunctionDeclaration); } // Creates expression statement from expression. // Preserves being an unparenthesized string literal, possibly // "use strict". static Statement ExpressionStatement(Expression expression) { if (!expression.IsParenthesized()) { if (expression.IsUseStrictLiteral()) { return Statement(kUseStrictExpressionStatement); } if (expression.IsStringLiteral()) { return Statement(kStringLiteralExpressionStatement); } } return Default(); } bool IsStringLiteral() { return code_ == kStringLiteralExpressionStatement; } bool IsUseStrictLiteral() { return code_ == kUseStrictExpressionStatement; } bool IsFunctionDeclaration() { return code_ == kFunctionDeclaration; } private: enum Type { kUnknownStatement, kStringLiteralExpressionStatement, kUseStrictExpressionStatement, kFunctionDeclaration }; explicit Statement(Type code) : code_(code) {} Type code_; }; enum SourceElements { kUnknownSourceElements }; typedef int Arguments; class Scope { public: Scope(Scope** variable, ScopeType type) : variable_(variable), prev_(*variable), type_(type), materialized_literal_count_(0), expected_properties_(0), with_nesting_count_(0), language_mode_( (prev_ != NULL) ? prev_->language_mode() : CLASSIC_MODE), is_generator_(false) { *variable = this; } ~Scope() { *variable_ = prev_; } void NextMaterializedLiteralIndex() { materialized_literal_count_++; } void AddProperty() { expected_properties_++; } ScopeType type() { return type_; } int expected_properties() { return expected_properties_; } int materialized_literal_count() { return materialized_literal_count_; } bool IsInsideWith() { return with_nesting_count_ != 0; } bool is_generator() { return is_generator_; } void set_is_generator(bool is_generator) { is_generator_ = is_generator; } bool is_classic_mode() { return language_mode_ == CLASSIC_MODE; } LanguageMode language_mode() { return language_mode_; } void set_language_mode(LanguageMode language_mode) { language_mode_ = language_mode; } class InsideWith { public: explicit InsideWith(Scope* scope) : scope_(scope) { scope->with_nesting_count_++; } ~InsideWith() { scope_->with_nesting_count_--; } private: Scope* scope_; DISALLOW_COPY_AND_ASSIGN(InsideWith); }; private: Scope** const variable_; Scope* const prev_; const ScopeType type_; int materialized_literal_count_; int expected_properties_; int with_nesting_count_; LanguageMode language_mode_; bool is_generator_; }; // Report syntax error void ReportUnexpectedToken(Token::Value token); void ReportMessageAt(Scanner::Location location, const char* type) { ReportMessageAt(location, type, NULL); } void ReportMessageAt(Scanner::Location location, const char* type, const char* name_opt) { log_->LogMessage(location.beg_pos, location.end_pos, type, name_opt); } void ReportMessageAt(int start_pos, int end_pos, const char* type, const char* name_opt) { log_->LogMessage(start_pos, end_pos, type, name_opt); } // All ParseXXX functions take as the last argument an *ok parameter // which is set to false if parsing failed; it is unchanged otherwise. // By making the 'exception handling' explicit, we are forced to check // for failure at the call sites. Statement ParseSourceElement(bool* ok); SourceElements ParseSourceElements(int end_token, bool* ok); Statement ParseStatement(bool* ok); Statement ParseFunctionDeclaration(bool* ok); Statement ParseBlock(bool* ok); Statement ParseVariableStatement(VariableDeclarationContext var_context, bool* ok); Statement ParseVariableDeclarations(VariableDeclarationContext var_context, VariableDeclarationProperties* decl_props, int* num_decl, bool* ok); Statement ParseExpressionOrLabelledStatement(bool* ok); Statement ParseIfStatement(bool* ok); Statement ParseContinueStatement(bool* ok); Statement ParseBreakStatement(bool* ok); Statement ParseReturnStatement(bool* ok); Statement ParseWithStatement(bool* ok); Statement ParseSwitchStatement(bool* ok); Statement ParseDoWhileStatement(bool* ok); Statement ParseWhileStatement(bool* ok); Statement ParseForStatement(bool* ok); Statement ParseThrowStatement(bool* ok); Statement ParseTryStatement(bool* ok); Statement ParseDebuggerStatement(bool* ok); Expression ParseExpression(bool accept_IN, bool* ok); Expression ParseAssignmentExpression(bool accept_IN, bool* ok); Expression ParseYieldExpression(bool* ok); Expression ParseConditionalExpression(bool accept_IN, bool* ok); Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok); Expression ParseUnaryExpression(bool* ok); Expression ParsePostfixExpression(bool* ok); Expression ParseLeftHandSideExpression(bool* ok); Expression ParseNewExpression(bool* ok); Expression ParseMemberExpression(bool* ok); Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok); Expression ParsePrimaryExpression(bool* ok); Expression ParseArrayLiteral(bool* ok); Expression ParseObjectLiteral(bool* ok); Expression ParseRegExpLiteral(bool seen_equal, bool* ok); Expression ParseV8Intrinsic(bool* ok); Arguments ParseArguments(bool* ok); Expression ParseFunctionLiteral(bool is_generator, bool* ok); void ParseLazyFunctionLiteralBody(bool* ok); Identifier ParseIdentifier(bool* ok); Identifier ParseIdentifierName(bool* ok); Identifier ParseIdentifierNameOrGetOrSet(bool* is_get, bool* is_set, bool* ok); // Logs the currently parsed literal as a symbol in the preparser data. void LogSymbol(); // Log the currently parsed identifier. Identifier GetIdentifierSymbol(); // Log the currently parsed string literal. Expression GetStringSymbol(); void set_language_mode(LanguageMode language_mode) { scope_->set_language_mode(language_mode); } bool is_classic_mode() { return scope_->language_mode() == CLASSIC_MODE; } bool is_extended_mode() { return scope_->language_mode() == EXTENDED_MODE; } LanguageMode language_mode() { return scope_->language_mode(); } bool CheckInOrOf(bool accept_OF); void SetStrictModeViolation(Scanner::Location, const char* type, bool* ok); void CheckDelayedStrictModeViolation(int beg_pos, int end_pos, bool* ok); void StrictModeIdentifierViolation(Scanner::Location, const char* eval_args_type, Identifier identifier, bool* ok); ParserRecorder* log_; Scope* scope_; Scanner::Location strict_mode_violation_location_; const char* strict_mode_violation_type_; bool parenthesized_function_; }; } } // v8::internal #endif // V8_PREPARSER_H