summary | refs | log | tree | commit | diff
path: root/lib/coderay/scanners/lua.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay/scanners/lua.rb')
-rw-r--r-- lib/coderay/scanners/lua.rb 524
1 files changed, 267 insertions, 257 deletions
diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb
index 64763dc..3bee275 100644
--- a/lib/coderay/scanners/lua.rb
+++ b/lib/coderay/scanners/lua.rb
@@ -1,265 +1,275 @@
-# -*- coding: utf-8 -*-
+# encoding: utf-8
-# Scanner for the Lua[http://lua.org] programming lanuage.
-#
-# The language’s complete syntax is defined in
-# {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
-# which is what this scanner tries to conform to.
-class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner
+module CodeRay
+module Scanners
- register_for :lua
- file_extension "lua"
- title "Lua"
-
- # Keywords used in Lua.
- KEYWORDS = %w[and break do else elseif end
- for function goto if in
- local not or repeat return
- then until while
- ]
-
- # Constants set by the Lua core.
- PREDEFINED_CONSTANTS = %w[false true nil]
-
- # The expressions contained in this array are parts of Lua’s `basic'
- # library. Although it’s not entirely necessary to load that library,
- # it is highly recommended and one would have to provide own implementations
- # of some of these expressions if one does not do so. They however aren’t
- # keywords, neither are they constants, but nearly predefined, so they
- # get tagged as `predefined' rather than anything else.
+ # Scanner for the Lua[http://lua.org] programming language.
#
- # This list excludes values of form `_UPPERCASE' because the Lua manual
- # requires such identifiers to be reserved by Lua anyway and they are
- # highlighted directly accordingly, without the need for specific
- # identifiers to be listed here.
- PREDEFINED_EXPRESSIONS = %w[
- assert collectgarbage dofile error getmetatable
- ipairs load loadfile next pairs pcall print
- rawequal rawget rawlen rawset select setmetatable
- tonumber tostring type xpcall
- ]
-
- # Automatic token kind selection for normal words.
- IDENT_KIND = CodeRay::WordList.new(:ident).
- add(KEYWORDS, :keyword).
- add(PREDEFINED_CONSTANTS, :predefined_constant).
- add(PREDEFINED_EXPRESSIONS, :predefined)
-
- protected
-
- # Scanner initialization.
- def setup
- @state = :initial
- @brace_depth = 0
- end
-
- # CodeRay entry hook. Starts parsing.
- def scan_tokens(encoder, options)
- @encoder = encoder
- @options = options
-
- until eos?
- case state
-
- when :initial
- if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]]
- @num_equals = match.count("=") # Number must match for comment end
- @encoder.begin_group(:comment)
- @encoder.text_token(match, :delimiter)
- @state = :long_comment
-
- elsif match = scan(/--.*?$/) # --Lua comment
- @encoder.text_token(match, :comment)
-
- elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]]
- @num_equals = match.count("=") # Number must match for comment end
- @encoder.begin_group(:string)
- @encoder.text_token(match, :delimiter)
- @state = :long_string
-
- elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label::
- @encoder.text_token(match, :label)
-
- elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua
- @encoder.text_token(match, :predefined)
-
- elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
- kind = IDENT_KIND[match]
-
- # Extra highlighting for entities following certain keywords
- if kind == :keyword and match == "function"
- @state = :function_expected
- elsif kind == :keyword and match == "goto"
- @state = :goto_label_expected
- elsif kind == :keyword and match == "local"
- @state = :local_var_expected
+ # The language’s complete syntax is defined in
+ # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+ # which is what this scanner tries to conform to.
+ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner
+
+ register_for :lua
+ file_extension "lua"
+ title "Lua"
+
+ # Keywords used in Lua.
+ KEYWORDS = %w[and break do else elseif end
+ for function goto if in
+ local not or repeat return
+ then until while
+ ]
+
+ # Constants set by the Lua core.
+ PREDEFINED_CONSTANTS = %w[false true nil]
+
+ # The expressions contained in this array are parts of Lua’s `basic'
+ # library. Although it’s not entirely necessary to load that library,
+ # it is highly recommended and one would have to provide own implementations
+ # of some of these expressions if one does not do so. They however aren’t
+ # keywords, neither are they constants, but nearly predefined, so they
+ # get tagged as `predefined' rather than anything else.
+ #
+ # This list excludes values of form `_UPPERCASE' because the Lua manual
+ # requires such identifiers to be reserved by Lua anyway and they are
+ # highlighted directly accordingly, without the need for specific
+ # identifiers to be listed here.
+ PREDEFINED_EXPRESSIONS = %w[
+ assert collectgarbage dofile error getmetatable
+ ipairs load loadfile next pairs pcall print
+ rawequal rawget rawlen rawset select setmetatable
+ tonumber tostring type xpcall
+ ]
+
+ # Automatic token kind selection for normal words.
+ IDENT_KIND = CodeRay::WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_CONSTANTS, :predefined_constant).
+ add(PREDEFINED_EXPRESSIONS, :predefined)
+
+ protected
+
+ # Scanner initialization.
+ def setup
+ @state = :initial
+ @brace_depth = 0
+ end
+
+ # CodeRay entry hook. Starts parsing.
+ def scan_tokens(encoder, options)
+ state = options[:state] || @state
+
+ until eos?
+ case state
+
+ when :initial
+ if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]]
+ @num_equals = match.count("=") # Number must match for comment end
+ encoder.begin_group(:comment)
+ encoder.text_token(match, :delimiter)
+ state = :long_comment
+
+ elsif match = scan(/--.*$/) # --Lua comment
+ encoder.text_token(match, :comment)
+
+ elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]]
+ @num_equals = match.count("=") # Number must match for string end
+ encoder.begin_group(:string)
+ encoder.text_token(match, :delimiter)
+ state = :long_string
+
+ elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label::
+ encoder.text_token(match, :label)
+
+ elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua
+ encoder.text_token(match, :predefined)
+
+ elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
+ kind = IDENT_KIND[match]
+
+ # Extra highlighting for entities following certain keywords
+ if kind == :keyword and match == "function"
+ state = :function_expected
+ elsif kind == :keyword and match == "goto"
+ state = :goto_label_expected
+ elsif kind == :keyword and match == "local"
+ state = :local_var_expected
+ end
+
+ encoder.text_token(match, kind)
+
+ elsif match = scan(/\{/) # Opening table brace {
+ encoder.begin_group(:map)
+ encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter)
+ @brace_depth += 1
+ state = :map
+
+ elsif match = scan(/\}/) # Closing table brace }
+ if @brace_depth == 1
+ @brace_depth = 0
+ encoder.text_token(match, :delimiter)
+ encoder.end_group(:map)
+ elsif @brace_depth == 0 # Mismatched brace
+ encoder.text_token(match, :error)
+ else
+ @brace_depth -= 1
+ encoder.text_token(match, :inline_delimiter)
+ encoder.end_group(:map)
+ state = :map
+ end
+
+ elsif match = scan(/["']/) # String delimiters " and '
+ encoder.begin_group(:string)
+ encoder.text_token(match, :delimiter)
+ @start_delim = match
+ state = :string
+
+ # ↓Prefix hex number ←|→ decimal number
+ elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+ encoder.text_token(match, :float)
+
+ # ↓Prefix hex number ←|→ decimal number
+ elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+ encoder.text_token(match, :integer)
+
+ elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
+ encoder.text_token(match, :operator)
+
+ elsif match = scan(/\s+/) # Space
+ encoder.text_token(match, :space)
+
+ else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors.
+ encoder.text_token(getch, :error)
+ end
+
+ # It may be that we’re scanning a full-blown subexpression of a table
+ # (tables can contain full expressions in parts).
+ # If this is the case, return to :map scanning state.
+ state = :map if state == :initial && @brace_depth >= 1
+
+ when :function_expected
+ if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
+ encoder.text_token(match, :operator)
+ state = :initial
+ elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
+ encoder.text_token(match, :ident)
+ elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
+ encoder.text_token(match, :function)
+ state = :initial
+ elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
+ encoder.text_token(match, :space)
+ else
+ encoder.text_token(getch, :error)
+ state = :initial
+ end
+
+ when :goto_label_expected
+ if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+ encoder.text_token(match, :label)
+ state = :initial
+ elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
+ encoder.text_token(match, :space)
+ else
+ encoder.text_token(getch, :error)
+ end
+
+ when :local_var_expected
+ if match = scan(/function/) # local function ...
+ encoder.text_token(match, :keyword)
+ state = :function_expected
+ elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+ encoder.text_token(match, :local_variable)
+ elsif match = scan(/,/)
+ encoder.text_token(match, :operator)
+ elsif match = scan(/\=/)
+ encoder.text_token(match, :operator)
+ # After encountering the equal sign, arbitrary expressions are
+ # allowed again, so just return to the main state for further
+ # parsing.
+ state = :initial
+ elsif match = scan(/\n/)
+ encoder.text_token(match, :space)
+ state = :initial
+ elsif match = scan(/\s+/)
+ encoder.text_token(match, :space)
+ else
+ encoder.text_token(getch, :error)
+ end
+
+ when :long_comment
+ if match = scan(/.*?(?=\]={#@num_equals}\])/m)
+ encoder.text_token(match, :content)
+
+ delim = scan(/\]={#@num_equals}\]/)
+ encoder.text_token(delim, :delimiter)
+ else # No terminator found till EOF
+ encoder.text_token(rest, :error)
+ terminate
+ end
+ encoder.end_group(:comment)
+ state = :initial
+
+ when :long_string
+ if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences
+ encoder.text_token(match, :content)
+
+ delim = scan(/\]={#@num_equals}\]/)
+ encoder.text_token(delim, :delimiter)
+ else # No terminator found till EOF
+ encoder.text_token(rest, :error)
+ terminate
+ end
+ encoder.end_group(:string)
+ state = :initial
+
+ when :string
+ if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
+ encoder.text_token(match, :content)
+ elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
+ encoder.text_token(match, :char)
+ elsif match = scan(Regexp.compile(@start_delim))
+ encoder.text_token(match, :delimiter)
+ encoder.end_group(:string)
+ state = :initial
+ elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
+ encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
+ encoder.end_group(:string)
+ state = :initial
+ else
+ encoder.text_token(getch, :error)
+ end
+
+ when :map
+ if match = scan(/[,;]/)
+ encoder.text_token(match, :operator)
+ elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x)
+ encoder.text_token(match, :key)
+ encoder.text_token(scan(/\s+/), :space) if check(/\s+/)
+ encoder.text_token(scan(/\=/), :operator)
+ state = :initial
+ elsif match = scan(/\s+/m)
+ encoder.text_token(match, :space)
+ else
+ # Note this clause doesn’t advance the scan pointer, it’s a kind of
+ # "retry with other options" (the :initial state then of course
+ # advances the pointer).
+ state = :initial
+ end
+ else
+ raise
+ end
+
end
-
- @encoder.text_token(match, kind)
-
- elsif match = scan(/\{/) # Opening table brace {
- @encoder.begin_group(:map)
- @encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter)
- @brace_depth += 1
- @state = :map
-
- elsif match = scan(/\}/) # Closing table brace }
- if @brace_depth == 1
- @brace_depth = 0
- @encoder.text_token(match, :delimiter)
- elsif @brace_depth == 0 # Mismatched brace
- @encoder.text_token(match, :error)
- else
- @brace_depth -= 1
- @encoder.text_token(match, :inline_delimiter)
- @state = :map
+
+ if options[:keep_state]
+ @state = state
end
- @encoder.end_group(:map)
-
- elsif match = scan(/["']/) # String delimiters " and '
- @encoder.begin_group(:string)
- @encoder.text_token(match, :delimiter)
- @start_delim = match
- @state = :string
-
- # ↓Prefix hex number ←|→ decimal number
- elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
- @encoder.text_token(match, :float)
-
- # ↓Prefix hex number ←|→ decimal number
- elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
- @encoder.text_token(match, :integer)
-
- elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
- @encoder.text_token(match, :operator)
-
- elsif match = scan(/\s+/) # Space
- @encoder.text_token(match, :space)
-
- else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors.
- @encoder.text_token(getch, :error)
- end
-
- # It may be that we’re scanning a full-blown subexpression of a table
- # (tables can contain full expressions in parts).
- # If this is the case, return to :map scanning state.
- @state = :map if @state == :initial && @brace_depth >= 1
-
- when :function_expected
- if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
- @encoder.text_token(match, :operator)
- @state = :initial
- elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
- @encoder.text_token(match, :ident)
- elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
- @encoder.text_token(match, :function)
- @state = :initial
- elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
- @encoder.text_token(match, :space)
- else
- @encoder.text_token(getch, :error)
- @state = :initial
- end
-
- when :goto_label_expected
- if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
- @encoder.text_token(match, :label)
- @state = :initial
- elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
- @encoder.text_token(match, :space)
- else
- @encoder.text_token(getch, :error)
- end
-
- when :local_var_expected
- if match = scan(/function/) # local function ...
- @encoder.text_token(match, :keyword)
- @state = :function_expected
- elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
- @encoder.text_token(match, :local_variable)
- elsif match = scan(/,/)
- @encoder.text_token(match, :operator)
- elsif match = scan(/\=/)
- @encoder.text_token(match, :operator)
- # After encountering the equal sign, arbitrary expressions are
- # allowed again, so just return to the main state for further
- # parsing.
- @state = :initial
- elsif match = scan(/\n/)
- @encoder.text_token(match, :space)
- @state = :initial
- elsif match = scan(/\s+/)
- @encoder.text_token(match, :space)
- else
- @encoder.text_token(getch, :error)
- end
-
- when :long_comment
- if match = scan(/.*?(?=\]={#@num_equals}\])/m)
- @encoder.text_token(match, :content)
-
- delim = scan(/\]={#@num_equals}\]/)
- @encoder.text_token(delim, :delimiter)
- else # No terminator found till EOF
- @encoder.text_token(rest, :error)
- terminate
- end
- @encoder.end_group(:comment)
- @state = :initial
-
- when :long_string
- if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences
- @encoder.text_token(match, :content)
-
- delim = scan(/\]={#@num_equals}\]/)
- @encoder.text_token(delim, :delimiter)
- else # No terminator found till EOF
- @encoder.text_token(rest, :error)
- terminate
- end
- @encoder.end_group(:string)
- @state = :initial
-
- when :string
- if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z)
- @encoder.text_token(match, :content)
- elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
- @encoder.text_token(match, :char)
- elsif match = scan(Regexp.compile(@start_delim))
- @encoder.text_token(match, :delimiter)
- @encoder.end_group(:string)
- @state = :initial
- elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
- @encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multine strings
- @encoder.end_group(:string)
- @state = :initial
- else
- @encoder.text_token(getch, :error)
- end
-
- when :map
- if match = scan(/[,;]/)
- @encoder.text_token(match, :operator)
- elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x)
- @encoder.text_token(match, :key)
- @encoder.text_token(scan(/\s+/), :space) if check(/\s+/)
- @encoder.text_token(scan(/\=/), :operator)
- @state = :initial
- elsif match = scan(/\s+/m)
- @encoder.text_token(match, :space)
- else
- # Note this clause doesn’t advance the scan pointer, it’s a kind of
- # "retry with other options" (the :initial state then of course
- # advances the pointer).
- @state = :initial
+
+ encoder
end
- else
- raise
+
end
-
- end
-
- @encoder
- end
-
+
+end
end