From 501df7613c67bcd743eaa00071f0dbb179607394 Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 22:10:42 +0200 Subject: Lua scanner for CodeRay. Meta-commit. This commit is a super-commit containing all the subcommits for implementing the Lua scanner. --- lib/coderay/scanners/lua.rb | 267 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 lib/coderay/scanners/lua.rb (limited to 'lib/coderay/scanners/lua.rb') diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb new file mode 100644 index 0000000..e7706fc --- /dev/null +++ b/lib/coderay/scanners/lua.rb @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- + +# Scanner for the Lua[http://lua.org] programming lanuage. +# +# The language’s complete syntax is defined in +# {the Lua manual}[http://www.lua.org/manual/5.2/manual.html], +# which is what this scanner tries to conform to. +class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner + + register_for :lua + file_extension "lua" + title "Lua" + + # Keywords used in Lua. + KEYWORDS = %w[and break do else elseif end + for function goto if in + local not or repeat return + then until while + ] + + # Constants set by the Lua core. + PREDEFINED_CONSTANTS = %w[false true nil] + + # The expressions contained in this array are parts of Lua’s `basic' + # library. Although it’s not entirely necessary to load that library, + # it is highly recommended and one would have to provide own implementations + # of some of these expressions if one does not do so. They however aren’t + # keywords, neither are they constants, but nearly predefined, so they + # get tagged as `predefined' rather than anything else. + # + # This list excludes values of form `_UPPERCASE' because the Lua manual + # requires such identifiers to be reserved by Lua anyway and they are + # highlighted directly accordingly, without the need for specific + # identifiers to be listed here. + PREDEFINED_EXPRESSIONS = %w[ + assert collectgarbage dofile error getmetatable + ipairs load loadfile next pairs pcall print + rawequal rawget rawlen rawset select setmetatable + tonumber tostring type xpcall + ] + + # Automatic token kind selection for normal words. + IDENT_KIND = CodeRay::WordList.new(:ident) + .add(KEYWORDS, :keyword) + .add(PREDEFINED_CONSTANTS, :predefined_constant) + .add(PREDEFINED_EXPRESSIONS, :predefined) + + protected + + # Scanner initialization. + def setup + @state = :initial + @brace_depth = 0 + end + + # CodeRay entry hook. Starts parsing. + def scan_tokens(encoder, options) + @encoder = encoder + @options = options + + send(:"handle_state_#@state") until eos? + + @encoder + end + + def handle_state_initial + if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]] + @num_equals = match.count("=") # Number must match for comment end + @encoder.begin_group(:comment) + @encoder.text_token(match, :delimiter) + @state = :long_comment + + elsif match = scan(/--.*?$/) # --Lua comment + @encoder.text_token(match, :comment) + + elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]] + @num_equals = match.count("=") # Number must match for comment end + @encoder.begin_group(:string) + @encoder.text_token(match, :delimiter) + @state = :long_string + + elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label:: + @encoder.text_token(match, :label) + + elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua + @encoder.text_token(match, :predefined) + + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits) + kind = IDENT_KIND[match] + + # Extra highlighting for entities following certain keywords + if kind == :keyword and match == "function" + @state = :function_expected + elsif kind == :keyword and match == "goto" + @state = :goto_label_expected + elsif kind == :keyword and match == "local" + @state = :local_var_expected + end + + @encoder.text_token(match, kind) + + elsif match = scan(/{/) # Opening table brace { + @encoder.begin_group(:table) + @encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter) + @brace_depth += 1 + @state = :table + + elsif match = scan(/}/) # Closing table brace } + if @brace_depth == 1 + @brace_depth = 0 + @encoder.text_token(match, :delimiter) + elsif @brace_depth == 0 # Mismatched brace + @encoder.text_token(match, :error) + else + @brace_depth -= 1 + @encoder.text_token(match, :inline_delimiter) + @state = :table + end + @encoder.end_group(:table) + + elsif match = scan(/["']/) # String delimiters " and ' + @encoder.begin_group(:string) + @encoder.text_token(match, :delimiter) + @start_delim = match + @state = :string + + # ↓Prefix hex number ←|→ decimal number + elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power + @encoder.text_token(match, :float) + + # ↓Prefix hex number ←|→ decimal number + elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power + @encoder.text_token(match, :integer) + + elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators + @encoder.text_token(match, :operator) + + elsif match = scan(/\s+/) # Space + @encoder.text_token(match, :space) + + else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors. + @encoder.text_token(getch, :error) + end + + # It may be that we’re scanning a full-blown subexpression of a table + # (tables can contain full expressions in parts). + # If this is the case, return to :table scanning state. + @state = :table if @state == :initial && @brace_depth >= 1 + end + + def handle_state_function_expected + if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name + @encoder.text_token(match, :operator) + @state = :initial + elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator + @encoder.text_token(match, :ident) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo() + @encoder.text_token(match, :function) + @state = :initial + elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace + @encoder.text_token(match, :space) + else + @encoder.text_token(getch, :error) + @state = :initial + end + end + + def handle_state_goto_label_expected + if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + @encoder.text_token(match, :label) + @state = :initial + elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace + @encoder.text_token(match, :space) + else + @encoder.text_token(getch, :error) + end + end + + def handle_state_local_var_expected + if match = scan(/function/) # local function ... + @encoder.text_token(match, :keyword) + @state = :function_expected + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + @encoder.text_token(match, :local_variable) + elsif match = scan(/,/) + @encoder.text_token(match, :operator) + elsif match = scan(/=/) + @encoder.text_token(match, :operator) + # After encountering the equal sign, arbitrary expressions are + # allowed again, so just return to the main state for further + # parsing. + @state = :initial + elsif match = scan(/\n/) + @encoder.text_token(match, :space) + @state = :initial + elsif match = scan(/\s+/) + @encoder.text_token(match, :space) + else + @encoder.text_token(getch, :error) + end + end + + def handle_state_long_comment + if match = scan(/.*?(?=\]={#@num_equals}\])/m) + @encoder.text_token(match, :content) + + delim = scan(/\]={#@num_equals}\]/) + @encoder.text_token(delim, :delimiter) + else # No terminator found till EOF + @encoder.text_token(rest, :error) + terminate + end + @encoder.end_group(:comment) + @state = :initial + end + + def handle_state_long_string + if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences + @encoder.text_token(match, :content) + + delim = scan(/\]={#@num_equals}\]/) + @encoder.text_token(delim, :delimiter) + else # No terminator found till EOF + @encoder.text_token(rest, :error) + terminate + end + @encoder.end_group(:string) + @state = :initial + end + + def handle_state_string + if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z) + @encoder.text_token(match, :content) + elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m) + @encoder.text_token(match, :char) + elsif match = scan(Regexp.compile(@start_delim)) + @encoder.text_token(match, :delimiter) + @encoder.end_group(:string) + @state = :initial + elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings + @encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multine strings + @encoder.end_group(:string) + @state = :initial + else + @encoder.text_token(getch, :error) + end + end + + def handle_state_table + if match = scan(/[,;]/) + @encoder.text_token(match, :operator) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x) + @encoder.text_token(match, :key) + @encoder.text_token(scan(/\s+/), :space) if check(/\s+/) + @encoder.text_token(scan(/=/), :operator) + @state = :initial + elsif match = scan(/\s+/m) + @encoder.text_token(match, :space) + else + # Note this clause doesn’t advance the scan pointer, it’s a kind of + # "retry with other options" (the :initial state then of course + # advances the pointer). + @state = :initial + end + end + +end -- cgit v1.2.1 From 8979cc621431248fded86c341e2102a67c5344bb Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Tue, 19 Jun 2012 17:45:49 +0200 Subject: use case+when instead of send and methods --- lib/coderay/scanners/lua.rb | 64 ++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 33 deletions(-) (limited to 'lib/coderay/scanners/lua.rb') diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index e7706fc..e712061 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -40,10 +40,10 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner ] # Automatic token kind selection for normal words. - IDENT_KIND = CodeRay::WordList.new(:ident) - .add(KEYWORDS, :keyword) - .add(PREDEFINED_CONSTANTS, :predefined_constant) - .add(PREDEFINED_EXPRESSIONS, :predefined) + IDENT_KIND = CodeRay::WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_CONSTANTS, :predefined_constant). + add(PREDEFINED_EXPRESSIONS, :predefined) protected @@ -57,13 +57,11 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner def scan_tokens(encoder, options) @encoder = encoder @options = options - - send(:"handle_state_#@state") until eos? - - @encoder - end - - def handle_state_initial + + until eos? + case state + + when :initial if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]] @num_equals = match.count("=") # Number must match for comment end @encoder.begin_group(:comment) @@ -99,13 +97,13 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner @encoder.text_token(match, kind) - elsif match = scan(/{/) # Opening table brace { + elsif match = scan(/\{/) # Opening table brace { @encoder.begin_group(:table) @encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter) @brace_depth += 1 @state = :table - elsif match = scan(/}/) # Closing table brace } + elsif match = scan(/\}/) # Closing table brace } if @brace_depth == 1 @brace_depth = 0 @encoder.text_token(match, :delimiter) @@ -146,9 +144,8 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner # (tables can contain full expressions in parts). # If this is the case, return to :table scanning state. @state = :table if @state == :initial && @brace_depth >= 1 - end - - def handle_state_function_expected + + when :function_expected if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name @encoder.text_token(match, :operator) @state = :initial @@ -163,9 +160,8 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner @encoder.text_token(getch, :error) @state = :initial end - end - def handle_state_goto_label_expected + when :goto_label_expected if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) @encoder.text_token(match, :label) @state = :initial @@ -174,9 +170,8 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner else @encoder.text_token(getch, :error) end - end - - def handle_state_local_var_expected + + when :local_var_expected if match = scan(/function/) # local function ... @encoder.text_token(match, :keyword) @state = :function_expected @@ -198,9 +193,8 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner else @encoder.text_token(getch, :error) end - end - - def handle_state_long_comment + + when :long_comment if match = scan(/.*?(?=\]={#@num_equals}\])/m) @encoder.text_token(match, :content) @@ -212,9 +206,8 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner end @encoder.end_group(:comment) @state = :initial - end - - def handle_state_long_string + + when :long_string if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences @encoder.text_token(match, :content) @@ -226,9 +219,8 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner end @encoder.end_group(:string) @state = :initial - end - - def handle_state_string + + when :string if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z) @encoder.text_token(match, :content) elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m) @@ -244,9 +236,8 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner else @encoder.text_token(getch, :error) end - end - - def handle_state_table + + when :table if match = scan(/[,;]/) @encoder.text_token(match, :operator) elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x) @@ -262,6 +253,13 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner # advances the pointer). @state = :initial end + else + raise + end + + end + + @encoder end end -- cgit v1.2.1 From b081c1f3ab670301199005c04ecbc11614593285 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Tue, 11 Sep 2012 15:04:45 -0600 Subject: escape = in regex (my editor gets confused:-) --- lib/coderay/scanners/lua.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib/coderay/scanners/lua.rb') diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index e712061..e640397 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -57,10 +57,10 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner def scan_tokens(encoder, options) @encoder = encoder @options = options - + until eos? case state - + when :initial if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]] @num_equals = match.count("=") # Number must match for comment end @@ -144,7 +144,7 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner # (tables can contain full expressions in parts). # If this is the case, return to :table scanning state. @state = :table if @state == :initial && @brace_depth >= 1 - + when :function_expected if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name @encoder.text_token(match, :operator) @@ -170,7 +170,7 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner else @encoder.text_token(getch, :error) end - + when :local_var_expected if match = scan(/function/) # local function ... @encoder.text_token(match, :keyword) @@ -179,7 +179,7 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner @encoder.text_token(match, :local_variable) elsif match = scan(/,/) @encoder.text_token(match, :operator) - elsif match = scan(/=/) + elsif match = scan(/\=/) @encoder.text_token(match, :operator) # After encountering the equal sign, arbitrary expressions are # allowed again, so just return to the main state for further @@ -193,7 +193,7 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner else @encoder.text_token(getch, :error) end - + when :long_comment if match = scan(/.*?(?=\]={#@num_equals}\])/m) @encoder.text_token(match, :content) @@ -206,7 +206,7 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner end @encoder.end_group(:comment) @state = :initial - + when :long_string if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences @encoder.text_token(match, :content) @@ -219,7 +219,7 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner end @encoder.end_group(:string) @state = :initial - + when :string if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z) @encoder.text_token(match, :content) @@ -236,14 +236,14 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner else @encoder.text_token(getch, :error) end - + when :table if match = scan(/[,;]/) @encoder.text_token(match, :operator) elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x) @encoder.text_token(match, :key) @encoder.text_token(scan(/\s+/), :space) if check(/\s+/) - @encoder.text_token(scan(/=/), :operator) + @encoder.text_token(scan(/\=/), :operator) @state = :initial elsif match = scan(/\s+/m) @encoder.text_token(match, :space) -- cgit v1.2.1 From 279348d3c2159df4ce6ac5949ada0177aa7c7159 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sat, 27 Oct 2012 23:41:52 -0600 Subject: :map token kind Use :map instead of :table. It's more generic, and won't be confused with the :table rendering style. --- lib/coderay/scanners/lua.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib/coderay/scanners/lua.rb') diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index e640397..64763dc 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -98,10 +98,10 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner @encoder.text_token(match, kind) elsif match = scan(/\{/) # Opening table brace { - @encoder.begin_group(:table) + @encoder.begin_group(:map) @encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter) @brace_depth += 1 - @state = :table + @state = :map elsif match = scan(/\}/) # Closing table brace } if @brace_depth == 1 @@ -112,9 +112,9 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner else @brace_depth -= 1 @encoder.text_token(match, :inline_delimiter) - @state = :table + @state = :map end - @encoder.end_group(:table) + @encoder.end_group(:map) elsif match = scan(/["']/) # String delimiters " and ' @encoder.begin_group(:string) @@ -142,8 +142,8 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner # It may be that we’re scanning a full-blown subexpression of a table # (tables can contain full expressions in parts). - # If this is the case, return to :table scanning state. - @state = :table if @state == :initial && @brace_depth >= 1 + # If this is the case, return to :map scanning state. + @state = :map if @state == :initial && @brace_depth >= 1 when :function_expected if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name @@ -237,7 +237,7 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner @encoder.text_token(getch, :error) end - when :table + when :map if match = scan(/[,;]/) @encoder.text_token(match, :operator) elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x) -- cgit v1.2.1 From da1425058e9b9e04bd988ddf606331d9cd0c827b Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 01:06:33 +0200 Subject: cleanup Lua scanner, fix end_group(:map) issue - use local instead of instance variables --- lib/coderay/scanners/lua.rb | 524 ++++++++++++++++++++++---------------------- 1 file changed, 267 insertions(+), 257 deletions(-) (limited to 'lib/coderay/scanners/lua.rb') diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index 64763dc..3bee275 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -1,265 +1,275 @@ -# -*- coding: utf-8 -*- +# encoding: utf-8 -# Scanner for the Lua[http://lua.org] programming lanuage. -# -# The language’s complete syntax is defined in -# {the Lua manual}[http://www.lua.org/manual/5.2/manual.html], -# which is what this scanner tries to conform to. -class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner +module CodeRay +module Scanners - register_for :lua - file_extension "lua" - title "Lua" - - # Keywords used in Lua. - KEYWORDS = %w[and break do else elseif end - for function goto if in - local not or repeat return - then until while - ] - - # Constants set by the Lua core. - PREDEFINED_CONSTANTS = %w[false true nil] - - # The expressions contained in this array are parts of Lua’s `basic' - # library. Although it’s not entirely necessary to load that library, - # it is highly recommended and one would have to provide own implementations - # of some of these expressions if one does not do so. They however aren’t - # keywords, neither are they constants, but nearly predefined, so they - # get tagged as `predefined' rather than anything else. + # Scanner for the Lua[http://lua.org] programming lanuage. # - # This list excludes values of form `_UPPERCASE' because the Lua manual - # requires such identifiers to be reserved by Lua anyway and they are - # highlighted directly accordingly, without the need for specific - # identifiers to be listed here. - PREDEFINED_EXPRESSIONS = %w[ - assert collectgarbage dofile error getmetatable - ipairs load loadfile next pairs pcall print - rawequal rawget rawlen rawset select setmetatable - tonumber tostring type xpcall - ] - - # Automatic token kind selection for normal words. - IDENT_KIND = CodeRay::WordList.new(:ident). - add(KEYWORDS, :keyword). - add(PREDEFINED_CONSTANTS, :predefined_constant). - add(PREDEFINED_EXPRESSIONS, :predefined) - - protected - - # Scanner initialization. - def setup - @state = :initial - @brace_depth = 0 - end - - # CodeRay entry hook. Starts parsing. - def scan_tokens(encoder, options) - @encoder = encoder - @options = options - - until eos? - case state - - when :initial - if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]] - @num_equals = match.count("=") # Number must match for comment end - @encoder.begin_group(:comment) - @encoder.text_token(match, :delimiter) - @state = :long_comment - - elsif match = scan(/--.*?$/) # --Lua comment - @encoder.text_token(match, :comment) - - elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]] - @num_equals = match.count("=") # Number must match for comment end - @encoder.begin_group(:string) - @encoder.text_token(match, :delimiter) - @state = :long_string - - elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label:: - @encoder.text_token(match, :label) - - elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua - @encoder.text_token(match, :predefined) - - elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits) - kind = IDENT_KIND[match] - - # Extra highlighting for entities following certain keywords - if kind == :keyword and match == "function" - @state = :function_expected - elsif kind == :keyword and match == "goto" - @state = :goto_label_expected - elsif kind == :keyword and match == "local" - @state = :local_var_expected + # The language’s complete syntax is defined in + # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html], + # which is what this scanner tries to conform to. + class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner + + register_for :lua + file_extension "lua" + title "Lua" + + # Keywords used in Lua. + KEYWORDS = %w[and break do else elseif end + for function goto if in + local not or repeat return + then until while + ] + + # Constants set by the Lua core. + PREDEFINED_CONSTANTS = %w[false true nil] + + # The expressions contained in this array are parts of Lua’s `basic' + # library. Although it’s not entirely necessary to load that library, + # it is highly recommended and one would have to provide own implementations + # of some of these expressions if one does not do so. They however aren’t + # keywords, neither are they constants, but nearly predefined, so they + # get tagged as `predefined' rather than anything else. + # + # This list excludes values of form `_UPPERCASE' because the Lua manual + # requires such identifiers to be reserved by Lua anyway and they are + # highlighted directly accordingly, without the need for specific + # identifiers to be listed here. + PREDEFINED_EXPRESSIONS = %w[ + assert collectgarbage dofile error getmetatable + ipairs load loadfile next pairs pcall print + rawequal rawget rawlen rawset select setmetatable + tonumber tostring type xpcall + ] + + # Automatic token kind selection for normal words. + IDENT_KIND = CodeRay::WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_CONSTANTS, :predefined_constant). + add(PREDEFINED_EXPRESSIONS, :predefined) + + protected + + # Scanner initialization. + def setup + @state = :initial + @brace_depth = 0 + end + + # CodeRay entry hook. Starts parsing. + def scan_tokens(encoder, options) + state = options[:state] || @state + + until eos? + case state + + when :initial + if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]] + @num_equals = match.count("=") # Number must match for comment end + encoder.begin_group(:comment) + encoder.text_token(match, :delimiter) + state = :long_comment + + elsif match = scan(/--.*$/) # --Lua comment + encoder.text_token(match, :comment) + + elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]] + @num_equals = match.count("=") # Number must match for comment end + encoder.begin_group(:string) + encoder.text_token(match, :delimiter) + state = :long_string + + elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label:: + encoder.text_token(match, :label) + + elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua + encoder.text_token(match, :predefined) + + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits) + kind = IDENT_KIND[match] + + # Extra highlighting for entities following certain keywords + if kind == :keyword and match == "function" + state = :function_expected + elsif kind == :keyword and match == "goto" + state = :goto_label_expected + elsif kind == :keyword and match == "local" + state = :local_var_expected + end + + encoder.text_token(match, kind) + + elsif match = scan(/\{/) # Opening table brace { + encoder.begin_group(:map) + encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter) + @brace_depth += 1 + state = :map + + elsif match = scan(/\}/) # Closing table brace } + if @brace_depth == 1 + @brace_depth = 0 + encoder.text_token(match, :delimiter) + encoder.end_group(:map) + elsif @brace_depth == 0 # Mismatched brace + encoder.text_token(match, :error) + else + @brace_depth -= 1 + encoder.text_token(match, :inline_delimiter) + encoder.end_group(:map) + state = :map + end + + elsif match = scan(/["']/) # String delimiters " and ' + encoder.begin_group(:string) + encoder.text_token(match, :delimiter) + @start_delim = match + state = :string + + # ↓Prefix hex number ←|→ decimal number + elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power + encoder.text_token(match, :float) + + # ↓Prefix hex number ←|→ decimal number + elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power + encoder.text_token(match, :integer) + + elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators + encoder.text_token(match, :operator) + + elsif match = scan(/\s+/) # Space + encoder.text_token(match, :space) + + else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors. + encoder.text_token(getch, :error) + end + + # It may be that we’re scanning a full-blown subexpression of a table + # (tables can contain full expressions in parts). + # If this is the case, return to :map scanning state. + state = :map if state == :initial && @brace_depth >= 1 + + when :function_expected + if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name + encoder.text_token(match, :operator) + state = :initial + elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator + encoder.text_token(match, :ident) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo() + encoder.text_token(match, :function) + state = :initial + elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace + encoder.text_token(match, :space) + else + encoder.text_token(getch, :error) + state = :initial + end + + when :goto_label_expected + if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + encoder.text_token(match, :label) + state = :initial + elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace + encoder.text_token(match, :space) + else + encoder.text_token(getch, :error) + end + + when :local_var_expected + if match = scan(/function/) # local function ... + encoder.text_token(match, :keyword) + state = :function_expected + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + encoder.text_token(match, :local_variable) + elsif match = scan(/,/) + encoder.text_token(match, :operator) + elsif match = scan(/\=/) + encoder.text_token(match, :operator) + # After encountering the equal sign, arbitrary expressions are + # allowed again, so just return to the main state for further + # parsing. + state = :initial + elsif match = scan(/\n/) + encoder.text_token(match, :space) + state = :initial + elsif match = scan(/\s+/) + encoder.text_token(match, :space) + else + encoder.text_token(getch, :error) + end + + when :long_comment + if match = scan(/.*?(?=\]={#@num_equals}\])/m) + encoder.text_token(match, :content) + + delim = scan(/\]={#@num_equals}\]/) + encoder.text_token(delim, :delimiter) + else # No terminator found till EOF + encoder.text_token(rest, :error) + terminate + end + encoder.end_group(:comment) + state = :initial + + when :long_string + if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences + encoder.text_token(match, :content) + + delim = scan(/\]={#@num_equals}\]/) + encoder.text_token(delim, :delimiter) + else # No terminator found till EOF + encoder.text_token(rest, :error) + terminate + end + encoder.end_group(:string) + state = :initial + + when :string + if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z) + encoder.text_token(match, :content) + elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m) + encoder.text_token(match, :char) + elsif match = scan(Regexp.compile(@start_delim)) + encoder.text_token(match, :delimiter) + encoder.end_group(:string) + state = :initial + elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings + encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multine strings + encoder.end_group(:string) + state = :initial + else + encoder.text_token(getch, :error) + end + + when :map + if match = scan(/[,;]/) + encoder.text_token(match, :operator) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x) + encoder.text_token(match, :key) + encoder.text_token(scan(/\s+/), :space) if check(/\s+/) + encoder.text_token(scan(/\=/), :operator) + state = :initial + elsif match = scan(/\s+/m) + encoder.text_token(match, :space) + else + # Note this clause doesn’t advance the scan pointer, it’s a kind of + # "retry with other options" (the :initial state then of course + # advances the pointer). + state = :initial + end + else + raise + end + end - - @encoder.text_token(match, kind) - - elsif match = scan(/\{/) # Opening table brace { - @encoder.begin_group(:map) - @encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter) - @brace_depth += 1 - @state = :map - - elsif match = scan(/\}/) # Closing table brace } - if @brace_depth == 1 - @brace_depth = 0 - @encoder.text_token(match, :delimiter) - elsif @brace_depth == 0 # Mismatched brace - @encoder.text_token(match, :error) - else - @brace_depth -= 1 - @encoder.text_token(match, :inline_delimiter) - @state = :map + + if options[:keep_state] + @state = state end - @encoder.end_group(:map) - - elsif match = scan(/["']/) # String delimiters " and ' - @encoder.begin_group(:string) - @encoder.text_token(match, :delimiter) - @start_delim = match - @state = :string - - # ↓Prefix hex number ←|→ decimal number - elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power - @encoder.text_token(match, :float) - - # ↓Prefix hex number ←|→ decimal number - elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power - @encoder.text_token(match, :integer) - - elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators - @encoder.text_token(match, :operator) - - elsif match = scan(/\s+/) # Space - @encoder.text_token(match, :space) - - else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors. - @encoder.text_token(getch, :error) - end - - # It may be that we’re scanning a full-blown subexpression of a table - # (tables can contain full expressions in parts). - # If this is the case, return to :map scanning state. - @state = :map if @state == :initial && @brace_depth >= 1 - - when :function_expected - if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name - @encoder.text_token(match, :operator) - @state = :initial - elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator - @encoder.text_token(match, :ident) - elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo() - @encoder.text_token(match, :function) - @state = :initial - elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace - @encoder.text_token(match, :space) - else - @encoder.text_token(getch, :error) - @state = :initial - end - - when :goto_label_expected - if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) - @encoder.text_token(match, :label) - @state = :initial - elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace - @encoder.text_token(match, :space) - else - @encoder.text_token(getch, :error) - end - - when :local_var_expected - if match = scan(/function/) # local function ... - @encoder.text_token(match, :keyword) - @state = :function_expected - elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) - @encoder.text_token(match, :local_variable) - elsif match = scan(/,/) - @encoder.text_token(match, :operator) - elsif match = scan(/\=/) - @encoder.text_token(match, :operator) - # After encountering the equal sign, arbitrary expressions are - # allowed again, so just return to the main state for further - # parsing. - @state = :initial - elsif match = scan(/\n/) - @encoder.text_token(match, :space) - @state = :initial - elsif match = scan(/\s+/) - @encoder.text_token(match, :space) - else - @encoder.text_token(getch, :error) - end - - when :long_comment - if match = scan(/.*?(?=\]={#@num_equals}\])/m) - @encoder.text_token(match, :content) - - delim = scan(/\]={#@num_equals}\]/) - @encoder.text_token(delim, :delimiter) - else # No terminator found till EOF - @encoder.text_token(rest, :error) - terminate - end - @encoder.end_group(:comment) - @state = :initial - - when :long_string - if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences - @encoder.text_token(match, :content) - - delim = scan(/\]={#@num_equals}\]/) - @encoder.text_token(delim, :delimiter) - else # No terminator found till EOF - @encoder.text_token(rest, :error) - terminate - end - @encoder.end_group(:string) - @state = :initial - - when :string - if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z) - @encoder.text_token(match, :content) - elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m) - @encoder.text_token(match, :char) - elsif match = scan(Regexp.compile(@start_delim)) - @encoder.text_token(match, :delimiter) - @encoder.end_group(:string) - @state = :initial - elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings - @encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multine strings - @encoder.end_group(:string) - @state = :initial - else - @encoder.text_token(getch, :error) - end - - when :map - if match = scan(/[,;]/) - @encoder.text_token(match, :operator) - elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x) - @encoder.text_token(match, :key) - @encoder.text_token(scan(/\s+/), :space) if check(/\s+/) - @encoder.text_token(scan(/\=/), :operator) - @state = :initial - elsif match = scan(/\s+/m) - @encoder.text_token(match, :space) - else - # Note this clause doesn’t advance the scan pointer, it’s a kind of - # "retry with other options" (the :initial state then of course - # advances the pointer). - @state = :initial + + encoder end - else - raise + end - - end - - @encoder - end - + +end end -- cgit v1.2.1 From 90c401c9001e0a670360a55d1189e9f814ae7592 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 01:17:49 +0200 Subject: some more code cleanups before merge --- lib/coderay/scanners/lua.rb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'lib/coderay/scanners/lua.rb') diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index 3bee275..25bebbe 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -8,17 +8,17 @@ module Scanners # The language’s complete syntax is defined in # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html], # which is what this scanner tries to conform to. - class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner + class Lua < Scanner register_for :lua - file_extension "lua" - title "Lua" + file_extension 'lua' + title 'Lua' # Keywords used in Lua. KEYWORDS = %w[and break do else elseif end - for function goto if in - local not or repeat return - then until while + for function goto if in + local not or repeat return + then until while ] # Constants set by the Lua core. @@ -36,10 +36,10 @@ module Scanners # highlighted directly accordingly, without the need for specific # identifiers to be listed here. PREDEFINED_EXPRESSIONS = %w[ - assert collectgarbage dofile error getmetatable - ipairs load loadfile next pairs pcall print - rawequal rawget rawlen rawset select setmetatable - tonumber tostring type xpcall + assert collectgarbage dofile error getmetatable + ipairs load loadfile next pairs pcall print + rawequal rawget rawlen rawset select setmetatable + tonumber tostring type xpcall ] # Automatic token kind selection for normal words. @@ -52,7 +52,7 @@ module Scanners # Scanner initialization. def setup - @state = :initial + @state = :initial @brace_depth = 0 end -- cgit v1.2.1 From 7f45168efd1abe679b261a7a91f3641984091d12 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 06:06:16 +0200 Subject: remove instance variables from Lua scanner --- lib/coderay/scanners/lua.rb | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'lib/coderay/scanners/lua.rb') diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index 25bebbe..185c133 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -59,13 +59,15 @@ module Scanners # CodeRay entry hook. Starts parsing. def scan_tokens(encoder, options) state = options[:state] || @state + brace_depth = @brace_depth + num_equals = nil until eos? case state when :initial if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]] - @num_equals = match.count("=") # Number must match for comment end + num_equals = match.count("=") # Number must match for comment end encoder.begin_group(:comment) encoder.text_token(match, :delimiter) state = :long_comment @@ -74,7 +76,7 @@ module Scanners encoder.text_token(match, :comment) elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]] - @num_equals = match.count("=") # Number must match for comment end + num_equals = match.count("=") # Number must match for comment end encoder.begin_group(:string) encoder.text_token(match, :delimiter) state = :long_string @@ -101,19 +103,19 @@ module Scanners elsif match = scan(/\{/) # Opening table brace { encoder.begin_group(:map) - encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter) - @brace_depth += 1 + encoder.text_token(match, brace_depth >= 1 ? :inline_delimiter : :delimiter) + brace_depth += 1 state = :map elsif match = scan(/\}/) # Closing table brace } - if @brace_depth == 1 - @brace_depth = 0 + if brace_depth == 1 + brace_depth = 0 encoder.text_token(match, :delimiter) encoder.end_group(:map) - elsif @brace_depth == 0 # Mismatched brace + elsif brace_depth == 0 # Mismatched brace encoder.text_token(match, :error) else - @brace_depth -= 1 + brace_depth -= 1 encoder.text_token(match, :inline_delimiter) encoder.end_group(:map) state = :map @@ -122,7 +124,7 @@ module Scanners elsif match = scan(/["']/) # String delimiters " and ' encoder.begin_group(:string) encoder.text_token(match, :delimiter) - @start_delim = match + start_delim = match state = :string # ↓Prefix hex number ←|→ decimal number @@ -146,7 +148,7 @@ module Scanners # It may be that we’re scanning a full-blown subexpression of a table # (tables can contain full expressions in parts). # If this is the case, return to :map scanning state. - state = :map if state == :initial && @brace_depth >= 1 + state = :map if state == :initial && brace_depth >= 1 when :function_expected if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name @@ -198,10 +200,10 @@ module Scanners end when :long_comment - if match = scan(/.*?(?=\]={#@num_equals}\])/m) + if match = scan(/.*?(?=\]={#{num_equals}}\])/m) encoder.text_token(match, :content) - delim = scan(/\]={#@num_equals}\]/) + delim = scan(/\]={#{num_equals}}\]/) encoder.text_token(delim, :delimiter) else # No terminator found till EOF encoder.text_token(rest, :error) @@ -211,10 +213,10 @@ module Scanners state = :initial when :long_string - if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences + if match = scan(/.*?(?=\]={#{num_equals}}\])/m) # Long strings do not interpret any escape sequences encoder.text_token(match, :content) - delim = scan(/\]={#@num_equals}\]/) + delim = scan(/\]={#{num_equals}}\]/) encoder.text_token(delim, :delimiter) else # No terminator found till EOF encoder.text_token(rest, :error) @@ -224,11 +226,11 @@ module Scanners state = :initial when :string - if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z) + if match = scan(/[^\\#{start_delim}\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z) encoder.text_token(match, :content) elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m) encoder.text_token(match, :char) - elsif match = scan(Regexp.compile(@start_delim)) + elsif match = scan(Regexp.compile(start_delim)) encoder.text_token(match, :delimiter) encoder.end_group(:string) state = :initial -- cgit v1.2.1 From 45bb0c576b67b7a3c0dead02078b3a16b5583154 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 06:06:52 +0200 Subject: close open token groups in Lua scanner --- lib/coderay/scanners/lua.rb | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/coderay/scanners/lua.rb') diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index 185c133..fb1e45a 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -268,6 +268,9 @@ module Scanners @state = state end + encoder.end_group :string if [:string].include? state + brace_depth.times { encoder.end_group :map } + encoder end -- cgit v1.2.1