From abb92f30b12e11781afa76f43a344627520b5b34 Mon Sep 17 00:00:00 2001 From: Eric Guo Date: Sun, 8 Jul 2012 14:32:28 +0800 Subject: New: *Go Encoder* Draft version, copy from c --- lib/coderay/scanners/go.rb | 195 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 lib/coderay/scanners/go.rb (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb new file mode 100644 index 0000000..4431ef2 --- /dev/null +++ b/lib/coderay/scanners/go.rb @@ -0,0 +1,195 @@ +module CodeRay +module Scanners + + # Scanner for Go, copy from c + class Go < Scanner + + register_for :go + file_extension 'go' + + # http://golang.org/ref/spec#Keywords + KEYWORDS = [ + 'break', 'default', 'func', 'interface', 'select', + 'case', 'defer', 'go', 'map', 'struct', + 'chan', 'else', 'goto', 'package', 'switch', + 'const', 'fallthrough', 'if', 'range', 'type', + 'continue', 'for', 'import', 'return', 'var', + ] # :nodoc: + + # http://golang.org/ref/spec#Types + PREDEFINED_TYPES = [ + 'bool', + 'uint8', 'uint16', 'uint32', 'uint64', + 'int8', 'int16', 'int32', 'int64', + 'float32', 'float64', + 'complex64', 'complex128', + 'byte', 'rune', + 'uint', 'int', 'uintptr', + ] # :nodoc: + + PREDEFINED_CONSTANTS = [ + 'nil', 'iota', + 'true', 'false', + ] # :nodoc: + + DIRECTIVES = [ + 'go_no_directive', # Seems no directive concept in Go? + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_TYPES, :predefined_type). + add(DIRECTIVES, :directive). + add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: + + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + + protected + + def scan_tokens encoder, options + + state = :initial + label_expected = true + case_expected = false + label_expected_before_preproc_line = nil + in_preproc_line = false + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + if in_preproc_line && match != "\\\n" && match.index(?\n) + in_preproc_line = false + label_expected = label_expected_before_preproc_line + end + encoder.text_token match, :space + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :comment + + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + label_expected = match =~ /[;\{\}]/ + if case_expected + label_expected = true if match == ':' + case_expected = false + end + encoder.text_token match, :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) + kind = :label + match << matched + else + label_expected = false + if kind == :keyword + case match + when 'case', 'default' + case_expected = true + end + end + end + encoder.text_token match, kind + + elsif match = scan(/L?"/) + encoder.begin_group :string + if match[0] == ?L + encoder.text_token 'L', :modifier + match = '"' + end + encoder.text_token match, :delimiter + state = :string + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + encoder.text_token match, :comment + + elsif match = scan(/#[ \t]*(\w*)/) + encoder.text_token match, :preprocessor + in_preproc_line = true + label_expected_before_preproc_line = label_expected + state = :include_expected if self[1] == 'include' + + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + label_expected = false + encoder.text_token match, :char + + elsif match = scan(/\$/) + encoder.text_token match, :ident + + elsif match = scan(/0[xX][0-9A-Fa-f]+/) + label_expected = false + encoder.text_token match, :hex + + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) + label_expected = false + encoder.text_token match, :octal + + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false + encoder.text_token match, :integer + + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + label_expected = false + encoder.text_token match, :float + + else + encoder.text_token getch, :error + + end + + when :string + if match = scan(/[^\\\n"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + label_expected = false + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \\ | $ /x) + encoder.end_group :string + encoder.text_token match, :error + state = :initial + label_expected = false + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + + when :include_expected + if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + encoder.text_token match, :include + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + state = :initial if match.index ?\n + + else + state = :initial + + end + + else + raise_inspect 'Unknown state', encoder + + end + + end + + if state == :string + encoder.end_group :string + end + + encoder + end + + end + +end +end -- cgit v1.2.1 From fd8c81f5c338e4f1448007c9100d15d912fadd27 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 09:43:54 -0600 Subject: additional types: string, error http://golang.org/ref/spec#Predeclared_identifiers --- lib/coderay/scanners/go.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 4431ef2..a66a5e3 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -23,7 +23,7 @@ module Scanners 'int8', 'int16', 'int32', 'int64', 'float32', 'float64', 'complex64', 'complex128', - 'byte', 'rune', + 'byte', 'rune', 'string', 'error', 'uint', 'int', 'uintptr', ] # :nodoc: -- cgit v1.2.1 From 004d0c83222113ff732101a3e25785ce0625bfa2 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:14:29 +0200 Subject: whitespace --- lib/coderay/scanners/go.rb | 82 +++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 41 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 4431ef2..d4a3598 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -1,12 +1,12 @@ module CodeRay module Scanners - + # Scanner for Go, copy from c class Go < Scanner - + register_for :go file_extension 'go' - + # http://golang.org/ref/spec#Keywords KEYWORDS = [ 'break', 'default', 'func', 'interface', 'select', @@ -15,7 +15,7 @@ module Scanners 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for', 'import', 'return', 'var', ] # :nodoc: - + # http://golang.org/ref/spec#Types PREDEFINED_TYPES = [ 'bool', @@ -26,51 +26,51 @@ module Scanners 'byte', 'rune', 'uint', 'int', 'uintptr', ] # :nodoc: - + PREDEFINED_CONSTANTS = [ 'nil', 'iota', 'true', 'false', ] # :nodoc: - + DIRECTIVES = [ 'go_no_directive', # Seems no directive concept in Go? ] # :nodoc: - + IDENT_KIND = WordList.new(:ident). add(KEYWORDS, :keyword). add(PREDEFINED_TYPES, :predefined_type). add(DIRECTIVES, :directive). add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: - + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: - - protected - + + protected + def scan_tokens encoder, options - + state = :initial label_expected = true case_expected = false label_expected_before_preproc_line = nil in_preproc_line = false - + until eos? - + case state - + when :initial - + if match = scan(/ \s+ | \\\n /x) if in_preproc_line && match != "\\\n" && match.index(?\n) in_preproc_line = false label_expected = label_expected_before_preproc_line end encoder.text_token match, :space - + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) encoder.text_token match, :comment - + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) label_expected = match =~ /[;\{\}]/ if case_expected @@ -78,7 +78,7 @@ module Scanners case_expected = false end encoder.text_token match, :operator - + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) kind = IDENT_KIND[match] if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) @@ -94,7 +94,7 @@ module Scanners end end encoder.text_token match, kind - + elsif match = scan(/L?"/) encoder.begin_group :string if match[0] == ?L @@ -107,41 +107,41 @@ module Scanners elsif match = scan(/ \# \s* if \s* 0 /x) match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? encoder.text_token match, :comment - + elsif match = scan(/#[ \t]*(\w*)/) encoder.text_token match, :preprocessor in_preproc_line = true label_expected_before_preproc_line = label_expected state = :include_expected if self[1] == 'include' - + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) label_expected = false encoder.text_token match, :char - + elsif match = scan(/\$/) encoder.text_token match, :ident - + elsif match = scan(/0[xX][0-9A-Fa-f]+/) label_expected = false encoder.text_token match, :hex - + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) label_expected = false encoder.text_token match, :octal - + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) label_expected = false encoder.text_token match, :integer - + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) label_expected = false encoder.text_token match, :float - + else encoder.text_token getch, :error - + end - + when :string if match = scan(/[^\\\n"]+/) encoder.text_token match, :content @@ -160,36 +160,36 @@ module Scanners else raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end - + when :include_expected if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) encoder.text_token match, :include state = :initial - + elsif match = scan(/\s+/) encoder.text_token match, :space state = :initial if match.index ?\n - + else state = :initial - + end - + else raise_inspect 'Unknown state', encoder - + end - + end - + if state == :string encoder.end_group :string end - + encoder end - + end - + end end -- cgit v1.2.1 From afa2be73c03d033056024354787abd66f2ff7fa0 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:15:05 +0200 Subject: add string as predefined type --- lib/coderay/scanners/go.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index d4a3598..dbf9595 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -23,7 +23,7 @@ module Scanners 'int8', 'int16', 'int32', 'int64', 'float32', 'float64', 'complex64', 'complex128', - 'byte', 'rune', + 'byte', 'rune', 'string', 'uint', 'int', 'uintptr', ] # :nodoc: -- cgit v1.2.1 From 7ef6f7783d788bf656a3c59600cd45cd5b694376 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:15:22 +0200 Subject: add support for raw strings in Go --- lib/coderay/scanners/go.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index dbf9595..c0c602d 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -103,7 +103,14 @@ module Scanners end encoder.text_token match, :delimiter state = :string - + + elsif match = scan(/ ` ([^`]+)? (`)? /x) + encoder.begin_group :shell + encoder.text_token '`', :delimiter + encoder.text_token self[1], :content if self[1] + encoder.text_token self[2], :delimiter if self[2] + encoder.end_group :shell + elsif match = scan(/ \# \s* if \s* 0 /x) match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? encoder.text_token match, :comment -- cgit v1.2.1 From a24c207a7e547408de0f6157d495b0dd2e8d5a40 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:15:36 +0200 Subject: fix empty token in Go scanner --- lib/coderay/scanners/go.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index c0c602d..76a0820 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -161,7 +161,7 @@ module Scanners encoder.text_token match, :char elsif match = scan(/ \\ | $ /x) encoder.end_group :string - encoder.text_token match, :error + encoder.text_token match, :error unless match.empty? state = :initial label_expected = false else -- cgit v1.2.1 From 4669b7086136647f2eece41a122fa204eb16e1e9 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:16:47 +0200 Subject: fix label_expected (test case?) --- lib/coderay/scanners/go.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 76a0820..23cbfe6 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -64,6 +64,7 @@ module Scanners if match = scan(/ \s+ | \\\n /x) if in_preproc_line && match != "\\\n" && match.index(?\n) in_preproc_line = false + case_expected = false label_expected = label_expected_before_preproc_line end encoder.text_token match, :space @@ -72,7 +73,6 @@ module Scanners encoder.text_token match, :comment elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) - label_expected = match =~ /[;\{\}]/ if case_expected label_expected = true if match == ':' case_expected = false @@ -83,6 +83,7 @@ module Scanners kind = IDENT_KIND[match] if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) kind = :label + label_expected = false match << matched else label_expected = false -- cgit v1.2.1 From 85275cf21e7d15459abc11fd76606bd7d38fb8b5 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 12:11:49 -0600 Subject: Go doesn't have a "f" suffix for floats like C. --- lib/coderay/scanners/go.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 49d24c2..afcfca5 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -141,7 +141,7 @@ module Scanners label_expected = false encoder.text_token match, :integer - elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + elsif match = scan(/\d|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) label_expected = false encoder.text_token match, :float -- cgit v1.2.1 From cad9a00e28d61781d5a12a0556be7126eb790725 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 12:35:26 -0600 Subject: add imaginary numbers to Go scanner --- lib/coderay/scanners/go.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index afcfca5..04504ab 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -128,7 +128,11 @@ module Scanners elsif match = scan(/\$/) encoder.text_token match, :ident - + + elsif match = scan(/\d*(\.\d*)?([eE][+-]?\d+)?i/) + label_expected = false + encoder.text_token match, :imaginary + elsif match = scan(/0[xX][0-9A-Fa-f]+/) label_expected = false encoder.text_token match, :hex @@ -137,13 +141,13 @@ module Scanners label_expected = false encoder.text_token match, :octal - elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) - label_expected = false - encoder.text_token match, :integer - elsif match = scan(/\d|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) label_expected = false encoder.text_token match, :float + + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false + encoder.text_token match, :integer else encoder.text_token getch, :error -- cgit v1.2.1 From 4c877bf0eb655cf2c04a0efb24b4e5f83b97e160 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 12:43:50 -0600 Subject: predeclared identifiers http://golang.org/ref/spec#Predeclared_identifiers --- lib/coderay/scanners/go.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 04504ab..ae50aa4 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -31,6 +31,11 @@ module Scanners 'nil', 'iota', 'true', 'false', ] # :nodoc: + + PREDEFINED_FUNCTIONS = %w[ + append cap close complex copy delete imag len + make new panic print println real recover + ] # :nodoc: DIRECTIVES = [ 'go_no_directive', # Seems no directive concept in Go? @@ -40,7 +45,8 @@ module Scanners add(KEYWORDS, :keyword). add(PREDEFINED_TYPES, :predefined_type). add(DIRECTIVES, :directive). - add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: + add(PREDEFINED_CONSTANTS, :predefined_constant). + add(PREDEFINED_FUNCTIONS, :predefined) # :nodoc: ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: -- cgit v1.2.1 From dd9ec43a3ea4cfce8eb1e8dd38504a9d8f24df54 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 12:46:15 -0600 Subject: yup, no C-style directives (auto extern static) --- lib/coderay/scanners/go.rb | 5 ----- 1 file changed, 5 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index ae50aa4..eb06fb0 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -37,14 +37,9 @@ module Scanners make new panic print println real recover ] # :nodoc: - DIRECTIVES = [ - 'go_no_directive', # Seems no directive concept in Go? - ] # :nodoc: - IDENT_KIND = WordList.new(:ident). add(KEYWORDS, :keyword). add(PREDEFINED_TYPES, :predefined_type). - add(DIRECTIVES, :directive). add(PREDEFINED_CONSTANTS, :predefined_constant). add(PREDEFINED_FUNCTIONS, :predefined) # :nodoc: -- cgit v1.2.1 From 94e4bb3366537def28ed257fcf2b70a634711c9f Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Fri, 12 Jul 2013 14:57:46 +0200 Subject: don't change value of objects you don't own --- lib/coderay/scanners/css.rb | 2 +- lib/coderay/scanners/sass.rb | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb index 9ed4618..5977b9c 100644 --- a/lib/coderay/scanners/css.rb +++ b/lib/coderay/scanners/css.rb @@ -53,7 +53,7 @@ module Scanners end def scan_tokens encoder, options - states = Array(options[:state] || @state) + states = Array(options[:state] || @state).dup value_expected = @value_expected until eos? diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb index e20bebe..7ba9bf5 100644 --- a/lib/coderay/scanners/sass.rb +++ b/lib/coderay/scanners/sass.rb @@ -19,7 +19,7 @@ module Scanners end def scan_tokens encoder, options - states = Array(options[:state] || @state) + states = Array(options[:state] || @state).dup string_delimiter = nil until eos? @@ -119,7 +119,7 @@ module Scanners else #:nocov: - raise_inspect 'Unknown state', encoder + raise_inspect 'Unknown state: %p' % [states.last], encoder #:nocov: end @@ -215,7 +215,7 @@ module Scanners end if options[:keep_state] - @state = states + @state = states.dup end while state = states.pop -- cgit v1.2.1 From 990ed25fd1bf350dabae4bed031e07ee485beb79 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 16:20:59 +0200 Subject: split '" string states in Sass scanner (edge case) --- lib/coderay/scanners/sass.rb | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb index 7ba9bf5..85b4711 100644 --- a/lib/coderay/scanners/sass.rb +++ b/lib/coderay/scanners/sass.rb @@ -7,11 +7,6 @@ module Scanners register_for :sass file_extension 'sass' - STRING_CONTENT_PATTERN = { - "'" => /(?:[^\n\'\#]+|\\\n|#{RE::Escape}|#(?!\{))+/, - '"' => /(?:[^\n\"\#]+|\\\n|#{RE::Escape}|#(?!\{))+/, - } - protected def setup @@ -20,7 +15,8 @@ module Scanners def scan_tokens encoder, options states = Array(options[:state] || @state).dup - string_delimiter = nil + + encoder.begin_group :string if states.last == :sqstring || states.last == :dqstring until eos? @@ -91,24 +87,23 @@ module Scanners next end - when :string - if match = scan(STRING_CONTENT_PATTERN[string_delimiter]) + when :sqstring, :dqstring + if match = scan(states.last == :sqstring ? /(?:[^\n\'\#]+|\\\n|#{RE::Escape}|#(?!\{))+/o : /(?:[^\n\"\#]+|\\\n|#{RE::Escape}|#(?!\{))+/o) encoder.text_token match, :content elsif match = scan(/['"]/) encoder.text_token match, :delimiter encoder.end_group :string - string_delimiter = nil states.pop elsif match = scan(/#\{/) encoder.begin_group :inline encoder.text_token match, :inline_delimiter states.push :sass_inline elsif match = scan(/ \\ | $ /x) - encoder.end_group :string + encoder.end_group states.last encoder.text_token match, :error unless match.empty? states.pop else - raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder + raise_inspect "else case #{states.last} reached; %p not handled." % peek(1), encoder end when :include @@ -157,15 +152,15 @@ module Scanners elsif match = scan(/['"]/) encoder.begin_group :string - string_delimiter = match encoder.text_token match, :delimiter if states.include? :sass_inline - content = scan_until(/(?=#{string_delimiter}|\}|\z)/) + # no nesting, just scan the string until delimiter + content = scan_until(/(?=#{match}|\}|\z)/) encoder.text_token content, :content unless content.empty? - encoder.text_token string_delimiter, :delimiter if scan(/#{string_delimiter}/) + encoder.text_token match, :delimiter if scan(/#{match}/) encoder.end_group :string else - states.push :string + states.push match == "'" ? :sqstring : :dqstring end elsif match = scan(/#{RE::Function}/o) @@ -221,7 +216,7 @@ module Scanners while state = states.pop if state == :sass_inline encoder.end_group :inline - elsif state == :string + elsif state == :sqstring || state == :dqstring encoder.end_group :string end end -- cgit v1.2.1 From def7e09db1963368e20bfd53c72532d6a631e0e8 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 16:21:42 +0200 Subject: fix #139: don't scan for :include after eos --- lib/coderay/scanners/sass.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb index 85b4711..1bbd534 100644 --- a/lib/coderay/scanners/sass.rb +++ b/lib/coderay/scanners/sass.rb @@ -209,6 +209,8 @@ module Scanners end + states.pop if states.last == :include + if options[:keep_state] @state = states.dup end -- cgit v1.2.1 From 62a0be9509f8814902a4a97df4ad84913728d059 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 16:34:17 +0200 Subject: fix #143 (Sass scanner key vs tag heuristic) --- lib/coderay/scanners/sass.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb index 1bbd534..e3296b9 100644 --- a/lib/coderay/scanners/sass.rb +++ b/lib/coderay/scanners/sass.rb @@ -44,7 +44,7 @@ module Scanners elsif case states.last when :initial, :media, :sass_inline if match = scan(/(?>#{RE::Ident})(?!\()/ox) - encoder.text_token match, value_expected ? :value : (check(/.*:/) ? :key : :tag) + encoder.text_token match, value_expected ? :value : (check(/.*:(?![a-z])/) ? :key : :tag) next elsif !value_expected && (match = scan(/\*/)) encoder.text_token match, :tag -- cgit v1.2.1 From 70c9ba896e1bba5ac727fb6fdfc3ba94510e652d Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 16:34:41 +0200 Subject: fix CSS scanner for things like "nth-child(2n)" --- lib/coderay/scanners/css.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb index 5977b9c..55d5239 100644 --- a/lib/coderay/scanners/css.rb +++ b/lib/coderay/scanners/css.rb @@ -25,7 +25,7 @@ module Scanners HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/ - Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)/ + Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)n?/ Name = /#{NMChar}+/ Ident = /-?#{NMStart}#{NMChar}*/ AtKeyword = /@#{Ident}/ -- cgit v1.2.1 From bbe4d72ba785f1bd6fd703d63b096a907da1b09f Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 20:31:34 +0200 Subject: tweak numeral tokens handling (#147) --- lib/coderay/scanners/go.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index eb06fb0..938da9d 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -31,7 +31,7 @@ module Scanners 'nil', 'iota', 'true', 'false', ] # :nodoc: - + PREDEFINED_FUNCTIONS = %w[ append cap close complex copy delete imag len make new panic print println real recover @@ -73,7 +73,7 @@ module Scanners elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) encoder.text_token match, :comment - elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + elsif match = scan(/ ?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) if case_expected label_expected = true if match == ':' case_expected = false @@ -129,24 +129,24 @@ module Scanners elsif match = scan(/\$/) encoder.text_token match, :ident - - elsif match = scan(/\d*(\.\d*)?([eE][+-]?\d+)?i/) + + elsif match = scan(/-?\d*(\.\d*)?([eE][+-]?\d+)?i/) label_expected = false encoder.text_token match, :imaginary - - elsif match = scan(/0[xX][0-9A-Fa-f]+/) + + elsif match = scan(/-?0[xX][0-9A-Fa-f]+/) label_expected = false encoder.text_token match, :hex - elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) + elsif match = scan(/-?(?:0[0-7]+)(?![89.eEfF])/) label_expected = false encoder.text_token match, :octal - elsif match = scan(/\d|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) + elsif match = scan(/-?(?:\d*\.\d+|\d+\.)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) label_expected = false encoder.text_token match, :float - - elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + + elsif match = scan(/-?(?:\d+)(?![.eEfF])L?L?/) label_expected = false encoder.text_token match, :integer -- cgit v1.2.1 From 6dd14ef018ee4417771504117819121bd4b8520d Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 20:34:10 +0200 Subject: be a bit more graceful with buggy Go strings --- lib/coderay/scanners/go.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 938da9d..5034adc 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -165,9 +165,10 @@ module Scanners label_expected = false elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) encoder.text_token match, :char - elsif match = scan(/ \\ | $ /x) + elsif match = scan(/ \\ /x) + encoder.text_token match, :error + elsif match = scan(/$/) encoder.end_group :string - encoder.text_token match, :error unless match.empty? state = :initial label_expected = false else -- cgit v1.2.1 From 82864efabda9dc17fa6a28b0740ffc8f58706126 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 20:36:27 +0200 Subject: allow unicode characters in char literals --- lib/coderay/scanners/go.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 5034adc..59473f6 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -44,7 +44,7 @@ module Scanners add(PREDEFINED_FUNCTIONS, :predefined) # :nodoc: ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: - UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: protected @@ -123,7 +123,7 @@ module Scanners label_expected_before_preproc_line = label_expected state = :include_expected if self[1] == 'include' - elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) )? '? /ox) label_expected = false encoder.text_token match, :char -- cgit v1.2.1 From 59ca07b0d1a1710ab729636ea00de4b638f56110 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 20 Jul 2013 11:17:30 +0200 Subject: =?UTF-8?q?add=20Ruby=202=20syntax:=20%i(=E2=80=A6)=20and=20%I(?= =?UTF-8?q?=E2=80=A6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/coderay/scanners/ruby/patterns.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index ed071d2..0b36e13 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -157,13 +157,16 @@ module Scanners yield ]) - FANCY_STRING_START = / % ( [QqrsWwx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x + FANCY_STRING_START = / % ( [iIqQrswWx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x FANCY_STRING_KIND = Hash.new(:string).merge({ + 'i' => :symbol, + 'I' => :symbol, 'r' => :regexp, 's' => :symbol, 'x' => :shell, }) FANCY_STRING_INTERPRETED = Hash.new(true).merge({ + 'i' => false, 'q' => false, 's' => false, 'w' => false, -- cgit v1.2.1 From 5c23a731ca55729fc65630eca3b37a5b1a71e5b1 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 20 Jul 2013 18:12:04 +0200 Subject: mark possibly problematic spots with FIXME --- lib/coderay/scanners/debug.rb | 2 ++ lib/coderay/scanners/diff.rb | 1 + lib/coderay/scanners/python.rb | 3 +++ lib/coderay/scanners/raydebug.rb | 2 ++ lib/coderay/scanners/ruby/string_state.rb | 1 + 5 files changed, 9 insertions(+) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb index 566bfa7..9d10864 100644 --- a/lib/coderay/scanners/debug.rb +++ b/lib/coderay/scanners/debug.rb @@ -21,6 +21,7 @@ module Scanners encoder.text_token match, :space elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x) + # FIXME: cache attack kind = self[1].to_sym match = self[2].gsub(/\\(.)/m, '\1') unless TokenKinds.has_key? kind @@ -30,6 +31,7 @@ module Scanners encoder.text_token match, kind elsif match = scan(/ (\w+) ([<\[]) /x) + # FIXME: cache attack kind = self[1].to_sym opened_tokens << kind case self[2] diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb index fd1aed6..836fa41 100644 --- a/lib/coderay/scanners/diff.rb +++ b/lib/coderay/scanners/diff.rb @@ -21,6 +21,7 @@ module Scanners line_kind = nil state = :initial deleted_lines_count = 0 + # FIXME: cache attack scanners = Hash.new do |h, lang| h[lang] = Scanners[lang].new '', :keep_tokens => true, :keep_state => true end diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb index 09c8b6e..23630f9 100644 --- a/lib/coderay/scanners/python.rb +++ b/lib/coderay/scanners/python.rb @@ -75,10 +75,12 @@ module Scanners <<=? | >>=? | [<>=]=? | != # comparison and assignment /x # :nodoc: + # FIXME: cache attack STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter| h[delimiter] = Regexp.union delimiter # :nodoc: } + # FIXME: cache attack STRING_CONTENT_REGEXP = Hash.new { |h, delimiter| h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc: } @@ -183,6 +185,7 @@ module Scanners kind = :ident elsif kind == :keyword state = DEF_NEW_STATE[match] + # FIXME: cache attack from_import_state << match.to_sym if state == :include_expected end encoder.text_token match, kind diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb index d39d962..ca35de0 100644 --- a/lib/coderay/scanners/raydebug.rb +++ b/lib/coderay/scanners/raydebug.rb @@ -26,6 +26,7 @@ module Scanners encoder.text_token kind, :class encoder.text_token '(', :operator match = self[2] + # FIXME: cache attack encoder.text_token match, kind.to_sym unless match.empty? encoder.text_token match, :operator if match = scan(/\)/) @@ -39,6 +40,7 @@ module Scanners else raise 'CodeRay bug: This case should not be reached.' end + # FIXME: cache attack kind = kind.to_sym opened_tokens << kind encoder.begin_group kind diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index 2f398d1..fe37d07 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -14,6 +14,7 @@ module Scanners { } ] ].each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with << + # FIXME: cache attack STRING_PATTERN = Hash.new do |h, k| delim, interpreted = *k # delim = delim.dup # workaround for old Ruby -- cgit v1.2.1 From e2546068d0f16fcba15268e740bbb6d9f4f223e9 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 18:28:54 +0200 Subject: prevent Symbol attack in Raydebug scanner --- lib/coderay/scanners/raydebug.rb | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb index ca35de0..6c1c10f 100644 --- a/lib/coderay/scanners/raydebug.rb +++ b/lib/coderay/scanners/raydebug.rb @@ -1,3 +1,5 @@ +require 'set' + module CodeRay module Scanners @@ -12,6 +14,11 @@ module Scanners protected + def setup + super + @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set + end + def scan_tokens encoder, options opened_tokens = [] @@ -26,8 +33,13 @@ module Scanners encoder.text_token kind, :class encoder.text_token '(', :operator match = self[2] - # FIXME: cache attack - encoder.text_token match, kind.to_sym unless match.empty? + unless match.empty? + if @known_token_kinds.include? kind + encoder.text_token match, kind.to_sym + else + encoder.text_token match, :plain + end + end encoder.text_token match, :operator if match = scan(/\)/) elsif match = scan(/ (\w+) ([<\[]) /x) -- cgit v1.2.1 From 2ab42c7b5e674453fac0320fe0c4a40daf6197e1 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 18:53:41 +0200 Subject: prevent Symbol attack in Debug scanner --- lib/coderay/scanners/debug.rb | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb index 9d10864..ac12c16 100644 --- a/lib/coderay/scanners/debug.rb +++ b/lib/coderay/scanners/debug.rb @@ -1,3 +1,5 @@ +require 'set' + module CodeRay module Scanners @@ -11,6 +13,11 @@ module Scanners protected + def setup + super + @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set + end + def scan_tokens encoder, options opened_tokens = [] @@ -21,26 +28,24 @@ module Scanners encoder.text_token match, :space elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x) - # FIXME: cache attack - kind = self[1].to_sym - match = self[2].gsub(/\\(.)/m, '\1') - unless TokenKinds.has_key? kind - kind = :error - match = matched + if @known_token_kinds.include? self[1] + encoder.text_token self[2].gsub(/\\(.)/m, '\1'), self[1].to_sym + else + encoder.text_token matched, :error end - encoder.text_token match, kind elsif match = scan(/ (\w+) ([<\[]) /x) - # FIXME: cache attack - kind = self[1].to_sym - opened_tokens << kind - case self[2] - when '<' - encoder.begin_group kind - when '[' - encoder.begin_line kind - else - raise 'CodeRay bug: This case should not be reached.' + if @known_token_kinds.include? self[1] + kind = self[1].to_sym + opened_tokens << kind + case self[2] + when '<' + encoder.begin_group kind + when '[' + encoder.begin_line kind + else + raise 'CodeRay bug: This case should not be reached.' + end end elsif !opened_tokens.empty? && match = scan(/ > /x) -- cgit v1.2.1 From 5d6bee7f5caced1383e6aac427fb356a4788794b Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:14:21 +0200 Subject: tweak Debug scanners again, introduce :unknown token kind --- lib/coderay/scanners/debug.rb | 25 ++++++++++++++----------- lib/coderay/scanners/raydebug.rb | 17 ++++++----------- 2 files changed, 20 insertions(+), 22 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb index ac12c16..83ede9a 100644 --- a/lib/coderay/scanners/debug.rb +++ b/lib/coderay/scanners/debug.rb @@ -5,7 +5,7 @@ module Scanners # = Debug Scanner # - # Interprets the output of the Encoders::Debug encoder. + # Interprets the output of the Encoders::Debug encoder (basically the inverse function). class Debug < Scanner register_for :debug @@ -31,21 +31,24 @@ module Scanners if @known_token_kinds.include? self[1] encoder.text_token self[2].gsub(/\\(.)/m, '\1'), self[1].to_sym else - encoder.text_token matched, :error + encoder.text_token matched, :unknown end elsif match = scan(/ (\w+) ([<\[]) /x) if @known_token_kinds.include? self[1] kind = self[1].to_sym - opened_tokens << kind - case self[2] - when '<' - encoder.begin_group kind - when '[' - encoder.begin_line kind - else - raise 'CodeRay bug: This case should not be reached.' - end + else + kind = :unknown + end + + opened_tokens << kind + case self[2] + when '<' + encoder.begin_group kind + when '[' + encoder.begin_line kind + else + raise 'CodeRay bug: This case should not be reached.' end elsif !opened_tokens.empty? && match = scan(/ > /x) diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb index 6c1c10f..1effdc8 100644 --- a/lib/coderay/scanners/raydebug.rb +++ b/lib/coderay/scanners/raydebug.rb @@ -3,9 +3,9 @@ require 'set' module CodeRay module Scanners - # = Debug Scanner + # = Raydebug Scanner # - # Parses the output of the Encoders::Debug encoder. + # Highlights the output of the Encoders::Debug encoder. class Raydebug < Scanner register_for :raydebug @@ -43,17 +43,12 @@ module Scanners encoder.text_token match, :operator if match = scan(/\)/) elsif match = scan(/ (\w+) ([<\[]) /x) - kind = self[1] - case self[2] - when '<' - encoder.text_token kind, :class - when '[' - encoder.text_token kind, :class + encoder.text_token self[1], :class + if @known_token_kinds.include? self[1] + kind = self[1].to_sym else - raise 'CodeRay bug: This case should not be reached.' + kind = :unknown end - # FIXME: cache attack - kind = kind.to_sym opened_tokens << kind encoder.begin_group kind encoder.text_token self[2], :operator -- cgit v1.2.1 From 368e053880819edc74fdcef38f38b5fd4806a3f4 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:18:49 +0200 Subject: FileType should guard against attacks here --- lib/coderay/scanners/diff.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb index 836fa41..fd1aed6 100644 --- a/lib/coderay/scanners/diff.rb +++ b/lib/coderay/scanners/diff.rb @@ -21,7 +21,6 @@ module Scanners line_kind = nil state = :initial deleted_lines_count = 0 - # FIXME: cache attack scanners = Hash.new do |h, lang| h[lang] = Scanners[lang].new '', :keep_tokens => true, :keep_state => true end -- cgit v1.2.1 From af04107b8b370452a17fa54e8ea0e8adc8b376b0 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:19:57 +0200 Subject: no attack vector, there are only 4 cases --- lib/coderay/scanners/python.rb | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb index 23630f9..05e1f5f 100644 --- a/lib/coderay/scanners/python.rb +++ b/lib/coderay/scanners/python.rb @@ -75,12 +75,10 @@ module Scanners <<=? | >>=? | [<>=]=? | != # comparison and assignment /x # :nodoc: - # FIXME: cache attack STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter| h[delimiter] = Regexp.union delimiter # :nodoc: } - # FIXME: cache attack STRING_CONTENT_REGEXP = Hash.new { |h, delimiter| h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc: } -- cgit v1.2.1 From e9140073f4dcba5c022a2ad40a1b935a07a6b4c3 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:21:45 +0200 Subject: no attack vector, there are only 2 cases --- lib/coderay/scanners/python.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb index 05e1f5f..09c8b6e 100644 --- a/lib/coderay/scanners/python.rb +++ b/lib/coderay/scanners/python.rb @@ -183,7 +183,6 @@ module Scanners kind = :ident elsif kind == :keyword state = DEF_NEW_STATE[match] - # FIXME: cache attack from_import_state << match.to_sym if state == :include_expected end encoder.text_token match, kind -- cgit v1.2.1 From c3c70e0b3497939dbfb1958a0764f4fd18c05a48 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:31:35 +0200 Subject: cleanup --- lib/coderay/scanners/ruby/string_state.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index fe37d07..bcc0507 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -17,7 +17,6 @@ module Scanners # FIXME: cache attack STRING_PATTERN = Hash.new do |h, k| delim, interpreted = *k - # delim = delim.dup # workaround for old Ruby delim_pattern = Regexp.escape(delim) if closing_paren = CLOSING_PAREN[delim] delim_pattern << Regexp.escape(closing_paren) -- cgit v1.2.1 From 65983f38eaed758a9901adf9e4e8c4be3e3a6123 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:41:55 +0200 Subject: avoid cache attack in Ruby scanner (eg. using Unicode-delimited Fancy Strings) --- lib/coderay/scanners/ruby/string_state.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index bcc0507..28ddd6c 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -14,7 +14,6 @@ module Scanners { } ] ].each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with << - # FIXME: cache attack STRING_PATTERN = Hash.new do |h, k| delim, interpreted = *k delim_pattern = Regexp.escape(delim) @@ -29,12 +28,13 @@ module Scanners # '| [|?*+(){}\[\].^$]' # end - h[k] = - if interpreted && delim != '#' - / (?= [#{delim_pattern}] | \# [{$@] ) /mx - else - / (?= [#{delim_pattern}] ) /mx - end + if interpreted && delim != '#' + / (?= [#{delim_pattern}] | \# [{$@] ) /mx + else + / (?= [#{delim_pattern}] ) /mx + end.tap do |pattern| + h[k] = pattern if (delim.respond_to?(:ord) ? delim.ord : delim[0]) < 256 + end end def initialize kind, interpreted, delim, heredoc = false -- cgit v1.2.1 From a48037b85a12228431b32103786456f36beb355f Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 1 Sep 2013 01:01:35 +0200 Subject: final cleanup --- lib/coderay/scanners/go.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 59473f6..99fdd63 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -1,7 +1,6 @@ module CodeRay module Scanners - # Scanner for Go, copy from c class Go < Scanner register_for :go -- cgit v1.2.1 From d3197be3f207f8fcf52954d8815a0ea1948d25a4 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 22 Feb 2014 00:33:54 +0100 Subject: fix for #163 (SQL scanner), declare 1.1.1 --- lib/coderay/scanners/sql.rb | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb index 93aeaf3..c25f6d2 100644 --- a/lib/coderay/scanners/sql.rb +++ b/lib/coderay/scanners/sql.rb @@ -57,6 +57,12 @@ module Scanners STRING_PREFIXES = /[xnb]|_\w+/i + STRING_CONTENT_PATTERN = { + '"' => / (?: [^\\"] | "" )+ /x, + "'" => / (?: [^\\'] | '' )+ /x, + '`' => / (?: [^\\`] | `` )+ /x, + } + def scan_tokens encoder, options state = :initial @@ -115,40 +121,26 @@ module Scanners end elsif state == :string - if match = scan(/[^\\"'`]+/) - string_content << match - next + if match = scan(STRING_CONTENT_PATTERN[string_type]) + encoder.text_token match, :content elsif match = scan(/["'`]/) if string_type == match if peek(1) == string_type # doubling means escape - string_content << string_type << getch - next - end - unless string_content.empty? - encoder.text_token string_content, :content - string_content = '' + encoder.text_token match + getch, :content + else + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + string_type = nil end - encoder.text_token match, :delimiter - encoder.end_group :string - state = :initial - string_type = nil else - string_content << match + encoder.text_token match, :content end elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - unless string_content.empty? - encoder.text_token string_content, :content - string_content = '' - end encoder.text_token match, :char elsif match = scan(/ \\ . /mox) - string_content << match - next + encoder.text_token match, :content elsif match = scan(/ \\ | $ /x) - unless string_content.empty? - encoder.text_token string_content, :content - string_content = '' - end encoder.text_token match, :error unless match.empty? encoder.end_group :string state = :initial -- cgit v1.2.1 From e1aa98e7386609fd4c84bdcd2c3ea4b26663c8b7 Mon Sep 17 00:00:00 2001 From: BenBasson Date: Wed, 11 Jun 2014 22:47:51 +0100 Subject: Allow $ in SQL object names. --- lib/coderay/scanners/sql.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb index c25f6d2..7d57f77 100644 --- a/lib/coderay/scanners/sql.rb +++ b/lib/coderay/scanners/sql.rb @@ -96,7 +96,7 @@ module Scanners state = :string encoder.text_token match, :delimiter - elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x) + elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9\$]* /x) encoder.text_token match, name_expected ? :ident : (match[0] == ?@ ? :variable : IDENT_KIND[match]) name_expected = false -- cgit v1.2.1 From e5624a07e95cc7a3c704a4d08cddea582adc7f31 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 21 Mar 2015 03:44:49 +0100 Subject: prevent running out of regexp stack --- lib/coderay/scanners/diff.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb index fd1aed6..74a6c27 100644 --- a/lib/coderay/scanners/diff.rb +++ b/lib/coderay/scanners/diff.rb @@ -100,7 +100,7 @@ module Scanners next elsif match = scan(/-/) deleted_lines_count += 1 - if options[:inline_diff] && deleted_lines_count == 1 && (changed_lines_count = 1 + check(/.*(?:\n\-.*)*/).count("\n")) && match?(/(?>.*(?:\n\-.*){#{changed_lines_count - 1}}(?:\n\+.*){#{changed_lines_count}})$(?!\n\+)/) + if options[:inline_diff] && deleted_lines_count == 1 && (changed_lines_count = 1 + check(/.*(?:\n\-.*)*/).count("\n")) && changed_lines_count <= 100_000 && match?(/(?>.*(?:\n\-.*){#{changed_lines_count - 1}}(?:\n\+.*){#{changed_lines_count}})$(?!\n\+)/) deleted_lines = Array.new(changed_lines_count) { |i| skip(/\n\-/) if i > 0; scan(/.*/) } inserted_lines = Array.new(changed_lines_count) { |i| skip(/\n\+/) ; scan(/.*/) } -- cgit v1.2.1 From 080f8a8225cb911d037d1f6e58e581dec9558c58 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 11:40:13 +0100 Subject: add support for Ruby 2.1 number literal suffixes --- lib/coderay/scanners/ruby.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 80165ca..0492a55 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -191,7 +191,10 @@ module Scanners encoder.text_token match, :error method_call_expected = false else - encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary + kind = self[1] ? :float : :integer # TODO: send :hex/:octal/:binary + match << 'r' if match !~ /e/i && scan(/r/) + match << 'i' if scan(/i/) + encoder.text_token match, kind end value_expected = false -- cgit v1.2.1 From 39cbd37815f65f21e0433f4da4cf5fbeda2e1e3f Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 12:06:26 +0100 Subject: add support for Ruby 2.2 quoted hash keys KNOWN ISSUE: string interpolation will not work! --- lib/coderay/scanners/ruby.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 0492a55..165d66b 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -164,15 +164,18 @@ module Scanners end elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx) - encoder.begin_group :string if match.size == 1 + encoder.begin_group :string encoder.text_token match, :delimiter state = self.class::StringState.new :string, match == '"', match # important for streaming else + kind = value_expected == true && scan(/:/) ? :key : :string + encoder.begin_group kind encoder.text_token match[0,1], :delimiter encoder.text_token match[1..-2], :content if match.size > 2 encoder.text_token match[-1,1], :delimiter - encoder.end_group :string + encoder.end_group kind + encoder.text_token ':', :operator if kind == :key value_expected = false end -- cgit v1.2.1 From d9d1eedcb235b371683eed22a6e4217caef73ffa Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 12:08:21 +0100 Subject: add support for Ruby 2.3 safe navigation operator --- lib/coderay/scanners/ruby.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 165d66b..24ab71f 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -201,7 +201,7 @@ module Scanners end value_expected = false - elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x) + elsif match = scan(/ [-+!~^\/]=? | [:;] | &\. | [*|&]{1,2}=? | >>? /x) value_expected = true encoder.text_token match, :operator -- cgit v1.2.1 From 376884d457ac7953914cc84b94fe6404cd904fe0 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 12:10:18 +0100 Subject: add support for Ruby 2.3 squiggly heredoc --- lib/coderay/scanners/ruby/patterns.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index 0b36e13..3dd6ad5 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -114,7 +114,7 @@ module Scanners # NOTE: This is not completely correct, but # nobody needs heredoc delimiters ending with \n. HEREDOC_OPEN = / - << (-)? # $1 = float + << ([-~])? # $1 = float (?: ( [A-Za-z_0-9]+ ) # $2 = delim | -- cgit v1.2.1 From 415498eaf9417cf30656c4a745eef0409b214afc Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 13:11:31 +0100 Subject: allow indentation of squiggly heredoc delimiter --- lib/coderay/scanners/ruby.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 24ab71f..f7feb46 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -214,7 +214,7 @@ module Scanners encoder.end_group kind heredocs ||= [] # create heredocs if empty heredocs << self.class::StringState.new(kind, quote != "'", delim, - self[1] == '-' ? :indented : :linestart) + self[1] ? :indented : :linestart) value_expected = false elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o) -- cgit v1.2.1 From c33f3f5c43064f7b468a59e086dc4a9a4f949ff7 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 13:17:23 +0100 Subject: check for keys with escape sequences, too --- lib/coderay/scanners/ruby.rb | 5 +++-- lib/coderay/scanners/ruby/string_state.rb | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index f7feb46..5b8de42 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -165,9 +165,10 @@ module Scanners elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx) if match.size == 1 - encoder.begin_group :string + kind = check(self.class::StringState.simple_key_pattern(match)) ? :key : :string + encoder.begin_group kind encoder.text_token match, :delimiter - state = self.class::StringState.new :string, match == '"', match # important for streaming + state = self.class::StringState.new kind, match == '"', match # important for streaming else kind = value_expected == true && scan(/:/) ? :key : :string encoder.begin_group kind diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index 28ddd6c..93e7208 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -37,6 +37,14 @@ module Scanners end end + def self.simple_key_pattern delim + if delim == "'" + / (?> (?: [^\\']+ | \\. )* ) ' : /mx + else + / (?> (?: [^\\"\#]+ | \\. | \#\$[\\"] | \#(?!\{) )* ) " : /mx + end + end + def initialize kind, interpreted, delim, heredoc = false if heredoc pattern = heredoc_pattern delim, interpreted, heredoc == :indented -- cgit v1.2.1 From 036fb3291274ed87f106bdbeb65bbd10b4c561f9 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 13:39:08 +0100 Subject: skip over interpolation if not nested --- lib/coderay/scanners/ruby/string_state.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index 93e7208..95f1e83 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -41,7 +41,7 @@ module Scanners if delim == "'" / (?> (?: [^\\']+ | \\. )* ) ' : /mx else - / (?> (?: [^\\"\#]+ | \\. | \#\$[\\"] | \#(?!\{) )* ) " : /mx + / (?> (?: [^\\"\#]+ | \\. | \#\$[\\"] | \#\{[^\{\}]+\} | \#(?!\{) )* ) " : /mx end end -- cgit v1.2.1 From a14639c31bbe33c23853a66d6feb817da4248e1a Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 13:44:18 +0100 Subject: don't ruin indentation --- lib/coderay/scanners/ruby/patterns.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index 3dd6ad5..e5a156d 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -114,7 +114,7 @@ module Scanners # NOTE: This is not completely correct, but # nobody needs heredoc delimiters ending with \n. HEREDOC_OPEN = / - << ([-~])? # $1 = float + << ([-~])? # $1 = float (?: ( [A-Za-z_0-9]+ ) # $2 = delim | -- cgit v1.2.1