From abb92f30b12e11781afa76f43a344627520b5b34 Mon Sep 17 00:00:00 2001 From: Eric Guo Date: Sun, 8 Jul 2012 14:32:28 +0800 Subject: New: *Go Encoder* Draft version, copy from c --- lib/coderay/helpers/file_type.rb | 37 ++++---- lib/coderay/scanners/go.rb | 195 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 214 insertions(+), 18 deletions(-) create mode 100644 lib/coderay/scanners/go.rb (limited to 'lib/coderay') diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 637001b..5159054 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -1,5 +1,5 @@ module CodeRay - + # = FileType # # A simple filetype recognizer. @@ -8,18 +8,18 @@ module CodeRay # # # determine the type of the given # lang = FileType[file_name] - # + # # # return :text if the file type is unknown # lang = FileType.fetch file_name, :text - # + # # # try the shebang line, too # lang = FileType.fetch file_name, :text, true module FileType - + UnknownFileType = Class.new Exception - + class << self - + # Try to determine the file type of the file. # # +filename+ is a relative or absolute path to a file. @@ -30,7 +30,7 @@ module CodeRay name = File.basename filename ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot ext2 = filename.to_s[/\.(.*)/, 1] # from first dot - + type = TypeFromExt[ext] || TypeFromExt[ext.downcase] || @@ -39,10 +39,10 @@ module CodeRay TypeFromName[name] || TypeFromName[name.downcase] type ||= shebang(filename) if read_shebang - + type end - + # This works like Hash#fetch. # # If the filetype cannot be found, the +default+ value @@ -51,7 +51,7 @@ module CodeRay if default && block_given? warn 'Block supersedes default value argument; use either.' end - + if type = self[filename, read_shebang] type else @@ -60,9 +60,9 @@ module CodeRay raise UnknownFileType, 'Could not determine type of %p.' % filename end end - + protected - + def shebang filename return unless File.exist? filename File.open filename, 'r' do |f| @@ -73,9 +73,9 @@ module CodeRay end end end - + end - + TypeFromExt = { 'c' => :c, 'cfc' => :xml, @@ -86,6 +86,7 @@ module CodeRay 'dpr' => :delphi, 'erb' => :erb, 'gemspec' => :ruby, + 'go' => :go, 'groovy' => :groovy, 'gvy' => :groovy, 'h' => :c, @@ -128,16 +129,16 @@ module CodeRay for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu] TypeFromExt[cpp_alias] = :cpp end - + TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/ - + TypeFromName = { 'Capfile' => :ruby, 'Rakefile' => :ruby, 'Rantfile' => :ruby, 'Gemfile' => :ruby, } - + end - + end diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb new file mode 100644 index 0000000..4431ef2 --- /dev/null +++ b/lib/coderay/scanners/go.rb @@ -0,0 +1,195 @@ +module CodeRay +module Scanners + + # Scanner for Go, copy from c + class Go < Scanner + + register_for :go + file_extension 'go' + + # http://golang.org/ref/spec#Keywords + KEYWORDS = [ + 'break', 'default', 'func', 'interface', 'select', + 'case', 'defer', 'go', 'map', 'struct', + 'chan', 'else', 'goto', 'package', 'switch', + 'const', 'fallthrough', 'if', 'range', 'type', + 'continue', 'for', 'import', 'return', 'var', + ] # :nodoc: + + # http://golang.org/ref/spec#Types + PREDEFINED_TYPES = [ + 'bool', + 'uint8', 'uint16', 'uint32', 'uint64', + 'int8', 'int16', 'int32', 'int64', + 'float32', 'float64', + 'complex64', 'complex128', + 'byte', 'rune', + 'uint', 'int', 'uintptr', + ] # :nodoc: + + PREDEFINED_CONSTANTS = [ + 'nil', 'iota', + 'true', 'false', + ] # :nodoc: + + DIRECTIVES = [ + 'go_no_directive', # Seems no directive concept in Go? + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_TYPES, :predefined_type). + add(DIRECTIVES, :directive). + add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: + + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + + protected + + def scan_tokens encoder, options + + state = :initial + label_expected = true + case_expected = false + label_expected_before_preproc_line = nil + in_preproc_line = false + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + if in_preproc_line && match != "\\\n" && match.index(?\n) + in_preproc_line = false + label_expected = label_expected_before_preproc_line + end + encoder.text_token match, :space + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :comment + + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + label_expected = match =~ /[;\{\}]/ + if case_expected + label_expected = true if match == ':' + case_expected = false + end + encoder.text_token match, :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) + kind = :label + match << matched + else + label_expected = false + if kind == :keyword + case match + when 'case', 'default' + case_expected = true + end + end + end + encoder.text_token match, kind + + elsif match = scan(/L?"/) + encoder.begin_group :string + if match[0] == ?L + encoder.text_token 'L', :modifier + match = '"' + end + encoder.text_token match, :delimiter + state = :string + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + encoder.text_token match, :comment + + elsif match = scan(/#[ \t]*(\w*)/) + encoder.text_token match, :preprocessor + in_preproc_line = true + label_expected_before_preproc_line = label_expected + state = :include_expected if self[1] == 'include' + + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + label_expected = false + encoder.text_token match, :char + + elsif match = scan(/\$/) + encoder.text_token match, :ident + + elsif match = scan(/0[xX][0-9A-Fa-f]+/) + label_expected = false + encoder.text_token match, :hex + + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) + label_expected = false + encoder.text_token match, :octal + + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false + encoder.text_token match, :integer + + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + label_expected = false + encoder.text_token match, :float + + else + encoder.text_token getch, :error + + end + + when :string + if match = scan(/[^\\\n"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + label_expected = false + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \\ | $ /x) + encoder.end_group :string + encoder.text_token match, :error + state = :initial + label_expected = false + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + + when :include_expected + if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + encoder.text_token match, :include + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + state = :initial if match.index ?\n + + else + state = :initial + + end + + else + raise_inspect 'Unknown state', encoder + + end + + end + + if state == :string + encoder.end_group :string + end + + encoder + end + + end + +end +end -- cgit v1.2.1 From ec39f80865cac97a49a6fec138dedc22e5971bad Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 28 Jul 2012 13:12:37 +0200 Subject: fix whitespace --- lib/coderay/helpers/file_type.rb | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 5159054..5cc7446 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -1,5 +1,5 @@ module CodeRay - + # = FileType # # A simple filetype recognizer. @@ -8,18 +8,18 @@ module CodeRay # # # determine the type of the given # lang = FileType[file_name] - # + # # # return :text if the file type is unknown # lang = FileType.fetch file_name, :text - # + # # # try the shebang line, too # lang = FileType.fetch file_name, :text, true module FileType - + UnknownFileType = Class.new Exception - + class << self - + # Try to determine the file type of the file. # # +filename+ is a relative or absolute path to a file. @@ -30,7 +30,7 @@ module CodeRay name = File.basename filename ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot ext2 = filename.to_s[/\.(.*)/, 1] # from first dot - + type = TypeFromExt[ext] || TypeFromExt[ext.downcase] || @@ -39,10 +39,10 @@ module CodeRay TypeFromName[name] || TypeFromName[name.downcase] type ||= shebang(filename) if read_shebang - + type end - + # This works like Hash#fetch. # # If the filetype cannot be found, the +default+ value @@ -51,7 +51,7 @@ module CodeRay if default && block_given? warn 'Block supersedes default value argument; use either.' end - + if type = self[filename, read_shebang] type else @@ -60,9 +60,9 @@ module CodeRay raise UnknownFileType, 'Could not determine type of %p.' % filename end end - + protected - + def shebang filename return unless File.exist? filename File.open filename, 'r' do |f| @@ -73,9 +73,9 @@ module CodeRay end end end - + end - + TypeFromExt = { 'c' => :c, 'cfc' => :xml, @@ -129,16 +129,16 @@ module CodeRay for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu] TypeFromExt[cpp_alias] = :cpp end - + TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/ - + TypeFromName = { 'Capfile' => :ruby, 'Rakefile' => :ruby, 'Rantfile' => :ruby, 'Gemfile' => :ruby, } - + end - + end -- cgit v1.2.1 From e1e6a6af97ef710cfc02a1380180b772f1c34672 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 09:23:54 -0600 Subject: additional Ruby files types Ruby seems to have no shortage of these specially named files. --- lib/coderay/helpers/file_type.rb | 41 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 6d4fa92..6d928d6 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -1,5 +1,5 @@ module CodeRay - + # = FileType # # A simple filetype recognizer. @@ -8,18 +8,18 @@ module CodeRay # # # determine the type of the given # lang = FileType[file_name] - # + # # # return :text if the file type is unknown # lang = FileType.fetch file_name, :text - # + # # # try the shebang line, too # lang = FileType.fetch file_name, :text, true module FileType - + UnknownFileType = Class.new Exception - + class << self - + # Try to determine the file type of the file. # # +filename+ is a relative or absolute path to a file. @@ -30,7 +30,7 @@ module CodeRay name = File.basename filename ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot ext2 = filename.to_s[/\.(.*)/, 1] # from first dot - + type = TypeFromExt[ext] || TypeFromExt[ext.downcase] || @@ -39,10 +39,10 @@ module CodeRay TypeFromName[name] || TypeFromName[name.downcase] type ||= shebang(filename) if read_shebang - + type end - + # This works like Hash#fetch. # # If the filetype cannot be found, the +default+ value @@ -51,7 +51,7 @@ module CodeRay if default && block_given? warn 'Block supersedes default value argument; use either.' end - + if type = self[filename, read_shebang] type else @@ -60,9 +60,9 @@ module CodeRay raise UnknownFileType, 'Could not determine type of %p.' % filename end end - + protected - + def shebang filename return unless File.exist? filename File.open filename, 'r' do |f| @@ -73,9 +73,9 @@ module CodeRay end end end - + end - + TypeFromExt = { 'c' => :c, 'cfc' => :xml, @@ -116,7 +116,7 @@ module CodeRay 'rhtml' => :erb, 'rjs' => :ruby, 'rpdf' => :ruby, - 'ru' => :ruby, + 'ru' => :ruby, # config.ru 'rxml' => :ruby, 'sass' => :sass, 'sql' => :sql, @@ -132,16 +132,19 @@ module CodeRay for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu] TypeFromExt[cpp_alias] = :cpp end - + TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/ - + TypeFromName = { 'Capfile' => :ruby, 'Rakefile' => :ruby, 'Rantfile' => :ruby, 'Gemfile' => :ruby, + 'Guardfile' => :ruby, + 'Vagrantfile' => :ruby, + 'Appraisals' => :ruby } - + end - + end -- cgit v1.2.1 From fd8c81f5c338e4f1448007c9100d15d912fadd27 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 09:43:54 -0600 Subject: additional types: string, error http://golang.org/ref/spec#Predeclared_identifiers --- lib/coderay/scanners/go.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 4431ef2..a66a5e3 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -23,7 +23,7 @@ module Scanners 'int8', 'int16', 'int32', 'int64', 'float32', 'float64', 'complex64', 'complex128', - 'byte', 'rune', + 'byte', 'rune', 'string', 'error', 'uint', 'int', 'uintptr', ] # :nodoc: -- cgit v1.2.1 From 004d0c83222113ff732101a3e25785ce0625bfa2 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:14:29 +0200 Subject: whitespace --- lib/coderay/scanners/go.rb | 82 +++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 41 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 4431ef2..d4a3598 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -1,12 +1,12 @@ module CodeRay module Scanners - + # Scanner for Go, copy from c class Go < Scanner - + register_for :go file_extension 'go' - + # http://golang.org/ref/spec#Keywords KEYWORDS = [ 'break', 'default', 'func', 'interface', 'select', @@ -15,7 +15,7 @@ module Scanners 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for', 'import', 'return', 'var', ] # :nodoc: - + # http://golang.org/ref/spec#Types PREDEFINED_TYPES = [ 'bool', @@ -26,51 +26,51 @@ module Scanners 'byte', 'rune', 'uint', 'int', 'uintptr', ] # :nodoc: - + PREDEFINED_CONSTANTS = [ 'nil', 'iota', 'true', 'false', ] # :nodoc: - + DIRECTIVES = [ 'go_no_directive', # Seems no directive concept in Go? ] # :nodoc: - + IDENT_KIND = WordList.new(:ident). add(KEYWORDS, :keyword). add(PREDEFINED_TYPES, :predefined_type). add(DIRECTIVES, :directive). add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: - + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: - - protected - + + protected + def scan_tokens encoder, options - + state = :initial label_expected = true case_expected = false label_expected_before_preproc_line = nil in_preproc_line = false - + until eos? - + case state - + when :initial - + if match = scan(/ \s+ | \\\n /x) if in_preproc_line && match != "\\\n" && match.index(?\n) in_preproc_line = false label_expected = label_expected_before_preproc_line end encoder.text_token match, :space - + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) encoder.text_token match, :comment - + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) label_expected = match =~ /[;\{\}]/ if case_expected @@ -78,7 +78,7 @@ module Scanners case_expected = false end encoder.text_token match, :operator - + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) kind = IDENT_KIND[match] if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) @@ -94,7 +94,7 @@ module Scanners end end encoder.text_token match, kind - + elsif match = scan(/L?"/) encoder.begin_group :string if match[0] == ?L @@ -107,41 +107,41 @@ module Scanners elsif match = scan(/ \# \s* if \s* 0 /x) match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? encoder.text_token match, :comment - + elsif match = scan(/#[ \t]*(\w*)/) encoder.text_token match, :preprocessor in_preproc_line = true label_expected_before_preproc_line = label_expected state = :include_expected if self[1] == 'include' - + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) label_expected = false encoder.text_token match, :char - + elsif match = scan(/\$/) encoder.text_token match, :ident - + elsif match = scan(/0[xX][0-9A-Fa-f]+/) label_expected = false encoder.text_token match, :hex - + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) label_expected = false encoder.text_token match, :octal - + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) label_expected = false encoder.text_token match, :integer - + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) label_expected = false encoder.text_token match, :float - + else encoder.text_token getch, :error - + end - + when :string if match = scan(/[^\\\n"]+/) encoder.text_token match, :content @@ -160,36 +160,36 @@ module Scanners else raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end - + when :include_expected if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) encoder.text_token match, :include state = :initial - + elsif match = scan(/\s+/) encoder.text_token match, :space state = :initial if match.index ?\n - + else state = :initial - + end - + else raise_inspect 'Unknown state', encoder - + end - + end - + if state == :string encoder.end_group :string end - + encoder end - + end - + end end -- cgit v1.2.1 From afa2be73c03d033056024354787abd66f2ff7fa0 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:15:05 +0200 Subject: add string as predefined type --- lib/coderay/scanners/go.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index d4a3598..dbf9595 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -23,7 +23,7 @@ module Scanners 'int8', 'int16', 'int32', 'int64', 'float32', 'float64', 'complex64', 'complex128', - 'byte', 'rune', + 'byte', 'rune', 'string', 'uint', 'int', 'uintptr', ] # :nodoc: -- cgit v1.2.1 From 7ef6f7783d788bf656a3c59600cd45cd5b694376 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:15:22 +0200 Subject: add support for raw strings in Go --- lib/coderay/scanners/go.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index dbf9595..c0c602d 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -103,7 +103,14 @@ module Scanners end encoder.text_token match, :delimiter state = :string - + + elsif match = scan(/ ` ([^`]+)? (`)? /x) + encoder.begin_group :shell + encoder.text_token '`', :delimiter + encoder.text_token self[1], :content if self[1] + encoder.text_token self[2], :delimiter if self[2] + encoder.end_group :shell + elsif match = scan(/ \# \s* if \s* 0 /x) match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? encoder.text_token match, :comment -- cgit v1.2.1 From a24c207a7e547408de0f6157d495b0dd2e8d5a40 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:15:36 +0200 Subject: fix empty token in Go scanner --- lib/coderay/scanners/go.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index c0c602d..76a0820 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -161,7 +161,7 @@ module Scanners encoder.text_token match, :char elsif match = scan(/ \\ | $ /x) encoder.end_group :string - encoder.text_token match, :error + encoder.text_token match, :error unless match.empty? state = :initial label_expected = false else -- cgit v1.2.1 From 4669b7086136647f2eece41a122fa204eb16e1e9 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:16:47 +0200 Subject: fix label_expected (test case?) --- lib/coderay/scanners/go.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 76a0820..23cbfe6 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -64,6 +64,7 @@ module Scanners if match = scan(/ \s+ | \\\n /x) if in_preproc_line && match != "\\\n" && match.index(?\n) in_preproc_line = false + case_expected = false label_expected = label_expected_before_preproc_line end encoder.text_token match, :space @@ -72,7 +73,6 @@ module Scanners encoder.text_token match, :comment elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) - label_expected = match =~ /[;\{\}]/ if case_expected label_expected = true if match == ':' case_expected = false @@ -83,6 +83,7 @@ module Scanners kind = IDENT_KIND[match] if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) kind = :label + label_expected = false match << matched else label_expected = false -- cgit v1.2.1 From c0d028010b081020352444078a685ee60bffe209 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 18:17:14 +0200 Subject: make predefined-type a bit more bright/blue --- lib/coderay/styles/alpha.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/styles/alpha.rb b/lib/coderay/styles/alpha.rb index ff85ecc..f4e9d7d 100644 --- a/lib/coderay/styles/alpha.rb +++ b/lib/coderay/styles/alpha.rb @@ -107,7 +107,7 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; } .operator { } .predefined { color:#369; font-weight:bold } .predefined-constant { color:#069 } -.predefined-type { color:#0a5; font-weight:bold } +.predefined-type { color:#0a8; font-weight:bold } .preprocessor { color:#579 } .pseudo-class { color:#00C; font-weight:bold } .regexp { background-color:hsla(300,100%,50%,0.06); } -- cgit v1.2.1 From 85275cf21e7d15459abc11fd76606bd7d38fb8b5 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 12:11:49 -0600 Subject: Go doesn't have a "f" suffix for floats like C. --- lib/coderay/scanners/go.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 49d24c2..afcfca5 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -141,7 +141,7 @@ module Scanners label_expected = false encoder.text_token match, :integer - elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + elsif match = scan(/\d|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) label_expected = false encoder.text_token match, :float -- cgit v1.2.1 From cad9a00e28d61781d5a12a0556be7126eb790725 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 12:35:26 -0600 Subject: add imaginary numbers to Go scanner --- lib/coderay/scanners/go.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index afcfca5..04504ab 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -128,7 +128,11 @@ module Scanners elsif match = scan(/\$/) encoder.text_token match, :ident - + + elsif match = scan(/\d*(\.\d*)?([eE][+-]?\d+)?i/) + label_expected = false + encoder.text_token match, :imaginary + elsif match = scan(/0[xX][0-9A-Fa-f]+/) label_expected = false encoder.text_token match, :hex @@ -137,13 +141,13 @@ module Scanners label_expected = false encoder.text_token match, :octal - elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) - label_expected = false - encoder.text_token match, :integer - elsif match = scan(/\d|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) label_expected = false encoder.text_token match, :float + + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false + encoder.text_token match, :integer else encoder.text_token getch, :error -- cgit v1.2.1 From 4c877bf0eb655cf2c04a0efb24b4e5f83b97e160 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 12:43:50 -0600 Subject: predeclared identifiers http://golang.org/ref/spec#Predeclared_identifiers --- lib/coderay/scanners/go.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 04504ab..ae50aa4 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -31,6 +31,11 @@ module Scanners 'nil', 'iota', 'true', 'false', ] # :nodoc: + + PREDEFINED_FUNCTIONS = %w[ + append cap close complex copy delete imag len + make new panic print println real recover + ] # :nodoc: DIRECTIVES = [ 'go_no_directive', # Seems no directive concept in Go? @@ -40,7 +45,8 @@ module Scanners add(KEYWORDS, :keyword). add(PREDEFINED_TYPES, :predefined_type). add(DIRECTIVES, :directive). - add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: + add(PREDEFINED_CONSTANTS, :predefined_constant). + add(PREDEFINED_FUNCTIONS, :predefined) # :nodoc: ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: -- cgit v1.2.1 From dd9ec43a3ea4cfce8eb1e8dd38504a9d8f24df54 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 12:46:15 -0600 Subject: yup, no C-style directives (auto extern static) --- lib/coderay/scanners/go.rb | 5 ----- 1 file changed, 5 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index ae50aa4..eb06fb0 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -37,14 +37,9 @@ module Scanners make new panic print println real recover ] # :nodoc: - DIRECTIVES = [ - 'go_no_directive', # Seems no directive concept in Go? - ] # :nodoc: - IDENT_KIND = WordList.new(:ident). add(KEYWORDS, :keyword). add(PREDEFINED_TYPES, :predefined_type). - add(DIRECTIVES, :directive). add(PREDEFINED_CONSTANTS, :predefined_constant). add(PREDEFINED_FUNCTIONS, :predefined) # :nodoc: -- cgit v1.2.1 From ae1969b1809365c87a9cb485b671364c502236a0 Mon Sep 17 00:00:00 2001 From: Nathan Youngman Date: Sun, 23 Jun 2013 13:00:17 -0600 Subject: revert trimming spaces --- lib/coderay/helpers/file_type.rb | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 6d928d6..a0f9fd6 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -1,5 +1,5 @@ module CodeRay - + # = FileType # # A simple filetype recognizer. @@ -8,18 +8,18 @@ module CodeRay # # # determine the type of the given # lang = FileType[file_name] - # + # # # return :text if the file type is unknown # lang = FileType.fetch file_name, :text - # + # # # try the shebang line, too # lang = FileType.fetch file_name, :text, true module FileType - + UnknownFileType = Class.new Exception - + class << self - + # Try to determine the file type of the file. # # +filename+ is a relative or absolute path to a file. @@ -30,7 +30,7 @@ module CodeRay name = File.basename filename ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot ext2 = filename.to_s[/\.(.*)/, 1] # from first dot - + type = TypeFromExt[ext] || TypeFromExt[ext.downcase] || @@ -39,10 +39,10 @@ module CodeRay TypeFromName[name] || TypeFromName[name.downcase] type ||= shebang(filename) if read_shebang - + type end - + # This works like Hash#fetch. # # If the filetype cannot be found, the +default+ value @@ -51,7 +51,7 @@ module CodeRay if default && block_given? warn 'Block supersedes default value argument; use either.' end - + if type = self[filename, read_shebang] type else @@ -60,9 +60,9 @@ module CodeRay raise UnknownFileType, 'Could not determine type of %p.' % filename end end - + protected - + def shebang filename return unless File.exist? filename File.open filename, 'r' do |f| @@ -73,9 +73,9 @@ module CodeRay end end end - + end - + TypeFromExt = { 'c' => :c, 'cfc' => :xml, @@ -132,9 +132,9 @@ module CodeRay for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu] TypeFromExt[cpp_alias] = :cpp end - + TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/ - + TypeFromName = { 'Capfile' => :ruby, 'Rakefile' => :ruby, @@ -144,7 +144,7 @@ module CodeRay 'Vagrantfile' => :ruby, 'Appraisals' => :ruby } - + end - + end -- cgit v1.2.1 From dc57601571af8024700991b6a80129285a980e9e Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 30 Jun 2013 05:57:38 +0200 Subject: add Lint encoder; do we still need DebugLint? --- lib/coderay/encoders/debug_lint.rb | 12 ++++---- lib/coderay/encoders/lint.rb | 57 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 7 deletions(-) create mode 100644 lib/coderay/encoders/lint.rb (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/debug_lint.rb b/lib/coderay/encoders/debug_lint.rb index 17a0795..2c14186 100644 --- a/lib/coderay/encoders/debug_lint.rb +++ b/lib/coderay/encoders/debug_lint.rb @@ -1,6 +1,8 @@ module CodeRay module Encoders + load :lint + # = Debug Lint Encoder # # Debug encoder with additional checks for: @@ -15,12 +17,8 @@ module Encoders register_for :debug_lint - InvalidTokenStream = Class.new StandardError - EmptyToken = Class.new InvalidTokenStream - IncorrectTokenGroupNesting = Class.new InvalidTokenStream - def text_token text, kind - raise EmptyToken, 'empty token' if text.empty? + raise Lint::EmptyToken, 'empty token' if text.empty? super end @@ -30,7 +28,7 @@ module Encoders end def end_group kind - raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind + raise Lint::IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind @opened.pop super end @@ -41,7 +39,7 @@ module Encoders end def end_line kind - raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind + raise Lint::IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind @opened.pop super end diff --git a/lib/coderay/encoders/lint.rb b/lib/coderay/encoders/lint.rb new file mode 100644 index 0000000..4601e90 --- /dev/null +++ b/lib/coderay/encoders/lint.rb @@ -0,0 +1,57 @@ +module CodeRay +module Encoders + + # = Lint Encoder + # + # Checks for: + # + # - empty tokens + # - incorrect nesting + # + # It will raise an InvalidTokenStream exception when any of the above occurs. + # + # See also: Encoders::DebugLint + class Lint < Debug + + register_for :lint + + InvalidTokenStream = Class.new StandardError + EmptyToken = Class.new InvalidTokenStream + IncorrectTokenGroupNesting = Class.new InvalidTokenStream + + def text_token text, kind + raise EmptyToken, 'empty token' if text.empty? + end + + def begin_group kind + @opened << kind + end + + def end_group kind + raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind + @opened.pop + end + + def begin_line kind + @opened << kind + end + + def end_line kind + raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind + @opened.pop + end + + protected + + def setup options + @opened = [] + end + + def finish options + raise 'Some tokens still open at end of token stream: %p' % [@opened] unless @opened.empty? + end + + end + +end +end -- cgit v1.2.1 From 94e4bb3366537def28ed257fcf2b70a634711c9f Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Fri, 12 Jul 2013 14:57:46 +0200 Subject: don't change value of objects you don't own --- lib/coderay/scanners/css.rb | 2 +- lib/coderay/scanners/sass.rb | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb index 9ed4618..5977b9c 100644 --- a/lib/coderay/scanners/css.rb +++ b/lib/coderay/scanners/css.rb @@ -53,7 +53,7 @@ module Scanners end def scan_tokens encoder, options - states = Array(options[:state] || @state) + states = Array(options[:state] || @state).dup value_expected = @value_expected until eos? diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb index e20bebe..7ba9bf5 100644 --- a/lib/coderay/scanners/sass.rb +++ b/lib/coderay/scanners/sass.rb @@ -19,7 +19,7 @@ module Scanners end def scan_tokens encoder, options - states = Array(options[:state] || @state) + states = Array(options[:state] || @state).dup string_delimiter = nil until eos? @@ -119,7 +119,7 @@ module Scanners else #:nocov: - raise_inspect 'Unknown state', encoder + raise_inspect 'Unknown state: %p' % [states.last], encoder #:nocov: end @@ -215,7 +215,7 @@ module Scanners end if options[:keep_state] - @state = states + @state = states.dup end while state = states.pop -- cgit v1.2.1 From 990ed25fd1bf350dabae4bed031e07ee485beb79 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 16:20:59 +0200 Subject: split '" string states in Sass scanner (edge case) --- lib/coderay/scanners/sass.rb | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb index 7ba9bf5..85b4711 100644 --- a/lib/coderay/scanners/sass.rb +++ b/lib/coderay/scanners/sass.rb @@ -7,11 +7,6 @@ module Scanners register_for :sass file_extension 'sass' - STRING_CONTENT_PATTERN = { - "'" => /(?:[^\n\'\#]+|\\\n|#{RE::Escape}|#(?!\{))+/, - '"' => /(?:[^\n\"\#]+|\\\n|#{RE::Escape}|#(?!\{))+/, - } - protected def setup @@ -20,7 +15,8 @@ module Scanners def scan_tokens encoder, options states = Array(options[:state] || @state).dup - string_delimiter = nil + + encoder.begin_group :string if states.last == :sqstring || states.last == :dqstring until eos? @@ -91,24 +87,23 @@ module Scanners next end - when :string - if match = scan(STRING_CONTENT_PATTERN[string_delimiter]) + when :sqstring, :dqstring + if match = scan(states.last == :sqstring ? /(?:[^\n\'\#]+|\\\n|#{RE::Escape}|#(?!\{))+/o : /(?:[^\n\"\#]+|\\\n|#{RE::Escape}|#(?!\{))+/o) encoder.text_token match, :content elsif match = scan(/['"]/) encoder.text_token match, :delimiter encoder.end_group :string - string_delimiter = nil states.pop elsif match = scan(/#\{/) encoder.begin_group :inline encoder.text_token match, :inline_delimiter states.push :sass_inline elsif match = scan(/ \\ | $ /x) - encoder.end_group :string + encoder.end_group states.last encoder.text_token match, :error unless match.empty? states.pop else - raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder + raise_inspect "else case #{states.last} reached; %p not handled." % peek(1), encoder end when :include @@ -157,15 +152,15 @@ module Scanners elsif match = scan(/['"]/) encoder.begin_group :string - string_delimiter = match encoder.text_token match, :delimiter if states.include? :sass_inline - content = scan_until(/(?=#{string_delimiter}|\}|\z)/) + # no nesting, just scan the string until delimiter + content = scan_until(/(?=#{match}|\}|\z)/) encoder.text_token content, :content unless content.empty? - encoder.text_token string_delimiter, :delimiter if scan(/#{string_delimiter}/) + encoder.text_token match, :delimiter if scan(/#{match}/) encoder.end_group :string else - states.push :string + states.push match == "'" ? :sqstring : :dqstring end elsif match = scan(/#{RE::Function}/o) @@ -221,7 +216,7 @@ module Scanners while state = states.pop if state == :sass_inline encoder.end_group :inline - elsif state == :string + elsif state == :sqstring || state == :dqstring encoder.end_group :string end end -- cgit v1.2.1 From def7e09db1963368e20bfd53c72532d6a631e0e8 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 16:21:42 +0200 Subject: fix #139: don't scan for :include after eos --- lib/coderay/scanners/sass.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb index 85b4711..1bbd534 100644 --- a/lib/coderay/scanners/sass.rb +++ b/lib/coderay/scanners/sass.rb @@ -209,6 +209,8 @@ module Scanners end + states.pop if states.last == :include + if options[:keep_state] @state = states.dup end -- cgit v1.2.1 From 62a0be9509f8814902a4a97df4ad84913728d059 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 16:34:17 +0200 Subject: fix #143 (Sass scanner key vs tag heuristic) --- lib/coderay/scanners/sass.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb index 1bbd534..e3296b9 100644 --- a/lib/coderay/scanners/sass.rb +++ b/lib/coderay/scanners/sass.rb @@ -44,7 +44,7 @@ module Scanners elsif case states.last when :initial, :media, :sass_inline if match = scan(/(?>#{RE::Ident})(?!\()/ox) - encoder.text_token match, value_expected ? :value : (check(/.*:/) ? :key : :tag) + encoder.text_token match, value_expected ? :value : (check(/.*:(?![a-z])/) ? :key : :tag) next elsif !value_expected && (match = scan(/\*/)) encoder.text_token match, :tag -- cgit v1.2.1 From 70c9ba896e1bba5ac727fb6fdfc3ba94510e652d Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 16:34:41 +0200 Subject: fix CSS scanner for things like "nth-child(2n)" --- lib/coderay/scanners/css.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb index 5977b9c..55d5239 100644 --- a/lib/coderay/scanners/css.rb +++ b/lib/coderay/scanners/css.rb @@ -25,7 +25,7 @@ module Scanners HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/ - Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)/ + Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)n?/ Name = /#{NMChar}+/ Ident = /-?#{NMStart}#{NMChar}*/ AtKeyword = /@#{Ident}/ -- cgit v1.2.1 From bbe4d72ba785f1bd6fd703d63b096a907da1b09f Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 20:31:34 +0200 Subject: tweak numeral tokens handling (#147) --- lib/coderay/scanners/go.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index eb06fb0..938da9d 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -31,7 +31,7 @@ module Scanners 'nil', 'iota', 'true', 'false', ] # :nodoc: - + PREDEFINED_FUNCTIONS = %w[ append cap close complex copy delete imag len make new panic print println real recover @@ -73,7 +73,7 @@ module Scanners elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) encoder.text_token match, :comment - elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + elsif match = scan(/ ?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) if case_expected label_expected = true if match == ':' case_expected = false @@ -129,24 +129,24 @@ module Scanners elsif match = scan(/\$/) encoder.text_token match, :ident - - elsif match = scan(/\d*(\.\d*)?([eE][+-]?\d+)?i/) + + elsif match = scan(/-?\d*(\.\d*)?([eE][+-]?\d+)?i/) label_expected = false encoder.text_token match, :imaginary - - elsif match = scan(/0[xX][0-9A-Fa-f]+/) + + elsif match = scan(/-?0[xX][0-9A-Fa-f]+/) label_expected = false encoder.text_token match, :hex - elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) + elsif match = scan(/-?(?:0[0-7]+)(?![89.eEfF])/) label_expected = false encoder.text_token match, :octal - elsif match = scan(/\d|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) + elsif match = scan(/-?(?:\d*\.\d+|\d+\.)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) label_expected = false encoder.text_token match, :float - - elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + + elsif match = scan(/-?(?:\d+)(?![.eEfF])L?L?/) label_expected = false encoder.text_token match, :integer -- cgit v1.2.1 From 6dd14ef018ee4417771504117819121bd4b8520d Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 20:34:10 +0200 Subject: be a bit more graceful with buggy Go strings --- lib/coderay/scanners/go.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 938da9d..5034adc 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -165,9 +165,10 @@ module Scanners label_expected = false elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) encoder.text_token match, :char - elsif match = scan(/ \\ | $ /x) + elsif match = scan(/ \\ /x) + encoder.text_token match, :error + elsif match = scan(/$/) encoder.end_group :string - encoder.text_token match, :error unless match.empty? state = :initial label_expected = false else -- cgit v1.2.1 From 82864efabda9dc17fa6a28b0740ffc8f58706126 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Jul 2013 20:36:27 +0200 Subject: allow unicode characters in char literals --- lib/coderay/scanners/go.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 5034adc..59473f6 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -44,7 +44,7 @@ module Scanners add(PREDEFINED_FUNCTIONS, :predefined) # :nodoc: ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: - UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: protected @@ -123,7 +123,7 @@ module Scanners label_expected_before_preproc_line = label_expected state = :include_expected if self[1] == 'include' - elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) )? '? /ox) label_expected = false encoder.text_token match, :char -- cgit v1.2.1 From 59ca07b0d1a1710ab729636ea00de4b638f56110 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 20 Jul 2013 11:17:30 +0200 Subject: =?UTF-8?q?add=20Ruby=202=20syntax:=20%i(=E2=80=A6)=20and=20%I(?= =?UTF-8?q?=E2=80=A6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/coderay/scanners/ruby/patterns.rb | 5 ++++- lib/coderay/styles/alpha.rb | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index ed071d2..0b36e13 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -157,13 +157,16 @@ module Scanners yield ]) - FANCY_STRING_START = / % ( [QqrsWwx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x + FANCY_STRING_START = / % ( [iIqQrswWx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x FANCY_STRING_KIND = Hash.new(:string).merge({ + 'i' => :symbol, + 'I' => :symbol, 'r' => :regexp, 's' => :symbol, 'x' => :shell, }) FANCY_STRING_INTERPRETED = Hash.new(true).merge({ + 'i' => false, 'q' => false, 's' => false, 'w' => false, diff --git a/lib/coderay/styles/alpha.rb b/lib/coderay/styles/alpha.rb index f4e9d7d..d304dc4 100644 --- a/lib/coderay/styles/alpha.rb +++ b/lib/coderay/styles/alpha.rb @@ -125,7 +125,7 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; } .string .modifier { color: #E40 } .symbol { color:#A60 } .symbol .content { color:#A60 } -.symbol .delimiter { color:#630 } +.symbol .delimiter { color:#740 } .tag { color:#070; font-weight:bold } .type { color:#339; font-weight:bold } .value { color: #088 } -- cgit v1.2.1 From 5c23a731ca55729fc65630eca3b37a5b1a71e5b1 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 20 Jul 2013 18:12:04 +0200 Subject: mark possibly problematic spots with FIXME --- lib/coderay/encoders/html.rb | 2 ++ lib/coderay/encoders/html/css.rb | 2 ++ lib/coderay/helpers/file_type.rb | 1 + lib/coderay/helpers/plugin.rb | 1 + lib/coderay/scanners/debug.rb | 2 ++ lib/coderay/scanners/diff.rb | 1 + lib/coderay/scanners/python.rb | 3 +++ lib/coderay/scanners/raydebug.rb | 2 ++ lib/coderay/scanners/ruby/string_state.rb | 1 + 9 files changed, 15 insertions(+) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index 20f2409..6dd231a 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -142,6 +142,7 @@ module Encoders HTML_ESCAPE = make_html_escape_hash HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1F]/ + # FIXME: cache attack TOKEN_KIND_TO_INFO = Hash.new do |h, kind| h[kind] = kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize } end @@ -284,6 +285,7 @@ module Encoders end def make_span_for_kinds method, hint + # FIXME: cache attack Hash.new do |h, kinds| h[kinds.is_a?(Symbol) ? kinds : kinds.dup] = begin css_class = css_class_for_kinds(kinds) diff --git a/lib/coderay/encoders/html/css.rb b/lib/coderay/encoders/html/css.rb index 164d7f8..de98f0e 100644 --- a/lib/coderay/encoders/html/css.rb +++ b/lib/coderay/encoders/html/css.rb @@ -21,6 +21,7 @@ module Encoders end def get_style_for_css_classes css_classes + # FIXME: cache attack cl = @styles[css_classes.first] return '' unless cl style = '' @@ -52,6 +53,7 @@ module Encoders for selector in selectors.split(',') classes = selector.scan(/[-\w]+/) cl = classes.pop + # FIXME: cache attack @styles[cl] ||= Hash.new @styles[cl][classes] = style.to_s.strip.delete(' ').chomp(';') end diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 5e3a1e7..e8a7b75 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -68,6 +68,7 @@ module CodeRay File.open filename, 'r' do |f| if first_line = f.gets if type = first_line[TypeFromShebang] + # FIXME: cache attack type.to_sym end end diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb index d14c5a9..3a38bfd 100644 --- a/lib/coderay/helpers/plugin.rb +++ b/lib/coderay/helpers/plugin.rb @@ -207,6 +207,7 @@ module CodeRay id elsif id.is_a? String if id[/\w+/] == id + # FIXME: cache attack id.downcase.to_sym else raise ArgumentError, "Invalid id given: #{id}" diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb index 566bfa7..9d10864 100644 --- a/lib/coderay/scanners/debug.rb +++ b/lib/coderay/scanners/debug.rb @@ -21,6 +21,7 @@ module Scanners encoder.text_token match, :space elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x) + # FIXME: cache attack kind = self[1].to_sym match = self[2].gsub(/\\(.)/m, '\1') unless TokenKinds.has_key? kind @@ -30,6 +31,7 @@ module Scanners encoder.text_token match, kind elsif match = scan(/ (\w+) ([<\[]) /x) + # FIXME: cache attack kind = self[1].to_sym opened_tokens << kind case self[2] diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb index fd1aed6..836fa41 100644 --- a/lib/coderay/scanners/diff.rb +++ b/lib/coderay/scanners/diff.rb @@ -21,6 +21,7 @@ module Scanners line_kind = nil state = :initial deleted_lines_count = 0 + # FIXME: cache attack scanners = Hash.new do |h, lang| h[lang] = Scanners[lang].new '', :keep_tokens => true, :keep_state => true end diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb index 09c8b6e..23630f9 100644 --- a/lib/coderay/scanners/python.rb +++ b/lib/coderay/scanners/python.rb @@ -75,10 +75,12 @@ module Scanners <<=? | >>=? | [<>=]=? | != # comparison and assignment /x # :nodoc: + # FIXME: cache attack STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter| h[delimiter] = Regexp.union delimiter # :nodoc: } + # FIXME: cache attack STRING_CONTENT_REGEXP = Hash.new { |h, delimiter| h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc: } @@ -183,6 +185,7 @@ module Scanners kind = :ident elsif kind == :keyword state = DEF_NEW_STATE[match] + # FIXME: cache attack from_import_state << match.to_sym if state == :include_expected end encoder.text_token match, kind diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb index d39d962..ca35de0 100644 --- a/lib/coderay/scanners/raydebug.rb +++ b/lib/coderay/scanners/raydebug.rb @@ -26,6 +26,7 @@ module Scanners encoder.text_token kind, :class encoder.text_token '(', :operator match = self[2] + # FIXME: cache attack encoder.text_token match, kind.to_sym unless match.empty? encoder.text_token match, :operator if match = scan(/\)/) @@ -39,6 +40,7 @@ module Scanners else raise 'CodeRay bug: This case should not be reached.' end + # FIXME: cache attack kind = kind.to_sym opened_tokens << kind encoder.begin_group kind diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index 2f398d1..fe37d07 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -14,6 +14,7 @@ module Scanners { } ] ].each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with << + # FIXME: cache attack STRING_PATTERN = Hash.new do |h, k| delim, interpreted = *k # delim = delim.dup # workaround for old Ruby -- cgit v1.2.1 From ea107396fdd72cdbbaf4820d09a87bd879ba7e6c Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 16:43:19 +0200 Subject: check token kinds in Lint encoders --- lib/coderay/encoders/debug_lint.rb | 3 ++- lib/coderay/encoders/lint.rb | 4 +++- lib/coderay/token_kinds.rb | 5 +---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/debug_lint.rb b/lib/coderay/encoders/debug_lint.rb index 2c14186..a4eba2c 100644 --- a/lib/coderay/encoders/debug_lint.rb +++ b/lib/coderay/encoders/debug_lint.rb @@ -18,7 +18,8 @@ module Encoders register_for :debug_lint def text_token text, kind - raise Lint::EmptyToken, 'empty token' if text.empty? + raise Lint::EmptyToken, 'empty token for %p' % [kind] if text.empty? + raise Lint::UnknownTokenKind, 'unknown token kind %p (text was %p)' % [kind, text] unless TokenKinds.has_key? kind super end diff --git a/lib/coderay/encoders/lint.rb b/lib/coderay/encoders/lint.rb index 4601e90..88c8bd1 100644 --- a/lib/coderay/encoders/lint.rb +++ b/lib/coderay/encoders/lint.rb @@ -17,10 +17,12 @@ module Encoders InvalidTokenStream = Class.new StandardError EmptyToken = Class.new InvalidTokenStream + UnknownTokenKind = Class.new InvalidTokenStream IncorrectTokenGroupNesting = Class.new InvalidTokenStream def text_token text, kind - raise EmptyToken, 'empty token' if text.empty? + raise EmptyToken, 'empty token for %p' % [kind] if text.empty? + raise UnknownTokenKind, 'unknown token kind %p (text was %p)' % [kind, text] unless TokenKinds.has_key? kind end def begin_group kind diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb index 9137a49..5f49d77 100755 --- a/lib/coderay/token_kinds.rb +++ b/lib/coderay/token_kinds.rb @@ -1,10 +1,7 @@ module CodeRay # A Hash of all known token kinds and their associated CSS classes. - TokenKinds = Hash.new do |h, k| - warn 'Undefined Token kind: %p' % [k] if $CODERAY_DEBUG - false - end + TokenKinds = Hash.new(false) # speedup TokenKinds.compare_by_identity if TokenKinds.respond_to? :compare_by_identity -- cgit v1.2.1 From 60afd6857c8d0f1c3f9f2d6ca45f01b216d6b4b5 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 16:49:40 +0200 Subject: no attack vector found --- lib/coderay/encoders/html.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index 6dd231a..ee2d91a 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -142,7 +142,6 @@ module Encoders HTML_ESCAPE = make_html_escape_hash HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1F]/ - # FIXME: cache attack TOKEN_KIND_TO_INFO = Hash.new do |h, kind| h[kind] = kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize } end -- cgit v1.2.1 From 5cd749771379b9832ab1b37936bd98fb7cc80a34 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 17:04:09 +0200 Subject: don't dup @span_for_kinds hash keys --- lib/coderay/encoders/html.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index ee2d91a..ad5fafc 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -286,7 +286,7 @@ module Encoders def make_span_for_kinds method, hint # FIXME: cache attack Hash.new do |h, kinds| - h[kinds.is_a?(Symbol) ? kinds : kinds.dup] = begin + h[kinds] = begin css_class = css_class_for_kinds(kinds) title = HTML.token_path_to_hint hint, kinds if hint -- cgit v1.2.1 From ee30738b0b0615715321aa4f1ed8c7e4025cb411 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 17:04:23 +0200 Subject: rename local variable --- lib/coderay/encoders/html.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index ad5fafc..81a6ffa 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -310,8 +310,8 @@ module Encoders def break_lines text, style reopen = '' - @opened.each_with_index do |k, index| - reopen << (@span_for_kinds[index > 0 ? [k, *@opened[0...index]] : k] || '') + @opened.each_with_index do |kind, index| + reopen << (@span_for_kinds[index > 0 ? [kind, *@opened[0...index]] : kind] || '') end text.gsub("\n", "#{'' * @opened.size}#{'' if style}\n#{reopen}#{style}") end -- cgit v1.2.1 From e2546068d0f16fcba15268e740bbb6d9f4f223e9 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 18:28:54 +0200 Subject: prevent Symbol attack in Raydebug scanner --- lib/coderay/scanners/raydebug.rb | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb index ca35de0..6c1c10f 100644 --- a/lib/coderay/scanners/raydebug.rb +++ b/lib/coderay/scanners/raydebug.rb @@ -1,3 +1,5 @@ +require 'set' + module CodeRay module Scanners @@ -12,6 +14,11 @@ module Scanners protected + def setup + super + @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set + end + def scan_tokens encoder, options opened_tokens = [] @@ -26,8 +33,13 @@ module Scanners encoder.text_token kind, :class encoder.text_token '(', :operator match = self[2] - # FIXME: cache attack - encoder.text_token match, kind.to_sym unless match.empty? + unless match.empty? + if @known_token_kinds.include? kind + encoder.text_token match, kind.to_sym + else + encoder.text_token match, :plain + end + end encoder.text_token match, :operator if match = scan(/\)/) elsif match = scan(/ (\w+) ([<\[]) /x) -- cgit v1.2.1 From 8ee1c8deedc58672aa46f311163c2178a70186ce Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 18:44:01 +0200 Subject: cleanup Plugin, don't use #to_sym anymore --- lib/coderay/helpers/plugin.rb | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb index 3a38bfd..9a724ff 100644 --- a/lib/coderay/helpers/plugin.rb +++ b/lib/coderay/helpers/plugin.rb @@ -30,7 +30,7 @@ module CodeRay # * a file could not be found # * the requested Plugin is not registered PluginNotFound = Class.new LoadError - HostNotFound = Class.new LoadError + HostNotFound = Class.new LoadError PLUGIN_HOSTS = [] PLUGIN_HOSTS_BY_ID = {} # dummy hash @@ -49,8 +49,8 @@ module CodeRay def [] id, *args, &blk plugin = validate_id(id) begin - plugin = plugin_hash.[] plugin, *args, &blk - end while plugin.is_a? Symbol + plugin = plugin_hash.[](plugin, *args, &blk) + end while plugin.is_a? String plugin end @@ -95,7 +95,7 @@ module CodeRay def map hash for from, to in hash from = validate_id from - to = validate_id to + to = validate_id to plugin_hash[from] = to unless plugin_hash.has_key? from end end @@ -197,23 +197,22 @@ module CodeRay File.join plugin_path, "#{plugin_id}.rb" end - # Converts +id+ to a Symbol if it is a String, - # or returns +id+ if it already is a Symbol. + # Converts +id+ to a valid plugin ID String, or returns +nil+. # # Raises +ArgumentError+ for all other objects, or if the # given String includes non-alphanumeric characters (\W). def validate_id id - if id.is_a? Symbol or id.nil? - id - elsif id.is_a? String + case id + when Symbol + id.to_s + when String if id[/\w+/] == id - # FIXME: cache attack - id.downcase.to_sym + id.downcase else raise ArgumentError, "Invalid id given: #{id}" end else - raise ArgumentError, "String or Symbol expected, but #{id.class} given." + raise ArgumentError, "Symbol or String expected, but #{id.class} given." end end -- cgit v1.2.1 From 6ef7fa4541230442b6e743042648320619ad6859 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 18:44:55 +0200 Subject: no attack vector found --- lib/coderay/helpers/file_type.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index e8a7b75..5e3a1e7 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -68,7 +68,6 @@ module CodeRay File.open filename, 'r' do |f| if first_line = f.gets if type = first_line[TypeFromShebang] - # FIXME: cache attack type.to_sym end end -- cgit v1.2.1 From 2ab42c7b5e674453fac0320fe0c4a40daf6197e1 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 18:53:41 +0200 Subject: prevent Symbol attack in Debug scanner --- lib/coderay/scanners/debug.rb | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb index 9d10864..ac12c16 100644 --- a/lib/coderay/scanners/debug.rb +++ b/lib/coderay/scanners/debug.rb @@ -1,3 +1,5 @@ +require 'set' + module CodeRay module Scanners @@ -11,6 +13,11 @@ module Scanners protected + def setup + super + @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set + end + def scan_tokens encoder, options opened_tokens = [] @@ -21,26 +28,24 @@ module Scanners encoder.text_token match, :space elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x) - # FIXME: cache attack - kind = self[1].to_sym - match = self[2].gsub(/\\(.)/m, '\1') - unless TokenKinds.has_key? kind - kind = :error - match = matched + if @known_token_kinds.include? self[1] + encoder.text_token self[2].gsub(/\\(.)/m, '\1'), self[1].to_sym + else + encoder.text_token matched, :error end - encoder.text_token match, kind elsif match = scan(/ (\w+) ([<\[]) /x) - # FIXME: cache attack - kind = self[1].to_sym - opened_tokens << kind - case self[2] - when '<' - encoder.begin_group kind - when '[' - encoder.begin_line kind - else - raise 'CodeRay bug: This case should not be reached.' + if @known_token_kinds.include? self[1] + kind = self[1].to_sym + opened_tokens << kind + case self[2] + when '<' + encoder.begin_group kind + when '[' + encoder.begin_line kind + else + raise 'CodeRay bug: This case should not be reached.' + end end elsif !opened_tokens.empty? && match = scan(/ > /x) -- cgit v1.2.1 From 5d6bee7f5caced1383e6aac427fb356a4788794b Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:14:21 +0200 Subject: tweak Debug scanners again, introduce :unknown token kind --- lib/coderay/scanners/debug.rb | 25 ++++++++++++++----------- lib/coderay/scanners/raydebug.rb | 17 ++++++----------- lib/coderay/token_kinds.rb | 3 ++- 3 files changed, 22 insertions(+), 23 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb index ac12c16..83ede9a 100644 --- a/lib/coderay/scanners/debug.rb +++ b/lib/coderay/scanners/debug.rb @@ -5,7 +5,7 @@ module Scanners # = Debug Scanner # - # Interprets the output of the Encoders::Debug encoder. + # Interprets the output of the Encoders::Debug encoder (basically the inverse function). class Debug < Scanner register_for :debug @@ -31,21 +31,24 @@ module Scanners if @known_token_kinds.include? self[1] encoder.text_token self[2].gsub(/\\(.)/m, '\1'), self[1].to_sym else - encoder.text_token matched, :error + encoder.text_token matched, :unknown end elsif match = scan(/ (\w+) ([<\[]) /x) if @known_token_kinds.include? self[1] kind = self[1].to_sym - opened_tokens << kind - case self[2] - when '<' - encoder.begin_group kind - when '[' - encoder.begin_line kind - else - raise 'CodeRay bug: This case should not be reached.' - end + else + kind = :unknown + end + + opened_tokens << kind + case self[2] + when '<' + encoder.begin_group kind + when '[' + encoder.begin_line kind + else + raise 'CodeRay bug: This case should not be reached.' end elsif !opened_tokens.empty? && match = scan(/ > /x) diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb index 6c1c10f..1effdc8 100644 --- a/lib/coderay/scanners/raydebug.rb +++ b/lib/coderay/scanners/raydebug.rb @@ -3,9 +3,9 @@ require 'set' module CodeRay module Scanners - # = Debug Scanner + # = Raydebug Scanner # - # Parses the output of the Encoders::Debug encoder. + # Highlights the output of the Encoders::Debug encoder. class Raydebug < Scanner register_for :raydebug @@ -43,17 +43,12 @@ module Scanners encoder.text_token match, :operator if match = scan(/\)/) elsif match = scan(/ (\w+) ([<\[]) /x) - kind = self[1] - case self[2] - when '<' - encoder.text_token kind, :class - when '[' - encoder.text_token kind, :class + encoder.text_token self[1], :class + if @known_token_kinds.include? self[1] + kind = self[1].to_sym else - raise 'CodeRay bug: This case should not be reached.' + kind = :unknown end - # FIXME: cache attack - kind = kind.to_sym opened_tokens << kind encoder.begin_group kind encoder.text_token self[2], :operator diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb index 5f49d77..f911862 100755 --- a/lib/coderay/token_kinds.rb +++ b/lib/coderay/token_kinds.rb @@ -80,5 +80,6 @@ module CodeRay :plain => false # almost all scanners ) - TokenKinds[:method] = TokenKinds[:function] + TokenKinds[:method] = TokenKinds[:function] + TokenKinds[:unknown] = TokenKinds[:plain] end -- cgit v1.2.1 From 21d07b305f6293065cf08134cee2c66e727422cf Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:17:47 +0200 Subject: rename protected method in FileType --- lib/coderay/helpers/file_type.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 5e3a1e7..7de34d5 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -38,7 +38,7 @@ module CodeRay (TypeFromExt[ext2.downcase] if ext2) || TypeFromName[name] || TypeFromName[name.downcase] - type ||= shebang(filename) if read_shebang + type ||= type_from_shebang(filename) if read_shebang type end @@ -63,7 +63,7 @@ module CodeRay protected - def shebang filename + def type_from_shebang filename return unless File.exist? filename File.open filename, 'r' do |f| if first_line = f.gets -- cgit v1.2.1 From 368e053880819edc74fdcef38f38b5fd4806a3f4 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:18:49 +0200 Subject: FileType should guard against attacks here --- lib/coderay/scanners/diff.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb index 836fa41..fd1aed6 100644 --- a/lib/coderay/scanners/diff.rb +++ b/lib/coderay/scanners/diff.rb @@ -21,7 +21,6 @@ module Scanners line_kind = nil state = :initial deleted_lines_count = 0 - # FIXME: cache attack scanners = Hash.new do |h, lang| h[lang] = Scanners[lang].new '', :keep_tokens => true, :keep_state => true end -- cgit v1.2.1 From af04107b8b370452a17fa54e8ea0e8adc8b376b0 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:19:57 +0200 Subject: no attack vector, there are only 4 cases --- lib/coderay/scanners/python.rb | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb index 23630f9..05e1f5f 100644 --- a/lib/coderay/scanners/python.rb +++ b/lib/coderay/scanners/python.rb @@ -75,12 +75,10 @@ module Scanners <<=? | >>=? | [<>=]=? | != # comparison and assignment /x # :nodoc: - # FIXME: cache attack STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter| h[delimiter] = Regexp.union delimiter # :nodoc: } - # FIXME: cache attack STRING_CONTENT_REGEXP = Hash.new { |h, delimiter| h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc: } -- cgit v1.2.1 From e9140073f4dcba5c022a2ad40a1b935a07a6b4c3 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:21:45 +0200 Subject: no attack vector, there are only 2 cases --- lib/coderay/scanners/python.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb index 05e1f5f..09c8b6e 100644 --- a/lib/coderay/scanners/python.rb +++ b/lib/coderay/scanners/python.rb @@ -183,7 +183,6 @@ module Scanners kind = :ident elsif kind == :keyword state = DEF_NEW_STATE[match] - # FIXME: cache attack from_import_state << match.to_sym if state == :include_expected end encoder.text_token match, kind -- cgit v1.2.1 From c3c70e0b3497939dbfb1958a0764f4fd18c05a48 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:31:35 +0200 Subject: cleanup --- lib/coderay/scanners/ruby/string_state.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index fe37d07..bcc0507 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -17,7 +17,6 @@ module Scanners # FIXME: cache attack STRING_PATTERN = Hash.new do |h, k| delim, interpreted = *k - # delim = delim.dup # workaround for old Ruby delim_pattern = Regexp.escape(delim) if closing_paren = CLOSING_PAREN[delim] delim_pattern << Regexp.escape(closing_paren) -- cgit v1.2.1 From 65983f38eaed758a9901adf9e4e8c4be3e3a6123 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:41:55 +0200 Subject: avoid cache attack in Ruby scanner (eg. using Unicode-delimited Fancy Strings) --- lib/coderay/scanners/ruby/string_state.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index bcc0507..28ddd6c 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -14,7 +14,6 @@ module Scanners { } ] ].each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with << - # FIXME: cache attack STRING_PATTERN = Hash.new do |h, k| delim, interpreted = *k delim_pattern = Regexp.escape(delim) @@ -29,12 +28,13 @@ module Scanners # '| [|?*+(){}\[\].^$]' # end - h[k] = - if interpreted && delim != '#' - / (?= [#{delim_pattern}] | \# [{$@] ) /mx - else - / (?= [#{delim_pattern}] ) /mx - end + if interpreted && delim != '#' + / (?= [#{delim_pattern}] | \# [{$@] ) /mx + else + / (?= [#{delim_pattern}] ) /mx + end.tap do |pattern| + h[k] = pattern if (delim.respond_to?(:ord) ? delim.ord : delim[0]) < 256 + end end def initialize kind, interpreted, delim, heredoc = false -- cgit v1.2.1 From 05f5a0e270ce2cde4ff242634033c902c58f13ea Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:45:33 +0200 Subject: no cache attacks possible, static input (CSS) --- lib/coderay/encoders/html/css.rb | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html/css.rb b/lib/coderay/encoders/html/css.rb index de98f0e..164d7f8 100644 --- a/lib/coderay/encoders/html/css.rb +++ b/lib/coderay/encoders/html/css.rb @@ -21,7 +21,6 @@ module Encoders end def get_style_for_css_classes css_classes - # FIXME: cache attack cl = @styles[css_classes.first] return '' unless cl style = '' @@ -53,7 +52,6 @@ module Encoders for selector in selectors.split(',') classes = selector.scan(/[-\w]+/) cl = classes.pop - # FIXME: cache attack @styles[cl] ||= Hash.new @styles[cl][classes] = style.to_s.strip.delete(' ').chomp(';') end -- cgit v1.2.1 From ee992427810a1cf88b53c12ccf7fda91a30ab33e Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 21 Jul 2013 20:58:07 +0200 Subject: limit HTML encoder span_for_kinds cache size --- lib/coderay/encoders/html.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index 81a6ffa..ffde5d2 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -284,9 +284,8 @@ module Encoders end def make_span_for_kinds method, hint - # FIXME: cache attack Hash.new do |h, kinds| - h[kinds] = begin + begin css_class = css_class_for_kinds(kinds) title = HTML.token_path_to_hint hint, kinds if hint @@ -298,6 +297,9 @@ module Encoders "" end end + end.tap do |span| + h.clear if h.size >= 100 + h[kinds] = span end end end -- cgit v1.2.1 From a31b36683834f39c1581add498cce0b016f20fb5 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Fri, 30 Aug 2013 16:22:19 +0200 Subject: fix coderay -HTML option --- lib/coderay/encoders/html.rb | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index ffde5d2..d2ebb5a 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -197,13 +197,15 @@ module Encoders @last_opened = nil end - @out.extend Output - @out.css = @css - if options[:line_numbers] - Numbering.number! @out, options[:line_numbers], options + if @out.respond_to? :to_str + @out.extend Output + @out.css = @css + if options[:line_numbers] + Numbering.number! @out, options[:line_numbers], options + end + @out.wrap! options[:wrap] + @out.apply_title! options[:title] end - @out.wrap! options[:wrap] - @out.apply_title! options[:title] if defined?(@real_out) && @real_out @real_out << @out -- cgit v1.2.1 From a48037b85a12228431b32103786456f36beb355f Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 1 Sep 2013 01:01:35 +0200 Subject: final cleanup --- lib/coderay/scanners/go.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb index 59473f6..99fdd63 100644 --- a/lib/coderay/scanners/go.rb +++ b/lib/coderay/scanners/go.rb @@ -1,7 +1,6 @@ module CodeRay module Scanners - # Scanner for Go, copy from c class Go < Scanner register_for :go -- cgit v1.2.1 From d3197be3f207f8fcf52954d8815a0ea1948d25a4 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 22 Feb 2014 00:33:54 +0100 Subject: fix for #163 (SQL scanner), declare 1.1.1 --- lib/coderay/scanners/sql.rb | 40 ++++++++++++++++------------------------ lib/coderay/version.rb | 2 +- 2 files changed, 17 insertions(+), 25 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb index 93aeaf3..c25f6d2 100644 --- a/lib/coderay/scanners/sql.rb +++ b/lib/coderay/scanners/sql.rb @@ -57,6 +57,12 @@ module Scanners STRING_PREFIXES = /[xnb]|_\w+/i + STRING_CONTENT_PATTERN = { + '"' => / (?: [^\\"] | "" )+ /x, + "'" => / (?: [^\\'] | '' )+ /x, + '`' => / (?: [^\\`] | `` )+ /x, + } + def scan_tokens encoder, options state = :initial @@ -115,40 +121,26 @@ module Scanners end elsif state == :string - if match = scan(/[^\\"'`]+/) - string_content << match - next + if match = scan(STRING_CONTENT_PATTERN[string_type]) + encoder.text_token match, :content elsif match = scan(/["'`]/) if string_type == match if peek(1) == string_type # doubling means escape - string_content << string_type << getch - next - end - unless string_content.empty? - encoder.text_token string_content, :content - string_content = '' + encoder.text_token match + getch, :content + else + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + string_type = nil end - encoder.text_token match, :delimiter - encoder.end_group :string - state = :initial - string_type = nil else - string_content << match + encoder.text_token match, :content end elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - unless string_content.empty? - encoder.text_token string_content, :content - string_content = '' - end encoder.text_token match, :char elsif match = scan(/ \\ . /mox) - string_content << match - next + encoder.text_token match, :content elsif match = scan(/ \\ | $ /x) - unless string_content.empty? - encoder.text_token string_content, :content - string_content = '' - end encoder.text_token match, :error unless match.empty? encoder.end_group :string state = :initial diff --git a/lib/coderay/version.rb b/lib/coderay/version.rb index 4b4f085..7ea3f70 100644 --- a/lib/coderay/version.rb +++ b/lib/coderay/version.rb @@ -1,3 +1,3 @@ module CodeRay - VERSION = '1.1.0' + VERSION = '1.1.1' end -- cgit v1.2.1 From da39961195a297293bfe274e4f60c607ad21eada Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 17 May 2014 21:16:38 +0200 Subject: HTML envoder keeps \t with tab_width: false Fixes #170 --- lib/coderay/encoders/html.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index d2ebb5a..c7c0c2d 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -180,7 +180,7 @@ module Encoders @break_lines = (options[:break_lines] == true) - @HTML_ESCAPE = HTML_ESCAPE.merge("\t" => ' ' * options[:tab_width]) + @HTML_ESCAPE = HTML_ESCAPE.merge("\t" => options[:tab_width] ? ' ' * options[:tab_width] : "\t") @opened = [] @last_opened = nil -- cgit v1.2.1 From e1aa98e7386609fd4c84bdcd2c3ea4b26663c8b7 Mon Sep 17 00:00:00 2001 From: BenBasson Date: Wed, 11 Jun 2014 22:47:51 +0100 Subject: Allow $ in SQL object names. --- lib/coderay/scanners/sql.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb index c25f6d2..7d57f77 100644 --- a/lib/coderay/scanners/sql.rb +++ b/lib/coderay/scanners/sql.rb @@ -96,7 +96,7 @@ module Scanners state = :string encoder.text_token match, :delimiter - elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x) + elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9\$]* /x) encoder.text_token match, name_expected ? :ident : (match[0] == ?@ ? :variable : IDENT_KIND[match]) name_expected = false -- cgit v1.2.1 From e5624a07e95cc7a3c704a4d08cddea582adc7f31 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 21 Mar 2015 03:44:49 +0100 Subject: prevent running out of regexp stack --- lib/coderay/scanners/diff.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb index fd1aed6..74a6c27 100644 --- a/lib/coderay/scanners/diff.rb +++ b/lib/coderay/scanners/diff.rb @@ -100,7 +100,7 @@ module Scanners next elsif match = scan(/-/) deleted_lines_count += 1 - if options[:inline_diff] && deleted_lines_count == 1 && (changed_lines_count = 1 + check(/.*(?:\n\-.*)*/).count("\n")) && match?(/(?>.*(?:\n\-.*){#{changed_lines_count - 1}}(?:\n\+.*){#{changed_lines_count}})$(?!\n\+)/) + if options[:inline_diff] && deleted_lines_count == 1 && (changed_lines_count = 1 + check(/.*(?:\n\-.*)*/).count("\n")) && changed_lines_count <= 100_000 && match?(/(?>.*(?:\n\-.*){#{changed_lines_count - 1}}(?:\n\+.*){#{changed_lines_count}})$(?!\n\+)/) deleted_lines = Array.new(changed_lines_count) { |i| skip(/\n\-/) if i > 0; scan(/.*/) } inserted_lines = Array.new(changed_lines_count) { |i| skip(/\n\+/) ; scan(/.*/) } -- cgit v1.2.1 From 080f8a8225cb911d037d1f6e58e581dec9558c58 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 11:40:13 +0100 Subject: add support for Ruby 2.1 number literal suffixes --- lib/coderay/scanners/ruby.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 80165ca..0492a55 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -191,7 +191,10 @@ module Scanners encoder.text_token match, :error method_call_expected = false else - encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary + kind = self[1] ? :float : :integer # TODO: send :hex/:octal/:binary + match << 'r' if match !~ /e/i && scan(/r/) + match << 'i' if scan(/i/) + encoder.text_token match, kind end value_expected = false -- cgit v1.2.1 From 39cbd37815f65f21e0433f4da4cf5fbeda2e1e3f Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 12:06:26 +0100 Subject: add support for Ruby 2.2 quoted hash keys KNOWN ISSUE: string interpolation will not work! --- lib/coderay/scanners/ruby.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 0492a55..165d66b 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -164,15 +164,18 @@ module Scanners end elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx) - encoder.begin_group :string if match.size == 1 + encoder.begin_group :string encoder.text_token match, :delimiter state = self.class::StringState.new :string, match == '"', match # important for streaming else + kind = value_expected == true && scan(/:/) ? :key : :string + encoder.begin_group kind encoder.text_token match[0,1], :delimiter encoder.text_token match[1..-2], :content if match.size > 2 encoder.text_token match[-1,1], :delimiter - encoder.end_group :string + encoder.end_group kind + encoder.text_token ':', :operator if kind == :key value_expected = false end -- cgit v1.2.1 From d9d1eedcb235b371683eed22a6e4217caef73ffa Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 12:08:21 +0100 Subject: add support for Ruby 2.3 safe navigation operator --- lib/coderay/scanners/ruby.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 165d66b..24ab71f 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -201,7 +201,7 @@ module Scanners end value_expected = false - elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x) + elsif match = scan(/ [-+!~^\/]=? | [:;] | &\. | [*|&]{1,2}=? | >>? /x) value_expected = true encoder.text_token match, :operator -- cgit v1.2.1 From 376884d457ac7953914cc84b94fe6404cd904fe0 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 12:10:18 +0100 Subject: add support for Ruby 2.3 squiggly heredoc --- lib/coderay/scanners/ruby/patterns.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index 0b36e13..3dd6ad5 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -114,7 +114,7 @@ module Scanners # NOTE: This is not completely correct, but # nobody needs heredoc delimiters ending with \n. HEREDOC_OPEN = / - << (-)? # $1 = float + << ([-~])? # $1 = float (?: ( [A-Za-z_0-9]+ ) # $2 = delim | -- cgit v1.2.1 From 415498eaf9417cf30656c4a745eef0409b214afc Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 13:11:31 +0100 Subject: allow indentation of squiggly heredoc delimiter --- lib/coderay/scanners/ruby.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 24ab71f..f7feb46 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -214,7 +214,7 @@ module Scanners encoder.end_group kind heredocs ||= [] # create heredocs if empty heredocs << self.class::StringState.new(kind, quote != "'", delim, - self[1] == '-' ? :indented : :linestart) + self[1] ? :indented : :linestart) value_expected = false elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o) -- cgit v1.2.1 From c33f3f5c43064f7b468a59e086dc4a9a4f949ff7 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 13:17:23 +0100 Subject: check for keys with escape sequences, too --- lib/coderay/scanners/ruby.rb | 5 +++-- lib/coderay/scanners/ruby/string_state.rb | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index f7feb46..5b8de42 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -165,9 +165,10 @@ module Scanners elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx) if match.size == 1 - encoder.begin_group :string + kind = check(self.class::StringState.simple_key_pattern(match)) ? :key : :string + encoder.begin_group kind encoder.text_token match, :delimiter - state = self.class::StringState.new :string, match == '"', match # important for streaming + state = self.class::StringState.new kind, match == '"', match # important for streaming else kind = value_expected == true && scan(/:/) ? :key : :string encoder.begin_group kind diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index 28ddd6c..93e7208 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -37,6 +37,14 @@ module Scanners end end + def self.simple_key_pattern delim + if delim == "'" + / (?> (?: [^\\']+ | \\. )* ) ' : /mx + else + / (?> (?: [^\\"\#]+ | \\. | \#\$[\\"] | \#(?!\{) )* ) " : /mx + end + end + def initialize kind, interpreted, delim, heredoc = false if heredoc pattern = heredoc_pattern delim, interpreted, heredoc == :indented -- cgit v1.2.1 From 036fb3291274ed87f106bdbeb65bbd10b4c561f9 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 13:39:08 +0100 Subject: skip over interpolation if not nested --- lib/coderay/scanners/ruby/string_state.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index 93e7208..95f1e83 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -41,7 +41,7 @@ module Scanners if delim == "'" / (?> (?: [^\\']+ | \\. )* ) ' : /mx else - / (?> (?: [^\\"\#]+ | \\. | \#\$[\\"] | \#(?!\{) )* ) " : /mx + / (?> (?: [^\\"\#]+ | \\. | \#\$[\\"] | \#\{[^\{\}]+\} | \#(?!\{) )* ) " : /mx end end -- cgit v1.2.1 From a14639c31bbe33c23853a66d6feb817da4248e1a Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 13:44:18 +0100 Subject: don't ruin indentation --- lib/coderay/scanners/ruby/patterns.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index 3dd6ad5..e5a156d 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -114,7 +114,7 @@ module Scanners # NOTE: This is not completely correct, but # nobody needs heredoc delimiters ending with \n. HEREDOC_OPEN = / - << ([-~])? # $1 = float + << ([-~])? # $1 = float (?: ( [A-Za-z_0-9]+ ) # $2 = delim | -- cgit v1.2.1 From 7f1f2287650c3f3da75ffe6d9e79793dfcc7a67d Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sat, 13 Feb 2016 15:39:51 +0100 Subject: document new option to keep tabs --- lib/coderay/encoders/html.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index c7c0c2d..942b9c8 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -25,7 +25,8 @@ module Encoders # == Options # # === :tab_width - # Convert \t characters to +n+ spaces (a number.) + # Convert \t characters to +n+ spaces (a number or false.) + # false will keep tab characters untouched. # # Default: 8 # -- cgit v1.2.1