diff options
author | Kornelius Kalnbach <murphy@rubychan.de> | 2015-03-21 04:32:59 +0100 |
---|---|---|
committer | Kornelius Kalnbach <murphy@rubychan.de> | 2015-03-21 04:32:59 +0100 |
commit | 615ac9604cf9f37009fa38e4320552c8735b4386 (patch) | |
tree | a30d42f111e1708c9df6cb2e97bee7bd952255b0 /lib/coderay/scanners | |
parent | 41c211ddb452bc8aa643e3915974b84cd82234be (diff) | |
download | coderay-615ac9604cf9f37009fa38e4320552c8735b4386.tar.gz |
add alternative JSON scanners
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- | lib/coderay/scanners/json.rb | 34 | ||||
-rw-r--r-- | lib/coderay/scanners/json1.rb | 100 | ||||
-rw-r--r-- | lib/coderay/scanners/json2.rb | 131 | ||||
-rw-r--r-- | lib/coderay/scanners/json3.rb | 143 | ||||
-rw-r--r-- | lib/coderay/scanners/json4.rb | 143 |
5 files changed, 533 insertions, 18 deletions
diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb index cb61960..b09970c 100644 --- a/lib/coderay/scanners/json.rb +++ b/lib/coderay/scanners/json.rb @@ -14,7 +14,7 @@ module Scanners ESCAPE = / [bfnrt\\"\/] /x # :nodoc: UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc: - KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx + KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /x protected @@ -37,41 +37,40 @@ module Scanners when :initial if match = scan(/ \s+ /x) encoder.text_token match, :space - elsif match = scan(/ " (?=#{KEY}) /ox) - state = :key - encoder.begin_group :key - encoder.text_token match, :delimiter - elsif match = scan(/ " /x) - state = :string - encoder.begin_group :string + elsif match = scan(/"/) + state = check(/#{KEY}/o) ? :key : :string + encoder.begin_group state encoder.text_token match, :delimiter elsif match = scan(/ [:,\[{\]}] /x) encoder.text_token match, :operator elsif match = scan(/ true | false | null /x) encoder.text_token match, :value - elsif match = scan(/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: [eE][-+]? \d+ )? | [eE][-+]? \d+ ) /x) - encoder.text_token match, :float elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x) - encoder.text_token match, :integer + if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x) + match << matched + encoder.text_token match, :float + else + encoder.text_token match, :integer + end else encoder.text_token getch, :error end when :string, :key - if match = scan(/ [^\\"]+ /x) + if match = scan(/[^\\"]+/) encoder.text_token match, :content - elsif match = scan(/ " /x) + elsif match = scan(/"/) encoder.text_token match, :delimiter encoder.end_group state state = :initial - elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /ox) + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) encoder.text_token match, :char - elsif match = scan(/ \\. /mx) + elsif match = scan(/\\./m) encoder.text_token match, :content - elsif match = scan(/ \\ /x) + elsif match = scan(/ \\ | $ /x) encoder.end_group state + encoder.text_token match, :error unless match.empty? state = :initial - encoder.text_token match, :error else raise_inspect "else case \" reached; %p not handled." % peek(1), encoder end @@ -80,7 +79,6 @@ module Scanners raise_inspect 'Unknown state: %p' % [state], encoder end - end if options[:keep_state] diff --git a/lib/coderay/scanners/json1.rb b/lib/coderay/scanners/json1.rb new file mode 100644 index 0000000..c2f75b9 --- /dev/null +++ b/lib/coderay/scanners/json1.rb @@ -0,0 +1,100 @@ +module CodeRay +module Scanners + + # Scanner for JSON (JavaScript Object Notation). + class JSON1 < Scanner + + register_for :json1 + file_extension 'json1' + + KINDS_NOT_LOC = [ + :float, :char, :content, :delimiter, + :error, :integer, :operator, :value, + ] # :nodoc: + + ESCAPE = / [bfnrt\\"\/] /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc: + KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx + + protected + + def setup + @state = :initial + end + + # See http://json.org/ for a definition of the JSON lexic/grammar. + def scan_tokens encoder, options + state = options[:state] || @state + + if [:string, :key].include? state + encoder.begin_group state + end + + until eos? + + case state + + when :initial + if match = scan(/ \s+ /x) + encoder.text_token match, :space + elsif match = scan(/ " (?=#{KEY}) /ox) + state = :key + encoder.begin_group :key + encoder.text_token match, :delimiter + elsif match = scan(/ " /x) + state = :string + encoder.begin_group :string + encoder.text_token match, :delimiter + elsif match = scan(/ [:,\[{\]}] /x) + encoder.text_token match, :operator + elsif match = scan(/ true | false | null /x) + encoder.text_token match, :value + elsif match = scan(/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: [eE][-+]? \d+ )? | [eE][-+]? \d+ ) /x) + encoder.text_token match, :float + elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x) + encoder.text_token match, :integer + else + encoder.text_token getch, :error + end + + when :string, :key + if match = scan(/ [^\\"]+ /x) + encoder.text_token match, :content + elsif match = scan(/ " /x) + encoder.text_token match, :delimiter + encoder.end_group state + state = :initial + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /ox) + encoder.text_token match, :char + elsif match = scan(/ \\. /mx) + encoder.text_token match, :content + elsif match = scan(/ \\ /x) + encoder.end_group state + state = :initial + encoder.text_token match, :error + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + + else + raise_inspect 'Unknown state: %p' % [state], encoder + + end + + end + + if options[:keep_state] + @state = state + end + + if [:string, :key].include? state + encoder.end_group state + end + + encoder + end + + end + +end +end diff --git a/lib/coderay/scanners/json2.rb b/lib/coderay/scanners/json2.rb new file mode 100644 index 0000000..14bbe67 --- /dev/null +++ b/lib/coderay/scanners/json2.rb @@ -0,0 +1,131 @@ +module CodeRay +module Scanners + + class RuleBasedScanner2 < Scanner + class << self + attr_accessor :states + + def state *names, &block + @@states ||= {} + + @@rules = [] + + instance_eval(&block) + + for name in names + @@states[name] = @@rules + end + + @@rules = nil + end + + def token pattern, *actions + @@rules << [pattern, *actions] + end + + def push_group name + [:begin_group, name] + end + + def pop_group + [:end_group] + end + end + end + + # Scanner for JSON (JavaScript Object Notation). + class JSON2 < RuleBasedScanner2 + + register_for :json2 + file_extension 'json2' + + KINDS_NOT_LOC = [ + :float, :char, :content, :delimiter, + :error, :integer, :operator, :value, + ] # :nodoc: + + ESCAPE = / [bfnrt\\"\/] /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc: + KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx + + state :initial do + token %r/ \s+ /x, :space + + token %r/ " (?=#{KEY}) /x, push_group(:key), :delimiter + token %r/ " /x, push_group(:string), :delimiter + + token %r/ [:,\[{\]}] /x, :operator + + token %r/ true | false | null /x, :value + token %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: [eE][-+]? \d+ )? | [eE][-+]? \d+ ) /x, :float + token %r/ -? (?: 0 | [1-9]\d* ) /x, :integer + end + + state :string, :key do + token %r/ [^\\"]+ /x, :content + + token %r/ " /x, :delimiter, pop_group + + token %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char + token %r/ \\. /mx, :content + token %r/ \\ /x, pop_group, :error + + # token %r/$/, end_group + end + + protected + + def setup + @state = :initial + end + + # See http://json.org/ for a definition of the JSON lexic/grammar. + def scan_tokens encoder, options + state = options[:state] || @state + + if [:string, :key].include? state + encoder.begin_group state + end + + states = [state] + + until eos? + for pattern, *actions in @@states[state] + if match = scan(pattern) + for action in actions + case action + when Symbol + encoder.text_token match, action + when Array + case action.first + when :begin_group + encoder.begin_group action.last + state = action.last + states << state + when :end_group + encoder.end_group states.pop + state = states.last + end + end + end + + break + end + end && encoder.text_token(getch, :error) + end + + if options[:keep_state] + @state = state + end + + if [:string, :key].include? state + encoder.end_group state + end + + encoder + end + + end + +end +end diff --git a/lib/coderay/scanners/json3.rb b/lib/coderay/scanners/json3.rb new file mode 100644 index 0000000..a79f513 --- /dev/null +++ b/lib/coderay/scanners/json3.rb @@ -0,0 +1,143 @@ +module CodeRay +module Scanners + + class RuleBasedScanner3 < Scanner + class << self + attr_accessor :states + + def state *names, &block + @@code ||= "" + + @@code << "when #{names.map(&:inspect).join(', ')}\n" + + @@first = true + instance_eval(&block) + @@code << " else\n" + # @@code << " raise 'no match for #{names.map(&:inspect).join(', ')}'\n" + @@code << " encoder.text_token getch, :error\n" + @@code << " end\n" + @@code << " \n" + end + + def token pattern, *actions + @@code << " #{'els' unless @@first}if match = scan(#{pattern.inspect})\n" + + for action in actions + case action + when Symbol + @@code << " p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG + @@code << " encoder.text_token match, #{action.inspect}\n" + when Array + case action.first + when :begin_group + @@code << " p 'begin_group %p' % [#{action.last.inspect}]\n" if $DEBUG + @@code << " state = #{action.last.inspect}\n" + @@code << " states << #{action.last.inspect}\n" + @@code << " encoder.begin_group #{action.last.inspect}\n" + when :end_group + @@code << " p 'end_group %p' % [states.last]\n" if $DEBUG + @@code << " encoder.end_group states.pop\n" + @@code << " state = states.last\n" + end + end + end + + @@first = false + end + + def push_group name + [:begin_group, name] + end + + def pop_group + [:end_group] + end + end + end + + # Scanner for JSON (JavaScript Object Notation). + class JSON3 < RuleBasedScanner3 + + register_for :json3 + file_extension 'json3' + + KINDS_NOT_LOC = [ + :float, :char, :content, :delimiter, + :error, :integer, :operator, :value, + ] # :nodoc: + + ESCAPE = / [bfnrt\\"\/] /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc: + KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx + + state :initial do + token %r/ \s+ /x, :space + + token %r/ [:,\[{\]}] /x, :operator + + token %r/ " (?=#{KEY}) /x, push_group(:key), :delimiter + token %r/ " /x, push_group(:string), :delimiter + + token %r/ true | false | null /x, :value + token %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: e[-+]? \d+ )? | e[-+]? \d+ ) /ix, :float + token %r/ -? (?: 0 | [1-9]\d* ) (?: e[+-] \d+ )? /ix, :integer + end + + state :key, :string do + token %r/ [^\\"]+ /x, :content + + token %r/ " /x, :delimiter, pop_group + + token %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char + token %r/ \\. /mx, :content + token %r/ \\ /x, pop_group, :error + end + + protected + + def setup + @state = :initial + end + + # See http://json.org/ for a definition of the JSON lexic/grammar. + scan_tokens_code = <<-"RUBY" + def scan_tokens encoder, options + state = options[:state] || @state + + if [:string, :key].include? state + encoder.begin_group state + end + + states = [state] + + until eos? + + case state + +#{ @@code.chomp.gsub(/^/, ' ') } + else + raise_inspect 'Unknown state: %p' % [state], encoder + + end + + end + + if options[:keep_state] + @state = state + end + + if [:string, :key].include? state + encoder.end_group state + end + + encoder + end + RUBY + + # puts scan_tokens_code + class_eval scan_tokens_code + + end + +end +end diff --git a/lib/coderay/scanners/json4.rb b/lib/coderay/scanners/json4.rb new file mode 100644 index 0000000..3160218 --- /dev/null +++ b/lib/coderay/scanners/json4.rb @@ -0,0 +1,143 @@ +module CodeRay +module Scanners + + class RuleBasedScanner4 < Scanner + class << self + attr_accessor :states + + def state *names, &block + @@code ||= "" + + @@code << "when #{names.map(&:inspect).join(', ')}\n" + + @@first = true + instance_eval(&block) + @@code << " else\n" + # @@code << " raise 'no match for #{names.map(&:inspect).join(', ')}'\n" + @@code << " encoder.text_token getch, :error\n" + @@code << " end\n" + @@code << " \n" + end + + def token pattern, *actions + @@code << " #{'els' unless @@first}if match = scan(#{pattern.inspect})\n" + + for action in actions + case action + when Symbol + @@code << " p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG + @@code << " encoder.text_token match, #{action.inspect}\n" + when Array + case action.first + when :push + @@code << " p 'push %p' % [#{action.last.inspect}]\n" if $DEBUG + @@code << " state = #{action.last.inspect}\n" + @@code << " states << state\n" + @@code << " encoder.begin_group state\n" + when :pop + @@code << " p 'pop %p' % [states.last]\n" if $DEBUG + @@code << " encoder.end_group states.pop\n" + @@code << " state = states.last\n" + end + end + end + + @@first = false + end + + def push state + [:push, state] + end + + def pop + [:pop] + end + end + end + + # Scanner for JSON (JavaScript Object Notation). + class JSON4 < RuleBasedScanner4 + + register_for :json4 + file_extension 'json4' + + KINDS_NOT_LOC = [ + :float, :char, :content, :delimiter, + :error, :integer, :operator, :value, + ] # :nodoc: + + ESCAPE = / [bfnrt\\"\/] /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc: + KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx + + state :initial do + token %r/ \s+ /x, :space + + token %r/ [:,\[{\]}] /x, :operator + + token %r/ " (?=#{KEY}) /x, push(:key), :delimiter + token %r/ " /x, push(:string), :delimiter + + token %r/ true | false | null /x, :value + token %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: e[-+]? \d+ )? | e[-+]? \d+ ) /ix, :float + token %r/ -? (?: 0 | [1-9]\d* ) (?: e[+-] \d+ )? /ix, :integer + end + + state :key, :string do + token %r/ [^\\"]+ /x, :content + + token %r/ " /x, :delimiter, pop + + token %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char + token %r/ \\. /mx, :content + token %r/ \\ /x, :error, pop + end + + protected + + def setup + @state = :initial + end + + # See http://json.org/ for a definition of the JSON lexic/grammar. + scan_tokens_code = <<-"RUBY" + def scan_tokens encoder, options + state = options[:state] || @state + + if [:string, :key].include? state + encoder.begin_group state + end + + states = [state] + + until eos? + + case state + +#{ @@code.chomp.gsub(/^/, ' ') } + else + raise_inspect 'Unknown state: %p' % [state], encoder + + end + + end + + if options[:keep_state] + @state = state + end + + if [:string, :key].include? state + encoder.end_group state + end + + encoder + end + RUBY + + # puts scan_tokens_code + class_eval scan_tokens_code + + end + +end +end |