summary | refs | log | tree | commit | diff
path: root/lib/coderay/scanners
diff options
context:
space:
mode:
author: Kornelius Kalnbach <murphy@rubychan.de> 2015-03-21 04:32:59 +0100
committer: Kornelius Kalnbach <murphy@rubychan.de> 2015-03-21 04:32:59 +0100
commit: 615ac9604cf9f37009fa38e4320552c8735b4386 (patch)
tree: a30d42f111e1708c9df6cb2e97bee7bd952255b0 /lib/coderay/scanners
parent: 41c211ddb452bc8aa643e3915974b84cd82234be (diff)
download: coderay-615ac9604cf9f37009fa38e4320552c8735b4386.tar.gz
add alternative JSON scanners
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- lib/coderay/scanners/json.rb 34
-rw-r--r-- lib/coderay/scanners/json1.rb 100
-rw-r--r-- lib/coderay/scanners/json2.rb 131
-rw-r--r-- lib/coderay/scanners/json3.rb 143
-rw-r--r-- lib/coderay/scanners/json4.rb 143
5 files changed, 533 insertions, 18 deletions
diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb
index cb61960..b09970c 100644
--- a/lib/coderay/scanners/json.rb
+++ b/lib/coderay/scanners/json.rb
@@ -14,7 +14,7 @@ module Scanners
ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
- KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /x
protected
@@ -37,41 +37,40 @@ module Scanners
when :initial
if match = scan(/ \s+ /x)
encoder.text_token match, :space
- elsif match = scan(/ " (?=#{KEY}) /ox)
- state = :key
- encoder.begin_group :key
- encoder.text_token match, :delimiter
- elsif match = scan(/ " /x)
- state = :string
- encoder.begin_group :string
+ elsif match = scan(/"/)
+ state = check(/#{KEY}/o) ? :key : :string
+ encoder.begin_group state
encoder.text_token match, :delimiter
elsif match = scan(/ [:,\[{\]}] /x)
encoder.text_token match, :operator
elsif match = scan(/ true | false | null /x)
encoder.text_token match, :value
- elsif match = scan(/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: [eE][-+]? \d+ )? | [eE][-+]? \d+ ) /x)
- encoder.text_token match, :float
elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
- encoder.text_token match, :integer
+ if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x)
+ match << matched
+ encoder.text_token match, :float
+ else
+ encoder.text_token match, :integer
+ end
else
encoder.text_token getch, :error
end
when :string, :key
- if match = scan(/ [^\\"]+ /x)
+ if match = scan(/[^\\"]+/)
encoder.text_token match, :content
- elsif match = scan(/ " /x)
+ elsif match = scan(/"/)
encoder.text_token match, :delimiter
encoder.end_group state
state = :initial
- elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /ox)
+ elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
encoder.text_token match, :char
- elsif match = scan(/ \\. /mx)
+ elsif match = scan(/\\./m)
encoder.text_token match, :content
- elsif match = scan(/ \\ /x)
+ elsif match = scan(/ \\ | $ /x)
encoder.end_group state
+ encoder.text_token match, :error unless match.empty?
state = :initial
- encoder.text_token match, :error
else
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
@@ -80,7 +79,6 @@ module Scanners
raise_inspect 'Unknown state: %p' % [state], encoder
end
-
end
if options[:keep_state]
diff --git a/lib/coderay/scanners/json1.rb b/lib/coderay/scanners/json1.rb
new file mode 100644
index 0000000..c2f75b9
--- /dev/null
+++ b/lib/coderay/scanners/json1.rb
@@ -0,0 +1,100 @@
+module CodeRay
+module Scanners
+
+ # Scanner for JSON (JavaScript Object Notation).
+ class JSON1 < Scanner
+
+ register_for :json1
+ file_extension 'json1'
+
+ KINDS_NOT_LOC = [
+ :float, :char, :content, :delimiter,
+ :error, :integer, :operator, :value,
+ ] # :nodoc:
+
+ ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+
+ protected
+
+ def setup
+ @state = :initial
+ end
+
+ # See http://json.org/ for a definition of the JSON lexic/grammar.
+ def scan_tokens encoder, options
+ state = options[:state] || @state
+
+ if [:string, :key].include? state
+ encoder.begin_group state
+ end
+
+ until eos?
+
+ case state
+
+ when :initial
+ if match = scan(/ \s+ /x)
+ encoder.text_token match, :space
+ elsif match = scan(/ " (?=#{KEY}) /ox)
+ state = :key
+ encoder.begin_group :key
+ encoder.text_token match, :delimiter
+ elsif match = scan(/ " /x)
+ state = :string
+ encoder.begin_group :string
+ encoder.text_token match, :delimiter
+ elsif match = scan(/ [:,\[{\]}] /x)
+ encoder.text_token match, :operator
+ elsif match = scan(/ true | false | null /x)
+ encoder.text_token match, :value
+ elsif match = scan(/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: [eE][-+]? \d+ )? | [eE][-+]? \d+ ) /x)
+ encoder.text_token match, :float
+ elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
+ encoder.text_token match, :integer
+ else
+ encoder.text_token getch, :error
+ end
+
+ when :string, :key
+ if match = scan(/ [^\\"]+ /x)
+ encoder.text_token match, :content
+ elsif match = scan(/ " /x)
+ encoder.text_token match, :delimiter
+ encoder.end_group state
+ state = :initial
+ elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /ox)
+ encoder.text_token match, :char
+ elsif match = scan(/ \\. /mx)
+ encoder.text_token match, :content
+ elsif match = scan(/ \\ /x)
+ encoder.end_group state
+ state = :initial
+ encoder.text_token match, :error
+ else
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
+ end
+
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+ if [:string, :key].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/json2.rb b/lib/coderay/scanners/json2.rb
new file mode 100644
index 0000000..14bbe67
--- /dev/null
+++ b/lib/coderay/scanners/json2.rb
@@ -0,0 +1,131 @@
+module CodeRay
+module Scanners
+
+ class RuleBasedScanner2 < Scanner
+ class << self
+ attr_accessor :states
+
+ def state *names, &block
+ @@states ||= {}
+
+ @@rules = []
+
+ instance_eval(&block)
+
+ for name in names
+ @@states[name] = @@rules
+ end
+
+ @@rules = nil
+ end
+
+ def token pattern, *actions
+ @@rules << [pattern, *actions]
+ end
+
+ def push_group name
+ [:begin_group, name]
+ end
+
+ def pop_group
+ [:end_group]
+ end
+ end
+ end
+
+ # Scanner for JSON (JavaScript Object Notation).
+ class JSON2 < RuleBasedScanner2
+
+ register_for :json2
+ file_extension 'json2'
+
+ KINDS_NOT_LOC = [
+ :float, :char, :content, :delimiter,
+ :error, :integer, :operator, :value,
+ ] # :nodoc:
+
+ ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+
+ state :initial do
+ token %r/ \s+ /x, :space
+
+ token %r/ " (?=#{KEY}) /x, push_group(:key), :delimiter
+ token %r/ " /x, push_group(:string), :delimiter
+
+ token %r/ [:,\[{\]}] /x, :operator
+
+ token %r/ true | false | null /x, :value
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: [eE][-+]? \d+ )? | [eE][-+]? \d+ ) /x, :float
+ token %r/ -? (?: 0 | [1-9]\d* ) /x, :integer
+ end
+
+ state :string, :key do
+ token %r/ [^\\"]+ /x, :content
+
+ token %r/ " /x, :delimiter, pop_group
+
+ token %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
+ token %r/ \\. /mx, :content
+ token %r/ \\ /x, pop_group, :error
+
+ # token %r/$/, end_group
+ end
+
+ protected
+
+ def setup
+ @state = :initial
+ end
+
+ # See http://json.org/ for a definition of the JSON lexic/grammar.
+ def scan_tokens encoder, options
+ state = options[:state] || @state
+
+ if [:string, :key].include? state
+ encoder.begin_group state
+ end
+
+ states = [state]
+
+ until eos?
+ for pattern, *actions in @@states[state]
+ if match = scan(pattern)
+ for action in actions
+ case action
+ when Symbol
+ encoder.text_token match, action
+ when Array
+ case action.first
+ when :begin_group
+ encoder.begin_group action.last
+ state = action.last
+ states << state
+ when :end_group
+ encoder.end_group states.pop
+ state = states.last
+ end
+ end
+ end
+
+ break
+ end
+ end && encoder.text_token(getch, :error)
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+ if [:string, :key].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/json3.rb b/lib/coderay/scanners/json3.rb
new file mode 100644
index 0000000..a79f513
--- /dev/null
+++ b/lib/coderay/scanners/json3.rb
@@ -0,0 +1,143 @@
+module CodeRay
+module Scanners
+
+ class RuleBasedScanner3 < Scanner
+ class << self
+ attr_accessor :states
+
+ def state *names, &block
+ @@code ||= ""
+
+ @@code << "when #{names.map(&:inspect).join(', ')}\n"
+
+ @@first = true
+ instance_eval(&block)
+ @@code << " else\n"
+ # @@code << " raise 'no match for #{names.map(&:inspect).join(', ')}'\n"
+ @@code << " encoder.text_token getch, :error\n"
+ @@code << " end\n"
+ @@code << " \n"
+ end
+
+ def token pattern, *actions
+ @@code << " #{'els' unless @@first}if match = scan(#{pattern.inspect})\n"
+
+ for action in actions
+ case action
+ when Symbol
+ @@code << " p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG
+ @@code << " encoder.text_token match, #{action.inspect}\n"
+ when Array
+ case action.first
+ when :begin_group
+ @@code << " p 'begin_group %p' % [#{action.last.inspect}]\n" if $DEBUG
+ @@code << " state = #{action.last.inspect}\n"
+ @@code << " states << #{action.last.inspect}\n"
+ @@code << " encoder.begin_group #{action.last.inspect}\n"
+ when :end_group
+ @@code << " p 'end_group %p' % [states.last]\n" if $DEBUG
+ @@code << " encoder.end_group states.pop\n"
+ @@code << " state = states.last\n"
+ end
+ end
+ end
+
+ @@first = false
+ end
+
+ def push_group name
+ [:begin_group, name]
+ end
+
+ def pop_group
+ [:end_group]
+ end
+ end
+ end
+
+ # Scanner for JSON (JavaScript Object Notation).
+ class JSON3 < RuleBasedScanner3
+
+ register_for :json3
+ file_extension 'json3'
+
+ KINDS_NOT_LOC = [
+ :float, :char, :content, :delimiter,
+ :error, :integer, :operator, :value,
+ ] # :nodoc:
+
+ ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+
+ state :initial do
+ token %r/ \s+ /x, :space
+
+ token %r/ [:,\[{\]}] /x, :operator
+
+ token %r/ " (?=#{KEY}) /x, push_group(:key), :delimiter
+ token %r/ " /x, push_group(:string), :delimiter
+
+ token %r/ true | false | null /x, :value
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: e[-+]? \d+ )? | e[-+]? \d+ ) /ix, :float
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: e[+-] \d+ )? /ix, :integer
+ end
+
+ state :key, :string do
+ token %r/ [^\\"]+ /x, :content
+
+ token %r/ " /x, :delimiter, pop_group
+
+ token %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
+ token %r/ \\. /mx, :content
+ token %r/ \\ /x, pop_group, :error
+ end
+
+ protected
+
+ def setup
+ @state = :initial
+ end
+
+ # See http://json.org/ for a definition of the JSON lexic/grammar.
+ scan_tokens_code = <<-"RUBY"
+ def scan_tokens encoder, options
+ state = options[:state] || @state
+
+ if [:string, :key].include? state
+ encoder.begin_group state
+ end
+
+ states = [state]
+
+ until eos?
+
+ case state
+
+#{ @@code.chomp.gsub(/^/, ' ') }
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+ if [:string, :key].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+ RUBY
+
+ # puts scan_tokens_code
+ class_eval scan_tokens_code
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/json4.rb b/lib/coderay/scanners/json4.rb
new file mode 100644
index 0000000..3160218
--- /dev/null
+++ b/lib/coderay/scanners/json4.rb
@@ -0,0 +1,143 @@
+module CodeRay
+module Scanners
+
+ class RuleBasedScanner4 < Scanner
+ class << self
+ attr_accessor :states
+
+ def state *names, &block
+ @@code ||= ""
+
+ @@code << "when #{names.map(&:inspect).join(', ')}\n"
+
+ @@first = true
+ instance_eval(&block)
+ @@code << " else\n"
+ # @@code << " raise 'no match for #{names.map(&:inspect).join(', ')}'\n"
+ @@code << " encoder.text_token getch, :error\n"
+ @@code << " end\n"
+ @@code << " \n"
+ end
+
+ def token pattern, *actions
+ @@code << " #{'els' unless @@first}if match = scan(#{pattern.inspect})\n"
+
+ for action in actions
+ case action
+ when Symbol
+ @@code << " p 'text_token %p %p' % [match, #{action.inspect}]\n" if $DEBUG
+ @@code << " encoder.text_token match, #{action.inspect}\n"
+ when Array
+ case action.first
+ when :push
+ @@code << " p 'push %p' % [#{action.last.inspect}]\n" if $DEBUG
+ @@code << " state = #{action.last.inspect}\n"
+ @@code << " states << state\n"
+ @@code << " encoder.begin_group state\n"
+ when :pop
+ @@code << " p 'pop %p' % [states.last]\n" if $DEBUG
+ @@code << " encoder.end_group states.pop\n"
+ @@code << " state = states.last\n"
+ end
+ end
+ end
+
+ @@first = false
+ end
+
+ def push state
+ [:push, state]
+ end
+
+ def pop
+ [:pop]
+ end
+ end
+ end
+
+ # Scanner for JSON (JavaScript Object Notation).
+ class JSON4 < RuleBasedScanner4
+
+ register_for :json4
+ file_extension 'json4'
+
+ KINDS_NOT_LOC = [
+ :float, :char, :content, :delimiter,
+ :error, :integer, :operator, :value,
+ ] # :nodoc:
+
+ ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
+ KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /mx
+
+ state :initial do
+ token %r/ \s+ /x, :space
+
+ token %r/ [:,\[{\]}] /x, :operator
+
+ token %r/ " (?=#{KEY}) /x, push(:key), :delimiter
+ token %r/ " /x, push(:string), :delimiter
+
+ token %r/ true | false | null /x, :value
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: \.\d+ (?: e[-+]? \d+ )? | e[-+]? \d+ ) /ix, :float
+ token %r/ -? (?: 0 | [1-9]\d* ) (?: e[+-] \d+ )? /ix, :integer
+ end
+
+ state :key, :string do
+ token %r/ [^\\"]+ /x, :content
+
+ token %r/ " /x, :delimiter, pop
+
+ token %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /x, :char
+ token %r/ \\. /mx, :content
+ token %r/ \\ /x, :error, pop
+ end
+
+ protected
+
+ def setup
+ @state = :initial
+ end
+
+ # See http://json.org/ for a definition of the JSON lexic/grammar.
+ scan_tokens_code = <<-"RUBY"
+ def scan_tokens encoder, options
+ state = options[:state] || @state
+
+ if [:string, :key].include? state
+ encoder.begin_group state
+ end
+
+ states = [state]
+
+ until eos?
+
+ case state
+
+#{ @@code.chomp.gsub(/^/, ' ') }
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+
+ end
+
+ end
+
+ if options[:keep_state]
+ @state = state
+ end
+
+ if [:string, :key].include? state
+ encoder.end_group state
+ end
+
+ encoder
+ end
+ RUBY
+
+ # puts scan_tokens_code
+ class_eval scan_tokens_code
+
+ end
+
+end
+end