summaryrefslogtreecommitdiff
path: root/lib/coderay
diff options
context:
space:
mode:
author Kornelius Kalnbach <murphy@rubychan.de> 2015-04-22 00:56:03 +0200
committer Kornelius Kalnbach <murphy@rubychan.de> 2015-04-22 00:56:03 +0200
commit 235e01b4077a33ccc82e646d7f5992dd41b6646e (patch)
tree 4ec653353df6253cd672a4a51c31518d137d56fb /lib/coderay
parent e8bef1034bedcc4ae1698657ea349b455edff58d (diff)
download coderay-235e01b4077a33ccc82e646d7f5992dd41b6646e.tar.gz
add push/pop state, working on C scanner
Diffstat (limited to 'lib/coderay')
-rw-r--r--lib/coderay/rule_based_scanner.rb35
-rw-r--r--lib/coderay/scanners/c2.rb126
2 files changed, 156 insertions, 5 deletions
diff --git a/lib/coderay/rule_based_scanner.rb b/lib/coderay/rule_based_scanner.rb
index 670cb15..ac6a623 100644
--- a/lib/coderay/rule_based_scanner.rb
+++ b/lib/coderay/rule_based_scanner.rb
@@ -6,10 +6,13 @@ module CodeRay
Kind = Struct.new :token_kind
Push = Struct.new :state
Pop = Class.new
+ PushState = Struct.new :state
+ PopState = Class.new
Check = Struct.new :condition
CheckIf = Class.new Check
CheckUnless = Class.new Check
ValueSetter = Struct.new :targets, :value
+ Continue = Class.new
class << self
attr_accessor :states
@@ -101,7 +104,7 @@ module CodeRay
when Kind
case action.token_kind
when Proc
- @code << " encoder.text_token match, #{make_callback(action.token_kind)}\n"
+ @code << " encoder.text_token match, kind = #{make_callback(action.token_kind)}\n"
else
raise "I don't know how to evaluate this kind: %p" % [action.token_kind]
end
@@ -111,7 +114,7 @@ module CodeRay
@code << " encoder.text_token self[#{i + 1}], #{kind.inspect} if self[#{i + 1}]\n"
end
- when Push
+ when Push, PushState
case action.state
when String
raise
@@ -126,16 +129,22 @@ module CodeRay
raise "I don't know how to evaluate this push state: %p" % [action.state]
end
@code << " states << state\n"
- @code << " encoder.begin_group state\n"
- when Pop
+ @code << " encoder.begin_group state\n" if action.is_a? Push
+ when Pop, PopState
@code << " p 'pop %p' % [states.last]\n" if $DEBUG
- @code << " encoder.end_group states.pop\n"
+ if action.is_a? Pop
+ @code << " encoder.end_group states.pop\n"
+ else
+ @code << " states.pop\n"
+ end
@code << " state = states.last\n"
when ValueSetter
case action.value
when Proc
@code << " #{action.targets.join(' = ')} = #{make_callback(action.value)}\n"
+ when Symbol
+ @code << " #{action.targets.join(' = ')} = #{action.value}\n"
else
@code << " #{action.targets.join(' = ')} = #{action.value.inspect}\n"
end
@@ -143,6 +152,9 @@ module CodeRay
when Proc
@code << " #{make_callback(action)}\n"
+ when Continue
+ @code << " next\n"
+
else
raise "I don't know how to evaluate this action: %p" % [action]
end
@@ -168,6 +180,15 @@ module CodeRay
Pop.new
end
+ def push_state(state = nil, &block) # builds a PushState action; unlike Push, the generated code calls no encoder.begin_group
+ raise 'push_state requires a state or a block; got nothing' if !state && !block
+ PushState.new(state || block)
+ end
+
+ def pop_state # builds a PopState action; the generated code pops the state stack without encoder.end_group
+ PopState.new()
+ end
+
def check_if value = nil, &callback
CheckIf.new value || callback
end
@@ -192,6 +213,10 @@ module CodeRay
ValueSetter.new Array(flags), nil
end
+ def continue # builds a Continue action; the generated code emits `next` for it
+ Continue.new()
+ end
+
protected
def make_callback block
diff --git a/lib/coderay/scanners/c2.rb b/lib/coderay/scanners/c2.rb
new file mode 100644
index 0000000..d21e032
--- /dev/null
+++ b/lib/coderay/scanners/c2.rb
@@ -0,0 +1,163 @@
+module CodeRay
+module Scanners
+
+ # Scanner for C, declared with the RuleBasedScanner DSL.
+ #
+ # Each +state+ block below lists its token rules with +on+. The code built
+ # from those rules (@code) is spliced into the scan_tokens method that is
+ # generated at the bottom of this class.
+ # NOTE(review): the rules rely on being tried in the order written (e.g. hex
+ # before integer); confirm against RuleBasedScanner.on.
+ #
+ # States used here:
+ # :initial - ordinary C code
+ # :string - inside a "..." literal; entered with push, which also opens a token group
+ # :include_expected - directly after #include; entered with push_state (no group)
+ class C2 < RuleBasedScanner
+
+ register_for :c2
+ file_extension 'c'
+
+ KEYWORDS = [
+ 'asm', 'break', 'case', 'continue', 'default', 'do',
+ 'else', 'enum', 'for', 'goto', 'if', 'return',
+ 'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
+ 'restrict', # added in C99
+ ] # :nodoc:
+
+ PREDEFINED_TYPES = [
+ 'int', 'long', 'short', 'char',
+ 'signed', 'unsigned', 'float', 'double',
+ 'bool', 'complex', # added in C99
+ ] # :nodoc:
+
+ PREDEFINED_CONSTANTS = [
+ 'EOF', 'NULL',
+ 'true', 'false', # added in C99
+ ] # :nodoc:
+ DIRECTIVES = [
+ 'auto', 'extern', 'register', 'static', 'void',
+ 'const', 'volatile', # added in C89
+ 'inline', # added in C99
+ ] # :nodoc:
+
+ # Token kind lookup for identifiers, used by the identifier and label rules.
+ IDENT_KIND = WordList.new(:ident).
+ add(KEYWORDS, :keyword).
+ add(PREDEFINED_TYPES, :predefined_type).
+ add(DIRECTIVES, :directive).
+ add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
+
+ # Escape sequence bodies, i.e. the part that follows the backslash.
+ ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
+
+ protected
+
+ state :initial do
+ # End of a preprocessor line: leave preprocessor mode and restore the
+ # label_expected value saved when the directive started.
+ on check_if(:in_preproc_line), %r/ \s*? \n \s* /x, :space, flag_off(:in_preproc_line), set(:label_expected, :label_expected_before_preproc_line)
+ on %r/ \s+ | \\\n /x, :space
+
+ # Operators. A label may follow ';', '{' or '}', or a ':' seen after case/default.
+ on %r/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/(?![\/*])=? | \.(?!\d) /x, :operator, set(:label_expected) { |match, case_expected| match =~ /[;\{\}]/ || case_expected && match =~ /:/ }, flag_off(:case_expected)
+
+ on %r/ (?: case | default ) \b /x, :keyword, flag_on(:case_expected), flag_off(:label_expected)
+ # An identifier followed by a single ':' where a label is allowed. It is a
+ # :label unless IDENT_KIND knows the word as something else.
+ on check_if(:label_expected), check_unless(:in_preproc_line), %r/ [A-Za-z_][A-Za-z_0-9]*+ :(?!:) /x, kind { |match|
+ kind = IDENT_KIND[match.chop]
+ kind == :ident ? :label : kind
+ }, set(:label_expected) { |kind| kind == :label }
+ on %r/ [A-Za-z_][A-Za-z_0-9]* /x, kind { |match| IDENT_KIND[match] }, flag_off(:label_expected)
+
+ # Start of a string literal; the optional L prefix and the quote are emitted as separate tokens.
+ on %r/(L)?(")/, push(:string), groups(:modifier, :delimiter)
+
+ on %r/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /x, :char, flag_off(:label_expected)
+ # Numeric literals. The lookaheads also reject a following digit, so a rule
+ # cannot backtrack into the middle of a number ("12.5" must not become
+ # integer "1" plus a float); numbers followed by '.', 'e' or 'f' are left to
+ # the float rule.
+ on %r/0[xX][0-9A-Fa-f]+/, :hex, flag_off(:label_expected)
+ on %r/(?:0[0-7]+)(?![\d.eEfF])/, :octal, flag_off(:label_expected)
+ on %r/(?:\d+)(?![\d.eEfF])L?L?/, :integer, flag_off(:label_expected)
+ # Alternatives are tried left to right, so the bare-digits fallback is last;
+ # it still consumes input such as "12." or "1e" that no other rule matches.
+ on %r/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]?/, :float, flag_off(:label_expected)
+
+ on %r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx, :comment
+ # "#if 0" is treated as a comment up to the next #elif/#else/#endif at the
+ # start of a line, or to the end of the input.
+ on %r/ \# \s* if \s* 0 /x, -> (match) {
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /mx) unless eos?
+ }, :comment
+ # Preprocessor directives: save label_expected so that the end-of-line rule
+ # at the top of this state can restore it.
+ on %r/ \# [ \t]* include\b /x, :preprocessor, flag_on(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected), push_state(:include_expected)
+ on %r/ \# [ \t]* \w* /x, :preprocessor, flag_on(:in_preproc_line), set(:label_expected_before_preproc_line, :label_expected)
+
+ on %r/\$/, :ident
+ end
+
+ state :string do
+ on %r/[^\\\n"]+/, :content
+ on %r/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mx, :char
+ on %r/"/, :delimiter, pop, flag_off(:label_expected)
+ # A backslash that did not start a recognised escape ends the string as an :error.
+ on %r/ \\ /x, pop, :error, flag_off(:label_expected)
+ # Unterminated string at the end of a line: close the group without consuming
+ # anything. continue emits `next`, which skips the nothing-was-consumed check.
+ on %r/ $ /x, pop, flag_off(:label_expected), continue
+ end
+
+ state :include_expected do
+ # The include target, <...> or "...", with the closing delimiter optional.
+ on %r/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/, :include, pop_state
+ on %r/ \s*? \n \s* /x, :space, pop_state
+ on %r/\s+/, :space
+ # Anything else: return to the previous state without consuming input.
+ on %r//, pop_state, continue # TODO: add otherwise method for this
+ end
+
+ # Source of the generated scan_tokens method. def_line records the line of the
+ # def so that class_eval below gets a matching starting line number.
+ scan_tokens_code = <<-"RUBY"
+ def scan_tokens encoder, options#{ def_line = __LINE__; nil }
+ state = :initial
+ label_expected = true
+ case_expected = false
+ label_expected_before_preproc_line = nil
+ in_preproc_line = false
+
+ states = [state]
+
+ until eos?
+ last_pos = pos
+ case state
+#{ @code.chomp.gsub(/^/, ' ') }
+ else
+ raise_inspect 'Unknown state: %p' % [state], encoder
+ end
+
+ raise_inspect 'nothing was consumed! states = %p' % [states], encoder if pos == last_pos
+ end
+
+ if state == :string
+ encoder.end_group :string
+ end
+
+ encoder
+ end
+ RUBY
+
+ # Debug aid: with PUTS set in the environment, print the generated code and the callback count.
+ if ENV['PUTS']
+ puts scan_tokens_code
+ puts "callbacks: #{@callbacks.size}"
+ end
+ class_eval scan_tokens_code, __FILE__, def_line
+ end
+end
+end