summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners
diff options
context:
space:
mode:
authormurphy <murphy@rubychan.de>2008-09-16 22:07:44 +0000
committermurphy <murphy@rubychan.de>2008-09-16 22:07:44 +0000
commite46b6cbe74db250d743e7f194bfc7514529a82cc (patch)
tree851e2669d010cddffcad61189055282360cab2fc /lib/coderay/scanners
parente7b3056418e54db4361b053a7af030d83e15acf8 (diff)
downloadcoderay-e46b6cbe74db250d743e7f194bfc7514529a82cc.tar.gz
New: *JavaScript Scanner* (closes #23).
* It's quite good and fast, but still a beta. * I included Prototype and script.aculo.us as example code for testing. More changes: * Added two new token classes, :keyword and :key, along with CSS styles. ** Actually, we should use :keyword for most scanners that now use :reserved. * HTML Encoder: The CSS parser understands multiple selectors separated by commas.
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r--lib/coderay/scanners/java_script.rb191
1 files changed, 191 insertions, 0 deletions
diff --git a/lib/coderay/scanners/java_script.rb b/lib/coderay/scanners/java_script.rb
new file mode 100644
index 0000000..f13c5d3
--- /dev/null
+++ b/lib/coderay/scanners/java_script.rb
@@ -0,0 +1,191 @@
+module CodeRay
+module Scanners
+
+ class JavaScript < Scanner
+
+ include Streamable
+
+ register_for :java_script
+ file_extension 'js'
+
+ # The actual JavaScript keywords.
+ KEYWORDS = %w[
+ break case catch continue default delete do else
+ false finally for function if in instanceof new null
+ return switch throw true try typeof var void while with
+ ]
+
+ MAGIC_VARIABLES = %w[ this arguments ] # arguments was introduced in JavaScript 1.4
+
+ KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
+ case delete in instanceof new return throw typeof while with
+ ]
+
+ # Reserved for future use.
+ RESERVED_WORDS = %w[
+ abstract boolean byte char class debugger double enum export extends
+ final float goto implements import int interface long native package
+ private protected public short static super synchronized throws transient
+ volatile
+ ]
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(MAGIC_VARIABLES, :local_variable).
+ add(KEYWORDS, :keyword)
+
+ ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
+ REGEXP_ESCAPE = / [bBdDsSwW] /x
+ STRING_CONTENT_PATTERN = {
+ "'" => /[^\\']+/,
+ '"' => /[^\\"]+/,
+ '/' => /[^\\\/]+/,
+ }
+
+ def scan_tokens tokens, options
+
+ state = :initial
+ string_delimiter = nil
+ value_expected = true
+ key_expected = false
+
+ until eos?
+
+ kind = nil
+ match = nil
+
+ case state
+
+ when :initial
+
+ if match = scan(/ \s+ | \\\n /x)
+ value_expected = true if !value_expected && match.index(?\n)
+ tokens << [match, :space]
+ next
+
+ elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+ value_expected = true
+ kind = :comment
+
+ elsif scan(/[+-]?(?:\d+)(?![.eEfF])/)
+ value_expected = false
+ kind = :integer
+
+ elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
+ value_expected = true
+ last_operator = match[-1]
+ key_expected = (last_operator == ?{) || (last_operator == ?,)
+ kind = :operator
+
+ elsif scan(/ [)\]}]+ /x)
+ key_expected = value_expected = false
+ kind = :operator
+
+ elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
+ kind = IDENT_KIND[match]
+ value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
+ if kind == :ident
+ if match.index(?$)
+ kind = :predefined
+ elsif key_expected && check(/\s*:/)
+ kind = :key
+ end
+ end
+ key_expected = false
+
+ elsif match = scan(/["']/)
+ tokens << [:open, :string]
+ state = :string
+ string_delimiter = match
+ kind = :delimiter
+
+ elsif value_expected && (match = scan(/\/(?=\S)/))
+ tokens << [:open, :regexp]
+ state = :regexp
+ string_delimiter = '/'
+ kind = :delimiter
+
+ elsif scan(/ \/ /x)
+ value_expected = true
+ key_expected = false
+ kind = :operator
+
+ elsif scan(/0[xX][0-9A-Fa-f]+/)
+ key_expected = value_expected = false
+ kind = :hex
+
+ elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
+ key_expected = value_expected = false
+ kind = :oct
+
+ elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ key_expected = value_expected = false
+ kind = :float
+
+ else
+ getch
+ kind = :error
+
+ end
+
+ when :string, :regexp
+ if scan(STRING_CONTENT_PATTERN[string_delimiter])
+ kind = :content
+ elsif match = scan(/["'\/]/)
+ tokens << [match, :delimiter]
+ if state == :regexp
+ modifiers = scan(/[gim]+/)
+ tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
+ end
+ tokens << [:close, state]
+ string_delimiter = nil
+ key_expected = value_expected = false
+ state = :initial
+ next
+ elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
+ if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
+ kind = :content
+ else
+ kind = :char
+ end
+ elsif state == :regexp && scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ kind = :char
+ elsif scan(/\\./m)
+ kind = :content
+ elsif scan(/ \\ | $ /x)
+ tokens << [:close, :delimiter]
+ kind = :error
+ key_expected = value_expected = false
+ state = :initial
+ else
+ raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+ end
+
+ else
+ raise_inspect 'Unknown state', tokens
+
+ end
+
+ match ||= matched
+ if $DEBUG and not kind
+ raise_inspect 'Error token %p in line %d' %
+ [[match, kind], line], tokens
+ end
+ raise_inspect 'Empty token', tokens unless match
+
+ tokens << [match, kind]
+
+ end
+
+ if [:string, :regexp].include? state
+ tokens << [:close, state]
+ end
+
+ tokens
+ end
+
+ end
+
+end
+end