summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners
diff options
context:
space:
mode:
authormurphy <murphy@rubychan.de>2010-06-15 12:30:02 +0000
committermurphy <murphy@rubychan.de>2010-06-15 12:30:02 +0000
commit8895b2d4655a48ff1c19d3cd2dea1af18e3744fe (patch)
tree0fc17722a83cca4a530f6208d0c1400a8e8cf563 /lib/coderay/scanners
parenta71a17a2ff654b5dd08114ec5188ce39e643ff36 (diff)
downloadcoderay-8895b2d4655a48ff1c19d3cd2dea1af18e3744fe.tar.gz
Ticket #127: Clojure scanner by Licenser, yay!
It's still in development. The Scheme scanner, which Lice used as template, has some very bad code, and both scanner must be cleaned up (see #59).
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r--lib/coderay/scanners/clojure.rb203
1 files changed, 203 insertions, 0 deletions
diff --git a/lib/coderay/scanners/clojure.rb b/lib/coderay/scanners/clojure.rb
new file mode 100644
index 0000000..362387a
--- /dev/null
+++ b/lib/coderay/scanners/clojure.rb
@@ -0,0 +1,203 @@
+module CodeRay
+ module Scanners
+
+ # Clojure scanner by Licenser.
+ class Clojure < Scanner
+
+ register_for :clojure
+ file_extension 'clj'
+
+ SPECIAL_FORMS = %w[
+ def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
+ new
+ ] # :nodoc:
+
+ CORE_FORMS = %w[
+ + - -> ->> .. / * < <= = == > >= accessor aclone add-classpath add-watch
+ agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
+ alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
+ aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
+ assert assoc assoc! assoc-in associative? atom await await-for bases bean
+ bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
+ bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
+ booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
+ char-array char-escape-string char-name-string char? chars class class?
+ clear-agent-errors clojure-version coll? comment commute comp comparator
+ compare compare-and-set! compile complement concat cond condp conj conj!
+ cons constantly construct-proxy contains? count counted? create-ns
+ create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
+ defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
+ deliver denominator deref derive descendants disj disj! dissoc dissoc!
+ distinct distinct? doall doc dorun doseq dosync dotimes doto double
+ double-array doubles drop drop-last drop-while empty empty? ensure
+ enumeration-seq error-handler error-mode eval even? every? extend
+ extend-protocol extend-type extenders extends? false? ffirst file-seq
+ filter find find-doc find-ns find-var first float float-array float?
+ floats flush fn fn? fnext for force format future future-call future-cancel
+ future-cancelled? future-done? future? gen-class gen-interface gensym get
+ get-in get-method get-proxy-class get-thread-bindings get-validator hash
+ hash-map hash-set identical? identity if-let if-not ifn? import in-ns
+ inc init-proxy instance? int int-array integer? interleave intern
+ interpose into into-array ints io! isa? iterate iterator-seq juxt key
+ keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
+ list? load load-file load-reader load-string loaded-libs locking long
+ long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
+ map map? mapcat max max-key memfn memoize merge merge-with meta methods
+ min min-key mod name namespace neg? newline next nfirst nil? nnext not
+ not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
+ ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
+ nthnext num number? numerator object-array odd? or parents partial
+ partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
+ pos? pr pr-str prefer-method prefers print print-namespace-doc
+ print-str printf println println-str prn prn-str promise proxy
+ proxy-mappings proxy-super push-thread-bindings pvalues quot rand
+ rand-int range ratio? rationalize re-find re-groups re-matcher
+ re-matches re-pattern re-seq read read-line read-string reduce ref
+ ref-history-count ref-max-history ref-min-history ref-set refer
+ refer-clojure reify release-pending-sends rem remove remove-all-methods
+ remove-method remove-ns remove-watch repeat repeatedly replace replicate
+ require reset! reset-meta! resolve rest restart-agent resultset-seq
+ reverse reversible? rseq rsubseq satisfies? second select-keys send
+ send-off seq seq? seque sequence sequential? set set-error-handler!
+ set-error-mode! set-validator! set? short short-array shorts
+ shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
+ sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
+ split-at split-with str string? struct struct-map subs subseq subvec
+ supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
+ take-nth take-while test the-ns thread-bound? time to-array to-array-2d
+ trampoline transient tree-seq true? type unchecked-add unchecked-dec
+ unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
+ unchecked-remainder unchecked-subtract underive update-in update-proxy
+ use val vals var-get var-set var? vary-meta vec vector vector-of vector?
+ when when-first when-let when-not while with-bindings with-bindings*
+ with-in-str with-local-vars with-meta with-open with-out-str
+ with-precision xml-seq zero? zipmap
+ ] # :nodoc:
+ PREDEFINED_CONSTANTS = %w[
+ true false *1 *2 *3 *agent* *clojure-version* *command-line-args*
+ *compile-files* *compile-path* *e *err* *file* *flush-on-newline*
+ *in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
+ *print-readably* *read-eval* *warn-on-reflection*
+ ] # :nodoc:
+
+ IDENT_KIND = CaseIgnoringWordList.new(:ident).
+ add(SPECIAL_FORMS, :reserved).
+ add(CORE_FORMS, :reserved).
+ add(PREDEFINED_CONSTANTS, :pre_constant)
+
+ BASIC_IDENTIFYER = /[a-zA-Z$%*\/_+!?&<>\-=][a-zA-Z0-9ยง$&*+!\/_?<>\-\#]*/
+ IDENTIFIER = /(?:[@']?(?:#{BASIC_IDENTIFYER}\.)*#{BASIC_IDENTIFYER}(?:\/#{BASIC_IDENTIFYER})?\.?)|\.\.?/
+ SYMBOL = /::?#{IDENTIFIER}/o
+ DIGIT = /\d/
+ DIGIT10 = DIGIT
+ DIGIT16 = /[0-9a-f]/i
+ DIGIT8 = /[0-7]/
+ DIGIT2 = /[01]/
+ RADIX16 = /\#x/i
+ RADIX8 = /\#o/i
+ RADIX2 = /\#b/i
+ RADIX10 = /\#d/i
+ EXACTNESS = /#i|#e/i
+ SIGN = /[\+-]?/
+ EXP_MARK = /[esfdl]/i
+ EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
+ SUFFIX = /#{EXP}?/
+ PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
+ PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
+ PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
+ PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
+ UINT10 = /#{DIGIT10}+#*/
+ UINT16 = /#{DIGIT16}+#*/
+ UINT8 = /#{DIGIT8}+#*/
+ UINT2 = /#{DIGIT2}+#*/
+ DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
+ UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
+ UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
+ UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
+ UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
+ REAL10 = /#{SIGN}#{UREAL10}/
+ REAL16 = /#{SIGN}#{UREAL16}/
+ REAL8 = /#{SIGN}#{UREAL8}/
+ REAL2 = /#{SIGN}#{UREAL2}/
+ IMAG10 = /i|#{UREAL10}i/
+ IMAG16 = /i|#{UREAL16}i/
+ IMAG8 = /i|#{UREAL8}i/
+ IMAG2 = /i|#{UREAL2}i/
+ COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
+ COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
+ COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
+ COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
+ NUM10 = /#{PREFIX10}?#{COMPLEX10}/
+ NUM16 = /#{PREFIX16}#{COMPLEX16}/
+ NUM8 = /#{PREFIX8}#{COMPLEX8}/
+ NUM2 = /#{PREFIX2}#{COMPLEX2}/
+ NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
+
+ protected
+
+ def scan_tokens encoder, options
+
+ state = :initial
+ ident_kind = IDENT_KIND
+
+ until eos?
+
+ case state
+ when :initial
+ if match = scan(/ \s+ | \\\n | , /x)
+ encoder.text_token match, :space
+ elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|@/)
+ encoder.text_token match, :operator # FIXME: was :operator_fat
+ elsif match = scan(/;.*/)
+ encoder.text_token match, :comment
+ elsif match = scan(/\#\\(?:newline|space|.?)/)
+ encoder.text_token match, :char
+ elsif match = scan(/\#[ft]/)
+ encoder.text_token match, :pre_constant
+ elsif match = scan(/#{IDENTIFIER}/o)
+ encoder.text_token match, ident_kind[matched]
+ elsif match = scan(/#{SYMBOL}/o)
+ encoder.text_token match, :symbol
+ elsif match = scan(/\./)
+ encoder.text_token match, :operator
+ elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
+ encoder.text_token match, :type
+ elsif match = scan(/ (\#)? " /x)
+ state = self[1] ? :regexp : :string
+ encoder.begin_group state
+ encoder.text_token match, :delimiter
+ elsif match = scan(/#{NUM}/o) and not matched.empty?
+ encoder.text_token match, match[/[.e]/i] ? :float : :integer
+ else
+ encoder.text_token getch, :error
+ end
+
+ when :string, :regexp
+ if match = scan(/[^"\\]+|\\.?/)
+ encoder.text_token match, :content
+ elsif match = scan(/"/)
+ encoder.text_token match, :delimiter
+ encoder.end_group state
+ state = :initial
+ else
+ raise_inspect "else case \" reached; %p not handled." % peek(1),
+ encoder, state
+ end
+
+ else
+ raise 'else case reached'
+
+ end
+
+ end
+
+ if [:string, :regexp].include? state
+ encoder.end_group state
+ end
+
+ encoder
+
+ end
+ end
+ end
+end \ No newline at end of file