diff options
Diffstat (limited to 'etc/todo/scanners')
-rw-r--r-- | etc/todo/scanners/applescript-sebastian.rb | 219 | ||||
-rw-r--r-- | etc/todo/scanners/avrasm.rb | 153 | ||||
-rw-r--r-- | etc/todo/scanners/css.rb | 170 | ||||
-rw-r--r-- | etc/todo/scanners/javascript.rb | 199 | ||||
-rw-r--r-- | etc/todo/scanners/lisp.rb | 102 | ||||
-rw-r--r-- | etc/todo/scanners/paste-333 (DIFF).rb | 88 | ||||
-rw-r--r-- | etc/todo/scanners/paste-693 (IO).rb | 134 | ||||
-rw-r--r-- | etc/todo/scanners/php.rb | 282 | ||||
-rw-r--r-- | etc/todo/scanners/sql.rb | 138 | ||||
-rw-r--r-- | etc/todo/scanners/vhdl.rb | 132 |
10 files changed, 1617 insertions, 0 deletions
# etc/todo/scanners/applescript-sebastian.rb
#
# Scanner for AppleScript Created by Sebastian Yepes F.
# Web: http://sebastian.yepes.in
# e-Mail: sebastian@yepes.in

module CodeRay
module Scanners

  # Tokenizes AppleScript source into CodeRay tokens.
  #
  # The scanner is a two-state machine (:initial and :string).  Identifiers
  # are classified one word at a time through IDENT_KIND, a case-insensitive
  # word list built from the constant arrays below.
  class AppleScript < Scanner

    register_for :applescript

    RESERVED_WORDS = [
      '#include', 'for', 'foreach', 'if', 'elseif', 'else', 'while', 'do', 'dowhile', 'end',
      'switch', 'case', 'return', 'break', 'continue', 'in', 'to', 'of', 'repeat', 'tell', 'then','as'
    ]

    # NOTE(review): identifiers are scanned as single words (see the
    # identifier branch in scan_tokens), so the multi-word entries here can
    # never be looked up as one unit -- confirm whether that is intended.
    KEYWORD_OPERATOR = [
      'is greater than', 'comes after', 'is less than', 'comes before',
      'is greater than or equal to', 'is less than or equal to', 'is equal to',
      'is', 'is not equal to', 'is not', 'contains', 'does not contain', 'is in',
      'is not in', 'starts with', 'ends with'
    ]


    # NOTE(review): many of these entries (e.g. '_root', 'gotoAndPlay',
    # 'MovieClipLoader') look like ActionScript identifiers rather than
    # AppleScript ones -- the list is kept verbatim; confirm its origin.
    DIRECTIVES = [
      'activate', '#endinitclip', '#initclip', '__proto__', '_accProps', '_alpha', '_currentframe',
      '_droptarget', '_focusrect', '_framesloaded', '_height', '_highquality', '_lockroot',
      '_name', '_parent', '_quality', '_root', '_rotation', '_soundbuftime', '_target', '_totalframes',
      '_url', '_visible', '_width', '_x', '_xmouse', '_xscale', '_y', '_ymouse', '_yscale', 'abs',
      'Accessibility', 'acos', 'activityLevel', 'add', 'addListener', 'addPage', 'addProperty',
      'addRequestHeader', 'align', 'allowDomain', 'allowInsecureDomain', 'and', 'appendChild',
      'apply', 'Arguments', 'Array', 'asfunction', 'asin', 'atan', 'atan2', 'attachAudio', 'attachMovie',
      'attachSound', 'attachVideo', 'attributes', 'autosize', 'avHardwareDisable', 'background',
      'backgroundColor', 'BACKSPACE', 'bandwidth', 'beginFill', 'beginGradientFill', 'blockIndent',
      'bold', 'Boolean', 'border', 'borderColor', 'bottomScroll', 'bufferLength', 'bufferTime',
      'builtInItems', 'bullet', 'Button', 'bytesLoaded', 'bytesTotal', 'call', 'callee', 'caller',
      'Camera', 'capabilities', 'CAPSLOCK', 'caption', 'catch', 'ceil', 'charAt', 'charCodeAt',
      'childNodes', 'chr', 'clear', 'clearInterval', 'cloneNode', 'close', 'Color', 'concat',
      'connect', 'condenseWhite', 'constructor', 'contentType', 'ContextMenu', 'ContextMenuItem',
      'CONTROL', 'copy', 'cos', 'createElement', 'createEmptyMovieClip', 'createTextField',
      'createTextNode', 'currentFps', 'curveTo', 'CustomActions', 'customItems', 'data', 'Date',
      'deblocking', 'delete', 'DELETEKEY', 'docTypeDecl', 'domain', 'DOWN',
      'duplicateMovieClip', 'duration', 'dynamic', 'E', 'embedFonts', 'enabled',
      'endFill', 'ENTER', 'eq', 'Error', 'ESCAPE(Konstante)', 'escape(Funktion)', 'eval',
      'exactSettings', 'exp', 'extends', 'finally', 'findText', 'firstChild', 'floor',
      'flush', 'focusEnabled', 'font', 'fps', 'fromCharCode', 'fscommand',
      'gain', 'ge', 'get', 'getAscii', 'getBeginIndex', 'getBounds', 'getBytesLoaded', 'getBytesTotal',
      'getCaretIndex', 'getCode', 'getCount', 'getDate', 'getDay', 'getDepth', 'getEndIndex', 'getFocus',
      'getFontList', 'getFullYear', 'getHours', 'getInstanceAtDepth', 'getLocal', 'getMilliseconds',
      'getMinutes', 'getMonth', 'getNewTextFormat', 'getNextHighestDepth', 'getPan', 'getProgress',
      'getProperty', 'getRGB', 'getSeconds', 'getSelected', 'getSelectedText', 'getSize', 'getStyle',
      'getStyleNames', 'getSWFVersion', 'getText', 'getTextExtent', 'getTextFormat', 'getTextSnapshot',
      'getTime', 'getTimer', 'getTimezoneOffset', 'getTransform', 'getURL', 'getUTCDate', 'getUTCDay',
      'getUTCFullYear', 'getUTCHours', 'getUTCMilliseconds', 'getUTCMinutes', 'getUTCMonth', 'getUTCSeconds',
      'getVersion', 'getVolume', 'getYear', 'globalToLocal', 'goto', 'gotoAndPlay', 'gotoAndStop',
      'hasAccessibility', 'hasAudio', 'hasAudioEncoder', 'hasChildNodes', 'hasEmbeddedVideo', 'hasMP3',
      'hasPrinting', 'hasScreenBroadcast', 'hasScreenPlayback', 'hasStreamingAudio', 'hasStreamingVideo',
      'hasVideoEncoder', 'height', 'hide', 'hideBuiltInItems', 'hitArea', 'hitTest', 'hitTestTextNearPos',
      'HOME', 'hscroll', 'html', 'htmlText', 'ID3', 'ifFrameLoaded', 'ignoreWhite', 'implements',
      'import', 'indent', 'index', 'indexOf', 'Infinity', '-Infinity', 'INSERT', 'insertBefore', 'install',
      'instanceof', 'int', 'interface', 'isActive', 'isDebugger', 'isDown', 'isFinite', 'isNaN', 'isToggled',
      'italic', 'join', 'Key', 'language', 'lastChild', 'lastIndexOf', 'le', 'leading', 'LEFT', 'leftMargin',
      'length', 'level', 'lineStyle', 'lineTo', 'list', 'LN10', 'LN2', 'load', 'loadClip', 'loaded', 'loadMovie',
      'loadMovieNum', 'loadSound', 'loadVariables', 'loadVariablesNum', 'LoadVars', 'LocalConnection',
      'localFileReadDisable', 'localToGlobal', 'log', 'LOG10E', 'LOG2E', 'manufacturer', 'Math', 'max',
      'MAX_VALUE', 'maxChars', 'maxhscroll', 'maxscroll', 'mbchr', 'mblength', 'mbord', 'mbsubstring', 'menu',
      'message', 'Microphone', 'min', 'MIN_VALUE', 'MMExecute', 'motionLevel', 'motionTimeOut', 'Mouse',
      'mouseWheelEnabled', 'moveTo', 'Movieclip', 'MovieClipLoader', 'multiline', 'muted', 'name', 'names', 'NaN',
      'ne', 'NEGATIVE_INFINITY', 'NetConnection', 'NetStream', 'newline', 'nextFrame',
      'nextScene', 'nextSibling', 'nodeName', 'nodeType', 'nodeValue', 'not', 'Number', 'Object',
      'on', 'onActivity', 'onChanged', 'onClipEvent', 'onClose', 'onConnect', 'onData', 'onDragOut',
      'onDragOver', 'onEnterFrame', 'onID3', 'onKeyDown', 'onKeyUp', 'onKillFocus', 'onLoad', 'onLoadComplete',
      'onLoadError', 'onLoadInit', 'onLoadProgress', 'onLoadStart', 'onMouseDown', 'onMouseMove', 'onMouseUp',
      'onMouseWheel', 'onPress', 'onRelease', 'onReleaseOutside', 'onResize', 'onRollOut', 'onRollOver',
      'onScroller', 'onSelect', 'onSetFocus', 'onSoundComplete', 'onStatus', 'onUnload', 'onUpdate', 'onXML',
      'or(logischesOR)', 'ord', 'os', 'parentNode', 'parseCSS', 'parseFloat', 'parseInt', 'parseXML', 'password',
      'pause', 'PGDN', 'PGUP', 'PI', 'pixelAspectRatio', 'play', 'playerType', 'pop', 'position',
      'POSITIVE_INFINITY', 'pow', 'prevFrame', 'previousSibling', 'prevScene', 'print', 'printAsBitmap',
      'printAsBitmapNum', 'PrintJob', 'printNum', 'private', 'prototype', 'public', 'push', 'quality',
      'random', 'rate', 'registerClass', 'removeListener', 'removeMovieClip', 'removeNode', 'removeTextField',
      'replaceSel', 'replaceText', 'resolutionX', 'resolutionY', 'restrict', 'reverse', 'RIGHT',
      'rightMargin', 'round', 'scaleMode', 'screenColor', 'screenDPI', 'screenResolutionX', 'screenResolutionY',
      'scroll', 'seek', 'selectable', 'Selection', 'send', 'sendAndLoad', 'separatorBefore', 'serverString',
      'set', 'setvariable', 'setBufferTime', 'setClipboard', 'setDate', 'setFocus', 'setFullYear', 'setGain',
      'setHours', 'setInterval', 'setMask', 'setMilliseconds', 'setMinutes', 'setMode', 'setMonth',
      'setMotionLevel', 'setNewTextFormat', 'setPan', 'setProperty', 'setQuality', 'setRate', 'setRGB',
      'setSeconds', 'setSelectColor', 'setSelected', 'setSelection', 'setSilenceLevel', 'setStyle',
      'setTextFormat', 'setTime', 'setTransform', 'setUseEchoSuppression', 'setUTCDate', 'setUTCFullYear',
      'setUTCHours', 'setUTCMilliseconds', 'setUTCMinutes', 'setUTCMonth', 'setUTCSeconds', 'setVolume',
      'setYear', 'SharedObject', 'SHIFT(Konstante)', 'shift(Methode)', 'show', 'showMenu', 'showSettings',
      'silenceLevel', 'silenceTimeout', 'sin', 'size', 'slice', 'smoothing', 'sort', 'sortOn', 'Sound', 'SPACE',
      'splice', 'split', 'sqrt', 'SQRT1_2', 'SQRT2', 'Stage', 'start', 'startDrag', 'static', 'status', 'stop',
      'stopAllSounds', 'stopDrag', 'StyleSheet(Klasse)', 'styleSheet(Eigenschaft)', 'substr',
      'substring', 'super', 'swapDepths', 'System', 'TAB', 'tabChildren', 'tabEnabled', 'tabIndex',
      'tabStops', 'tan', 'target', 'targetPath', 'tellTarget', 'text', 'textColor', 'TextField', 'TextFormat',
      'textHeight', 'TextSnapshot', 'textWidth', 'this', 'throw', 'time', 'toggleHighQuality', 'toLowerCase',
      'toString', 'toUpperCase', 'trace', 'trackAsMenu', 'try', 'type', 'typeof', 'undefined',
      'underline', 'unescape', 'uninstall', 'unloadClip', 'unloadMovie', 'unLoadMovieNum', 'unshift', 'unwatch',
      'UP', 'updateAfterEvent', 'updateProperties', 'url', 'useCodePage', 'useEchoSuppression', 'useHandCursor',
      'UTC', 'valueOf', 'variable', 'version', 'Video', 'visible', 'void', 'watch', 'width',
      'with', 'wordwrap', 'XML', 'xmlDecl', 'XMLNode', 'XMLSocket'
    ]

    PREDEFINED_TYPES = [
      'boolean', 'small integer', 'integer', 'double integer',
      'small real', 'real', 'date','list', 'record', 'string', 'class'
    ]

    PREDEFINED_CONSTANTS = [
      'pi', 'true', 'false',
      'application responses', 'case', 'diacriticals', 'expansion', 'hyphens', 'punctuation', 'white space',
      'seconds', 'minutes', 'hours', 'days', 'weeks',
      'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun',
      'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
      'anything', 'current application', 'it', 'me', 'missing value', 'my', 'result',
      'yes', 'no', 'ask',
      'return', 'space', 'tab',
      'all caps', 'all lowercase', 'bold', 'condensed', 'expanded', 'hidden', 'italic', 'outline', 'plain', 'shadow', 'small caps', 'strikethrough', 'subscript', 'superscript', 'underline',
      'version'
    ]

    # Maps an opening quote character to the pattern that consumes string
    # content up to (not including) the same quote or a newline.
    PLAIN_STRING_CONTENT = {
      "'" => /[^'\n]+/,
      '"' => /[^"\n]+/,
    }


    # Word => token kind.  The lists are added in this order, so a word that
    # appears in several lists takes the kind of the last list containing it.
    IDENT_KIND = CaseIgnoringWordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(KEYWORD_OPERATOR, :operator).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_CONSTANTS, :pre_constant)



    # Scans the whole input and appends [text, kind] pairs (plus
    # [:open, kind] / [:close, kind] group markers) to +tokens+.
    #
    # tokens  - token collection to append to; it is also the return value.
    # options - scanner options; not read by this method.
    def scan_tokens tokens, options

      state = :initial
      # Content pattern for the string currently open; chosen from
      # PLAIN_STRING_CONTENT when the opening quote is scanned.
      plain_string_content = nil

      until eos?

        kind = nil
        match = nil

        if state == :initial

          if scan(/\s+/x)
            kind = :space

          elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
            kind = :preprocessor

          elsif scan(/^[\s\t]*--.*/x)
            kind = :comment
          elsif scan(/\(\* (?: .*? \*\)$ | .* )/mx)
            kind = :comment

          elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
            kind = :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]

          elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
            # Single-character literal: emitted as a complete :char group.
            tokens << [:open, :char]
            tokens << ["'", :delimiter]
            tokens << [self[1], :content]
            tokens << ["'", :delimiter]
            tokens << [:close, :char]
            next

          elsif match = scan(/["']/)
            tokens << [:open, :string]
            state = :string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            kind = :delimiter

          else
            kind = :plain
            getch

          end

        elsif state == :string
          if scan(plain_string_content)
            kind = :content
          elsif scan(/['"]/)
            tokens << [matched, :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          elsif scan(/ \\ | $ /x)
            # Stray backslash or end of line inside a string: close the
            # group and flag the offending text as an error.
            tokens << [:close, :string]
            kind = :error
            state = :initial
          end

        else
          raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
        end

        match ||= matched
        if $DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match
        tokens << [match, kind]

      end

      # Input ended inside a string: balance the [:open, :string] emitted
      # when the opening quote was scanned.
      tokens << [:close, :string] if state == :string

      tokens
    end

  end

end
end
\ No newline at end of file diff --git a/etc/todo/scanners/avrasm.rb b/etc/todo/scanners/avrasm.rb new file mode 100644 index 0000000..b3fc28d --- /dev/null +++ b/etc/todo/scanners/avrasm.rb @@ -0,0 +1,153 @@ +module CodeRay +module Scanners + + class AVRASM < Scanner + + register_for :avrasm + + RESERVED_WORDS = [ + ] + + PREDEFINED_TYPES = [ + ] + + PREDEFINED_CONSTANTS = [ + ] + + IDENT_KIND = CaseIgnoringWordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_TYPES, :pre_type). + add(PREDEFINED_CONSTANTS, :pre_constant) + + ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + case state + + when :initial + + if scan(/ \s+ | \\\n /x) + kind = :space + + elsif scan(/;.*/x) + kind = :comment + + elsif scan(/\.(\w*)/x) + kind = :preprocessor + state = :include_expected if self[1] == 'include' + + elsif scan(/@[0-9]+/) + kind = :preprocessor + + elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) + kind = :operator + + elsif scan(/r[0-9]+/i) + # register R0-R31 + kind = :pre_constant + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident and check(/:(?!:)/) + match << scan(/:/) + kind = :label + end + + elsif match = scan(/"/) + tokens << [:open, :string] + state = :string + kind = :delimiter + + elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? 
/ox) + kind = :char + + elsif scan(/0[xX][0-9A-Fa-f]+/) + kind = :integer + + elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + kind = :integer + + elsif scan(/0[bB][0-9A-Fa-f]+/) + kind = :integer + + elsif scan(/(?:\d+)(?![.eEfF])/) + kind = :integer + + elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + kind = :float + + else + getch + kind = :error + + end + + when :string + if scan(/[^\\\n"]+/) + kind = :content + elsif scan(/"/) + tokens << ['"', :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + kind = :char + elsif scan(/ \\ | $ /x) + tokens << [:close, :string] + kind = :error + state = :initial + else + raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + end + + when :include_expected + if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + kind = :include + state = :initial + + elsif match = scan(/\s+/) + kind = :space + state = :initial if match.index ?\n + + else + getch + kind = :error + + end + + else + raise_inspect 'Unknown state', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + if state == :string + tokens << [:close, :string] + end + + tokens + end + + end + +end +end diff --git a/etc/todo/scanners/css.rb b/etc/todo/scanners/css.rb new file mode 100644 index 0000000..f1072f1 --- /dev/null +++ b/etc/todo/scanners/css.rb @@ -0,0 +1,170 @@ +module CodeRay +module Scanners + + class Css < Scanner + + register_for :css + + module RE + NonASCII = /[\x80-\xFF]/ + Hex = /[0-9a-fA-F]/ + Unicode = /\\#{Hex}{1,6}(?:\r\n|\s)?/ # differs from standard because it allows uppercase hex too + Escape = /#{Unicode}|\\[^\r\n\f0-9a-fA-F]/ + NMChar = /[_a-zA-Z0-9-]|#{NonASCII}|#{Escape}/ + NMStart = /[_a-zA-Z]|#{NonASCII}|#{Escape}/ + NL = /\r\n|\r|\n|\f/ + String1 = 
/"(?:[^\n\r\f\\"]|\\#{NL}|#{Escape})*"/ + String2 = /'(?:[^\n\r\f\\']|\\#{NL}|#{Escape})*'/ + String = /#{String1}|#{String2}/ + Invalid1 = /"(?:[^\n\r\f\\"]|\\#{NL}|#{Escape})*/ + Invalid2 = /'(?:[^\n\r\f\\']|\\#{NL}|#{Escape})*/ + Invalid = /#{Invalid1}|#{Invalid2}/ + W = /\s+/ + S = W + + HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/ + Color = /#{HexColor}/ + + Num = /-?(?:[0-9]+|[0-9]*\.[0-9]+)/ + Name = /#{NMChar}+/ + Ident = /-?#{NMStart}#{NMChar}*/ + AtKeyword = /@#{Ident}/ + Percentage = /#{Num}%/ + + reldimensions = %w[em ex px] + absdimensions = %w[in cm mm pt pc] + Unit = /#{(reldimensions + absdimensions).join('|')}/ + + Dimension = /#{Num}#{Unit}/ + + Comment = %r! /\* (?: .*? \*/ | .* ) !mx + URL = /url\((?:[^)\n\r\f]|\\\))*\)/ + + + Id = /##{Name}/ + Class = /\.#{Name}/ + + end + + def scan_tokens tokens, options + states = [:initial] + i = 0 + until eos? + + kind = nil + match = nil + + if states.last == :comment + if scan /(?:[^\n\r\f*]|\*(?!\/))+/ + kind = :comment + + elsif scan /\*\// + kind = :comment + states.pop + + elsif scan RE::S + kind = :space + end + + elsif scan RE::S + kind = :space + + elsif scan /\/\*/ + kind = :comment + states.push :comment + + elsif scan RE::String + kind = :string + + elsif scan RE::AtKeyword + kind = :reserved + + elsif scan RE::Invalid + kind = :error + + elsif scan RE::URL + kind = :string + + elsif scan RE::Dimension + kind = :float + + elsif scan RE::Percentage + kind = :float + + elsif scan RE::Num + kind = :float + + elsif scan /\{/ + kind = :operator + states.push :block + + elsif scan /\}/ + if states.last == :block + kind = :operator + states.pop + else + kind = :error + end + + elsif + case states.last + when :initial + + if scan RE::Class + kind = :class + + elsif scan RE::Id + kind = :constant + + elsif scan RE::Ident + kind = :label + + elsif scan RE::Name + kind = :identifier + + end + + when :block + if scan RE::Color + kind = :color + + elsif scan RE::Ident + kind = :definition + + elsif scan RE::Name + 
kind = :symbol + + end + + else + raise_inspect 'Unknown state', tokens + + end + + elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) + kind = :operator + + else + getch + kind = :error + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + tokens + end + + end + +end +end diff --git a/etc/todo/scanners/javascript.rb b/etc/todo/scanners/javascript.rb new file mode 100644 index 0000000..da67084 --- /dev/null +++ b/etc/todo/scanners/javascript.rb @@ -0,0 +1,199 @@ +module CodeRay +module Scanners + + # Basic Javascript scanner + class Javascript < Scanner + + include Streamable + + register_for :javascript + + helper :patterns + + DEFAULT_OPTIONS = { + } + + private + def scan_tokens tokens, options + first_bake = saved_tokens = nil + last_token_dot = false + last_state = nil + state = :initial + depth = nil + inline_block_stack = [] + + patterns = Patterns # avoid constant lookup + + until eos? + match = nil + kind = nil + + if state.instance_of? patterns::StringState +# {{{ + match = scan_until(state.pattern) || scan_until(/\z/) + tokens << [match, :content] unless match.empty? + break if eos? 
+ + case match = getch + + when state.delim + if state.paren + state.paren_depth -= 1 + if state.paren_depth > 0 + tokens << [match, :nesting_delimiter] + next + end + end + tokens << [match, :delimiter] + tokens << [:close, state.type] + state = state.next_state + + when '\\' + if state.interpreted + if esc = scan(/ #{patterns::ESCAPE} /ox) + tokens << [match + esc, :char] + else + tokens << [match, :error] + end + else + case m = getch + when state.delim, '\\' + tokens << [match + m, :char] + when nil + tokens << [match, :error] + else + tokens << [match + m, :content] + end + end + + when '#' + case peek(1)[0] + when ?{ + inline_block_stack << [state, depth] + state = :initial + depth = 1 + tokens << [:open, :inline] + tokens << [match + getch, :delimiter] + when ?$, ?@ + tokens << [match, :escape] + last_state = state # scan one token as normal code, then return here + state = :initial + else + raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens + end + + when state.paren + state.paren_depth += 1 + tokens << [match, :nesting_delimiter] + + else + raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens + + end + next +# }}} + else +# {{{ + if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) + case m = match[0] + when ?\s, ?\t, ?\f + match << scan(/\s*/) unless eos? + kind = :space + when ?\n, ?\\ + kind = :space + match << scan(/\s*/) unless eos? + when ?#, ?=, ?_ + kind = :comment + else + raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens + end + tokens << [match, kind] + next + + elsif state == :initial + + # IDENTS # + if match = scan(/#{patterns::METHOD_NAME}/o) + kind = last_token_dot ? :ident : + patterns::IDENT_KIND[match] + + # OPERATORS # + elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or + (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) + last_token_dot = :set if match == '.' 
or match == '::' + kind = :operator + unless inline_block_stack.empty? + case match + when '{' + depth += 1 + when '}' + depth -= 1 + if depth == 0 # closing brace of inline block reached + state, depth = inline_block_stack.pop + tokens << [match, :delimiter] + kind = :inline + match = :close + end + end + end + + elsif match = scan(/ ['"] /mx) + tokens << [:open, :string] + kind = :delimiter + state = patterns::StringState.new :string, match == '"', match # important for streaming + + elsif match = scan(/#{patterns::NUMERIC}/o) + kind = if self[1] then :float else :integer end + + elsif match = scan(/ \+\+ | -- | << | >> /x) + kind = :operator + + elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) + kind = :operator + + elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) + kind = :operator + + else + kind = :error + match = getch + + end + + end +# }}} + + last_token_dot = last_token_dot == :set + + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens, state + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + if last_state + state = last_state + last_state = nil + end + end + end + + inline_block_stack << [state] if state.is_a? patterns::StringState + until inline_block_stack.empty? + this_block = inline_block_stack.pop + tokens << [:close, :inline] if this_block.size > 1 + state = this_block.first + tokens << [:close, state.type] + end + + tokens + end + + end + +end +end + +# vim:fdm=marker diff --git a/etc/todo/scanners/lisp.rb b/etc/todo/scanners/lisp.rb new file mode 100644 index 0000000..73ce0da --- /dev/null +++ b/etc/todo/scanners/lisp.rb @@ -0,0 +1,102 @@ +# By Nathan Weizenbaum (http://nex3.leeweiz.net) +# MIT License (http://www.opensource.org/licenses/mit-license.php) +# +# CodeRay scanner for Lisp. +# The keywords are mostly geared towards Emacs Lisp, +# but it should work fine for Common Lisp +# and reasonably well for Scheme. 

require 'rubygems'
require 'coderay'

# Nested module definitions (rather than `module CodeRay::Scanners`) keep
# CodeRay itself in the lexical constant scope, so constants that live
# directly under CodeRay resolve from inside the class body as well.
module CodeRay
  module Scanners
    # Tokenizes Lisp source into CodeRay tokens.
    #
    # Besides the usual lexical classes (delimiters, strings, numbers,
    # comments), the scanner remembers when a defining form such as `defun`
    # or `defvar` was just seen and gives the next symbol the matching kind
    # (:function, :class or :variable).
    class Lisp < Scanner
      register_for :lisp

      NON_SYMBOL_CHARS = '();\s\[\]'
      SYMBOL_RE = /[^#{NON_SYMBOL_CHARS}]+/
      EXPONENT_RE = /(e[\-+]?[0-9]+)?/
      # A complete double-quoted string.  A backslash escapes whatever
      # character follows it, and the body may be empty ("").
      STRING_RE = /"(?:\\.|[^\\"])*"/m

      GEN_DEFINES = %w{
        defun defun* defsubst defmacro defadvice define-skeleton define-minor-mode
        define-global-minor-mode define-globalized-minor-mode define-derived-mode
        define-generic-mode define-compiler-macro define-modify-macro defsetf
        define-setf-expander define-method-combination defgeneric defmethod
      }
      TYPE_DEFINES = %w{
        defgroup deftheme deftype defstruct defclass define-condition
        define-widget defface defpackage
      }
      VAR_DEFINES = %w{
        defvar defconst defconstant defcustom defparameter define-symbol-macro
      }
      # Hash used purely as a set: only key membership is ever tested.
      KEYWORDS = (GEN_DEFINES + TYPE_DEFINES + VAR_DEFINES + %w{
        lambda autoload progn prog1 prog2 save-excursion save-window-excursion
        save-selected-window save-restriction save-match-data save-current-buffer
        with-current-buffer combine-after-change-calls with-output-to-string
        with-temp-file with-temp-buffer with-temp-message with-syntax-table let
        let* while if read-if catch condition-case unwind-protect
        with-output-to-temp-buffer eval-after-load dolist dotimes when unless
      }).inject({}) { |memo, str| memo[str] = nil; memo }

      # Defining form => token kind given to the name that follows it.
      DEFINES = WordList.new.
        add(GEN_DEFINES, :function).
        add(TYPE_DEFINES, :class).
        add(VAR_DEFINES, :variable)

      # Scans the whole input and appends [text, kind] pairs (plus
      # [:open, :string] / [:close, :string] markers) to +tokens+.
      #
      # tokens  - token collection to append to; it is also the return value.
      # options - scanner options; not read by this method.
      def scan_tokens(tokens, options)
        # Kind to give the next symbol when a defining form was just seen;
        # false/nil when no definition is pending.
        pending_kind = false

        until eos?
          kind = nil
          match = nil

          if scan(/\s+/m)
            kind = :space
          elsif scan(/[\(\)\[\]]/)
            kind = :delimiter
          elsif scan(/'+#{SYMBOL_RE}/o)
            kind = :symbol
          elsif scan(/\&#{SYMBOL_RE}/o)
            kind = :reserved
          elsif scan(/:#{SYMBOL_RE}/o)
            kind = :constant
          elsif scan(/\?#{SYMBOL_RE}/o)
            kind = :char
          elsif match = scan(STRING_RE)
            body = match[1...-1]
            tokens << [:open, :string] << ['"', :delimiter]
            # An empty string has no content token.
            tokens << [body, :content] unless body.empty?
            tokens << ['"', :delimiter] << [:close, :string]
            next
          elsif scan(/[\-+]?[0-9]*\.[0-9]+#{EXPONENT_RE}/o)
            kind = :float
          elsif scan(/[\-+]?[0-9]+#{EXPONENT_RE}/o)
            kind = :integer
          elsif scan(/;.*$/)
            kind = :comment
          elsif scan(SYMBOL_RE)
            if pending_kind
              # This symbol is the name being defined.
              kind = pending_kind
            elsif KEYWORDS.include?(matched)
              kind = :reserved
              pending_kind = DEFINES[matched]
            else
              kind = :plain
            end
          end

          match ||= matched
          raise_inspect 'Empty token', tokens unless match

          # A pending definition survives only across keywords, comments and
          # whitespace; any other token cancels it.
          pending_kind = [:reserved, :comment, :space].include?(kind) && pending_kind

          tokens << [match, kind]
        end

        tokens
      end
    end
  end
end
# etc/todo/scanners/paste-333 (DIFF).rb
## diff.rb

module CodeRay
module Scanners

  # Line-oriented scanner for unified diffs.
  #
  # Each line becomes one token, classified by its leading characters:
  # "+++ " / "--- " and any other non-space-led line are :diffhead, "+" lines
  # are :add, "-" lines are :delete, and space-led context lines are :space.
  class Diff < Scanner

    register_for :diff

    # Scans the whole input and appends [text, kind] pairs to +tokens+.
    #
    # tokens  - token collection to append to; it is also the return value.
    # options - scanner options; not read by this method.
    def scan_tokens tokens, options

      until eos?

        kind = nil
        match = nil

        # Line terminators are emitted as separate :space tokens.
        if scan(/\n/)
          kind = :space
        elsif scan(/^[+-]{3} .*$/)
          kind = :diffhead
        elsif scan(/^[+].*$/)
          kind = :add
        elsif scan(/^[-].*$/)
          kind = :delete
        elsif scan(/^[^ ].*$/)
          kind = :diffhead
        elsif scan(/^ .*$/)
          kind = :space
        else
          # Nothing matched: consume one character and flag it, as the
          # sibling scanners do, so scanning always makes progress.
          getch
          kind = :error
        end

        match ||= matched
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      tokens
    end

  end

end
end

# The author's integration notes follow.  They are not Ruby, so they are kept
# inside a block comment to leave this file loadable.
=begin
## styles (cycnus) [plain]

.add { color:green; background:#dfd; }
.delete { color:red; background:#fdd; }
.diffhead { color:#999; background: #e7e7ff; }

## tokens (encoder/html/classes.rb)

  ClassOfKind = {
    :add => "add",
    :delete => "delete",
    :diffhead => "diffhead",

## example diff [diff]
Index: /Users/jgoebel/rails/pastie/app/controllers/pastes_controller.rb
===================================================================
--- /Users/jgoebel/rails/pastie/app/controllers/pastes_controller.rb (revision 1431)
+++ /Users/jgoebel/rails/pastie/app/controllers/pastes_controller.rb (revision 1437)
@@ -1,6 +1,10 @@
+require 'login_system'
 require 'coderay'
 
 class PastesController < ApplicationController
+  include LoginSystem
+
+  before_filter :attempt_cookie_login
 
   # caches_action :recent
 
@@ -10,11 +14,7 @@
 
   def show
     @paste = Paste.find(params[:id])
-    if params[:key] and params[:key]==User.new(@paste.nick).magic_mojo
-      session[:login]=@paste.nick
-      return redirect_to(:action => 'show', :id => @paste.id)
-    end
-
+    attempt_key_login if not logged_in?
     unless @paste.asset or not @paste.body.blank?
       render :action => "edit"
     end
=end
# etc/todo/scanners/paste-693 (IO).rb
module CodeRay
module Scanners

  # A simple scanner for a simple language: Io
  #
  # Three states are used: :initial (ordinary code), :string (inside a
  # double-quoted string) and :include_expected (just after a `#include`).
  # Text that cannot be classified is emitted as an :error token, so that
  # scanning never aborts on unexpected input.
  class Io < Scanner

    register_for :io

    RESERVED_WORDS = [ 'clone','init', 'method', 'list', 'vector', 'block', 'if','ifTrue','ifFalse','ifTrueIfFalse','then', 'for','loop',
      'reverseForeach','foreach','map','continue','break','while','do','return',
      'self','sender','target','proto','parent','protos']

    PREDEFINED_TYPES = []

    PREDEFINED_CONSTANTS = ['Object', 'Lobby',
      'TRUE','true','FALSE','false','NULL','null','Null','Nil','nil','YES','NO']

    # Word => token kind; words in none of the lists are :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # What may follow a backslash inside a string or character literal.
    ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Scans the whole input and appends [text, kind] pairs (plus
    # [:open, :string] / [:close, :string] markers) to +tokens+.
    #
    # tokens  - token collection to append to; it is also the return value.
    # options - scanner options; not read by this method.
    def scan_tokens tokens, options

      state = :initial

      until eos?

        # Every branch that recognizes something overwrites this default;
        # the bare `getch` fallbacks deliberately leave it as :error.
        kind = :error
        match = nil

        case state

        when :initial

          if scan(/ \s+ | \\\n /x)
            kind = :space

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          elsif scan(/ [-+*\/\$\@=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
            kind = :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            # An identifier directly followed by a single colon is a label.
            if kind == :ident and check(/:(?!:)/)
              match << scan(/:/)
              kind = :label
            end

          elsif match = scan(/L?"/)
            tokens << [:open, :string]
            if match[0] == ?L
              tokens << ['L', :modifier]
              match = '"'
            end
            state = :string
            kind = :delimiter

          elsif scan(/#\s*(\w*)/)
            # NOTE(review): Io itself appears to treat '#' as a line comment;
            # this branch looks inherited from a C scanner -- confirm.
            kind = :preprocessor  # FIXME multiline preprocs
            state = :include_expected if self[1] == 'include'

          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            kind = :char

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            kind = :hex

          elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
            kind = :oct

          elsif scan(/(?:\d+)(?![.eEfF])/)
            kind = :integer

          elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
            kind = :float

          else
            getch
          end

        when :string
          if scan(/[^\\"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/ \\ | $ /x)
            # Invalid escape: close the string group before flagging the
            # offending text, so open/close markers stay balanced.
            tokens << [:close, :string]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        when :include_expected
          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            kind = :include
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space
            # A line break ends the search for the include target.
            state = :initial if match.index ?\n

          else
            getch

          end

        else
          raise_inspect 'Unknown state', tokens

        end

        match ||= matched
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string: balance the [:open, :string] emitted
      # when the opening quote was scanned.
      tokens << [:close, :string] if state == :string

      tokens
    end

  end

end
end
# (diff marker for the preceding file: no newline at end of file)
# --- etc/todo/scanners/php.rb (new file, blob 239fbf8) ---

# Convenience operators so that scanner patterns can be composed from
# smaller regexps.
# NOTE(review): this reopens the core Regexp class for everything loaded
# afterwards; it is kept because the RE module below relies on it.
class Regexp
  # Returns a regexp matching either +self+ or +other+.
  def |(other)
    Regexp.union(self, other)
  end

  # Returns a regexp matching +self+ immediately followed by +other+.
  def +(other)
    /#{self}#{other}/
  end
end

module CodeRay
module Scanners

  load :html

  # Scanner for PHP embedded in HTML. Text outside the PHP open/close tags
  # is handed to an HTML scanner that writes into the same token list.
  class PHP < Scanner

    register_for :php

    # Creates the nested HTML scanner, sharing this scanner's token list.
    def setup
      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
    end

    # Resets this scanner and the nested HTML scanner.
    def reset_instance
      super
      @html_scanner.reset
    end

    # Word lists used to classify identifiers.
    module Words
      ControlKeywords = %w!
        if else elseif while do for switch case default declare foreach as
        endif endwhile endfor endforeach endswitch enddeclare return break
        continue exit die try catch throw
      !
      OtherKeywords = %w!
        function class extends implements instanceof parent self var const
        private public protected static abstract final global new echo include
        require include_once require_once eval print use unset isset empty
        interface list array clone null true false
      !

      SpecialConstants = %w! __LINE__ __FILE__ __CLASS__
        __METHOD__ __FUNCTION__
      !
      IdentKinds = WordList.new(:ident).
        add(ControlKeywords, :reserved).
        add(OtherKeywords, :pre_type).
        add(SpecialConstants, :pre_constant)
    end

    # Regular expressions used by scan_tokens.
    module RE
      # Builds a case-insensitive regexp matching any of the literal
      # strings in +array+ (earlier entries win over later ones).
      def self.build_alternatives(array)
        Regexp.new(array.map { |s| Regexp.escape(s) }.join('|'), Regexp::IGNORECASE)
      end

      # The whitespace inside the <script> tags must be spelled as \s+:
      # a literal space is discarded by the /x flag.
      PHPStart = /
        <script\s+language="php"> |
        <script\s+language='php'> |
        <\?php |
        <\?(?!xml) |
        <%
      /xi

      PHPEnd = %r!
        </script> |
        \?> |
        %>
      !xi

      # Identifier characters: ASCII word characters plus anything outside
      # ASCII. [^\x00-\x7F] is used instead of \x80-\xFF because the latter
      # is rejected as an invalid multibyte escape in UTF-8 source files.
      IChar = /[A-Za-z0-9_]|[^\x00-\x7F]/
      IStart = /[A-Za-z_]|[^\x00-\x7F]/
      Identifier = /#{IStart}#{IChar}*/
      Variable = /\$#{Identifier}/

      Typecasts = build_alternatives(%w!
        float double real int integer bool boolean string array object null
      !.map { |s| "(#{s})" })
      OneLineComment1 = %r!//.*?(?=#{PHPEnd}|$)!
      OneLineComment2 = %r!#.*?(?=#{PHPEnd}|$)!
      OneLineComment = OneLineComment1 | OneLineComment2

      HereDoc = /<<</ + Identifier

      binops = %w!
        + - * / << >> & | ^ . %
      !

      ComparisionOperator = build_alternatives %w$
        === !== == != <= >=
      $
      IncDecOperator = build_alternatives %w! ++ -- !

      BinaryOperator = build_alternatives binops
      # Compound assignment: each binary operator followed by "=".
      AssignOperator = build_alternatives binops.map { |s| "#{s}=" }
      LogicalOperator = build_alternatives %w! and or xor not !
      ObjectOperator = build_alternatives %w! -> :: !
      OtherOperator = build_alternatives %w$ => = ? : [ ] ( ) ; , ~ ! @ > <$

      Operator = ComparisionOperator | IncDecOperator | LogicalOperator |
        ObjectOperator | AssignOperator | BinaryOperator | OtherOperator

      S = /\s+/

      # Hexadecimal literals need one or more digits after the 0x prefix.
      Integer = /-?0x[0-9a-fA-F]+/ | /-?\d+/
      # The exponent sign is optional and the marker may be "e" or "E".
      Float = /-?(?:\d+\.\d*|\d*\.\d+)(?:[eE][+-]?\d+)?/

    end

    # Tokenizes the input and appends [text, kind] pairs to +tokens+.
    #
    # A stack of states tracks the current context: :html (outside PHP
    # tags), :php, :mlcomment, :sqstring, :dqstring and :heredocstring.
    # Scanning starts in :html; a PHP open tag pops back to :php.
    #
    # tokens  - the token list to append to (also written by the nested
    #           HTML scanner).
    # options - not used by this method.
    #
    # Returns +tokens+.
    def scan_tokens(tokens, options)
      states = [:php, :html]
      heredocdelim = nil

      until eos?
        match = nil
        kind = nil

        case states.last
        when :html
          if scan(RE::PHPStart)
            kind = :delimiter
            states.pop
          else
            # Everything up to the next PHP open tag (or the end of input)
            # is tokenized by the HTML scanner; an empty :space token is
            # emitted here.
            match = scan_until(/(?=#{RE::PHPStart})/o) || scan_until(/\z/)
            @html_scanner.tokenize match unless match.empty?
            kind = :space
            match = ''
          end

        when :php
          if scan(RE::PHPEnd)
            kind = :delimiter
            states.push :html

          elsif scan(RE::S)
            kind = :space

          elsif scan(/\/\*/)
            kind = :comment
            states.push :mlcomment

          elsif scan(RE::OneLineComment)
            kind = :comment

          elsif match = scan(RE::Identifier)
            kind = Words::IdentKinds[match]
            # tokens[-2] is nil while fewer than two tokens exist.
            before_space = tokens[-2]
            if kind == :ident && check(/:(?!:)/) && before_space && before_space[0] == 'case'
#              match << scan(/:/)
              kind = :label
            elsif kind == :ident && match =~ /^[A-Z]/
              kind = :constant
            end

          # Floats are tried before integers; otherwise "1.5" would be
          # split into integer, operator, integer.
          elsif scan(RE::Float)
            kind = :float

          elsif scan(RE::Integer)
            kind = :integer

          elsif scan(/'/)
            kind = :delimiter
            states.push :sqstring

          elsif scan(/"/)
            kind = :delimiter
            states.push :dqstring

          elsif match = scan(RE::HereDoc)
            heredocdelim = match[RE::Identifier]
            kind = :delimiter
            states.push :heredocstring

          elsif scan(RE::Variable)
            kind = :local_variable

          elsif scan(/\{/)
            kind = :operator
            states.push :php

          elsif scan(/\}/)
            if states.length == 1
              kind = :error
            else
              kind = :operator
              states.pop
            end

          elsif scan(RE::Operator)
            kind = :operator

          else
            getch
            kind = :error

          end

        when :mlcomment
          if scan(/(?:[^\n\r\f*]|\*(?!\/))+/)
            kind = :comment

          elsif scan(/\*\//)
            kind = :comment
            states.pop

          elsif scan(/[\r\n\f]+/)
            kind = :space
          end

        when :sqstring
          if scan(/[^\r\n\f'\\]+/)
            kind = :string
          elsif match = scan(/\\\\|\\'/)
            kind = :char
          elsif scan(/\\/)
            kind = :string
          elsif scan(/[\r\n\f ]+/)
            kind = :space
          elsif scan(/'/)
            kind = :delimiter
            states.pop
          end

        when :dqstring
#todo: $foo[bar] kind of stuff
          if scan(/[^\r\n\f"${\\]+/)
            kind = :string
          elsif scan(/\\x[0-9a-fA-F]{1,2}/)
            kind = :char
          elsif scan(/\\[0-7]{1,3}/)
            kind = :char
          elsif scan(/\\["\\\$abcfnrtyv]/)
            kind = :char
          elsif scan(/\\/)
            kind = :string
          elsif scan(/[\r\n\f]+/)
            kind = :space
          elsif match = scan(RE::Variable)
            kind = :local_variable
            if check(/\[#{RE::Identifier}\]/)
              match << scan(/\[#{RE::Identifier}\]/)
            elsif check(/\[/)
              match << scan(/\[#{RE::Identifier}?/)
              kind = :error
            elsif check(/->#{RE::Identifier}/)
              match << scan(/->#{RE::Identifier}/)
            end
          elsif scan(/\{/)
            # "{$" starts an embedded expression, scanned as PHP code
            # until the matching "}".
            if check(/\$/)
              kind = :operator
              states.push :php
            else
              kind = :string
            end
            match = '{'
          elsif scan(/\$\{#{RE::Identifier}\}/)
            kind = :local_variable
          elsif scan(/\$/)
            kind = :string
          elsif scan(/"/)
            kind = :delimiter
            states.pop
          end

        when :heredocstring
          # The heredoc ends at a line that starts with the delimiter,
          # optionally followed by ";". Variables inside the body are not
          # highlighted yet.
          if bol? && scan(/#{Regexp.escape(heredocdelim)}(?=;?[ \t]*(?:[\r\n]|\z))/)
            kind = :delimiter
            heredocdelim = nil
            states.pop
          elsif scan(/[^\r\n\f]+/)
            kind = :string
          elsif scan(/[\r\n\f]+/)
            kind = :space
          end

        else
          raise "Unknown state!"
        end

        match ||= matched
        if $DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end
      tokens

    end

  end

end
end

# --- etc/todo/scanners/sql.rb (new file, blob 57b5e8c) ---
# by Josh Goebel
module CodeRay
module Scanners

  # Scanner for SQL. Keywords and type names are matched
  # case-insensitively; strings may be delimited by ", ' or `.
  class SQL < Scanner

    register_for :sql

    RESERVED_WORDS = [
      'create','table','index','trigger','drop',
      'primary','key',
      'select','insert','update','vacuum','delete','merge','replace','truncate',
      'into','on','from','values',
      'after','before',
      'and','or',
      'count','min','max','group','order','by','avg',
      'where','join','inner','outer','unique','union',
      'transaction',
      'begin','end',
    ]

    PREDEFINED_TYPES = [
      'char','varchar','enum','set','binary',
      'text','tinytext','mediumtext','longtext',
      'blob','tinyblob','mediumblob','longblob',
      'timestamp','date','time','datetime','year',
      'double','decimal','float',
      'int','integer','tinyint','mediumint','bigint',
      'bit','bool','boolean'
    ]

    PREDEFINED_CONSTANTS = [
      'null', 'true', 'false', 'not'
    ]

    SQL_KIND= CaseIgnoringWordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    IDENT_KIND = WordList.new(:ident)

    ESCAPE = / [rbfnrtv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Tokenizes the input and appends [text, kind] pairs to +tokens+.
    # Strings are wrapped in [:open, :string] / [:close, :string] markers.
    #
    # tokens  - the token list to append to.
    # options - not used by this method.
    #
    # Returns +tokens+.
    def scan_tokens(tokens, options)

      state = :initial
      string_type = nil

      until eos?

        kind = :error
        match = nil

        if state == :initial

          # "--" starts a comment anywhere on a line, not only in the
          # first column.
          if scan(/--.*/)
            kind = :comment
          elsif scan(/ \s+ | \\\n /x)
            kind = :space

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          elsif match = scan(/ \# \s* if \s* 0 /x)
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            kind = :comment

          elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%] | \.(?!\d) /x)
            kind = :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = SQL_KIND[match.downcase]
            kind = IDENT_KIND[match] if kind.nil?

          elsif match = scan(/[`"']/)
            tokens << [:open, :string]
            # Remember which quote opened the string; only the same
            # character closes it.
            string_type = matched
            state = :string
            kind = :delimiter

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            kind = :hex

          elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
            kind = :oct

          elsif scan(/(?:\d+)(?![.eEfF])/)
            kind = :integer

          elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
            kind = :float

          else
            # Unknown character: emitted with the default :error kind.
            getch
          end

        elsif state == :string
          if scan(/[^\\"'`]+/)
            kind = :content
          elsif scan(/["'`]/)
            if string_type == matched
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              state = :initial
              string_type = nil
              next
            else
              # A different quote character is ordinary string content.
              kind = :content
            end
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Broken string: close it so that :open/:close stay balanced.
            tokens << [:close, :string]
            kind = :error
            state = :initial
            string_type = nil
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'else-case reached', tokens

        end

        match ||= matched
#        raise [match, kind], tokens if kind == :error

        tokens << [match, kind]

      end

      # Input ended inside a string: emit the missing close marker.
      if state == :string
        tokens << [:close, :string]
      end
#      RAILS_DEFAULT_LOGGER.info tokens.inspect
      tokens

    end

  end

end
end
# (diff marker for the preceding file: no newline at end of file)
# --- etc/todo/scanners/vhdl.rb (new file, blob 0086001) ---
module CodeRay
module Scanners

  # Scanner for VHDL. Keywords and type names are matched
  # case-insensitively.
  class VHDL < Scanner

    register_for :vhdl

    RESERVED_WORDS = [
      'access','after','alias','all','assert','architecture','begin',
      'block','body','buffer','bus','case','component','configuration','constant',
      'disconnect','downto','else','elsif','end','entity','exit','file','for',
      'function','generate','generic','group','guarded','if','impure','in',
      'inertial','inout','is','label','library','linkage','literal','loop',
      'map','new','next','null','of','on','open','others','out','package',
      'port','postponed','procedure','process','pure','range','record','register',
      'reject','report','return','select','severity','signal','shared','subtype',
      'then','to','transport','type','unaffected','units','until','use','variable',
      'wait','when','while','with','note','warning','error','failure','and',
      'or','xor','not','nor',
      'array'
    ]

    PREDEFINED_TYPES = [
      'bit','bit_vector','character','boolean','integer','real','time','string',
      'severity_level','positive','natural','signed','unsigned','line','text',
      'std_logic','std_logic_vector','std_ulogic','std_ulogic_vector','qsim_state',
      'qsim_state_vector','qsim_12state','qsim_12state_vector','qsim_strength',
      'mux_bit','mux_vector','reg_bit','reg_vector','wor_bit','wor_vector'
    ]

    PREDEFINED_CONSTANTS = [

    ]

    IDENT_KIND = CaseIgnoringWordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Tokenizes the input and appends [text, kind] pairs to +tokens+.
    # Strings are wrapped in [:open, :string] / [:close, :string] markers.
    #
    # tokens  - the token list to append to.
    # options - not used by this method.
    #
    # Returns +tokens+.
    def scan_tokens(tokens, options)

      state = :initial

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if scan(/ \s+ | \\\n /x)
            kind = :space

          elsif scan(/-- .*/x)
            kind = :comment

          elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
            kind = :operator

          # Must be tried before the identifier branch: otherwise the
          # one-letter prefix of a literal such as x"FF" is consumed as an
          # identifier and never becomes part of the string delimiter.
          elsif match = scan(/[a-z]?"/i)
            tokens << [:open, :string]
            state = :string
            kind = :delimiter

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match.downcase]

          # A character literal needs both quotes ...
          elsif scan(/ ' (?: [^\'\n\\] | \\ #{ESCAPE} ) ' /ox)
            kind = :char

          # ... while a lone tick introduces an attribute (clk'event).
          elsif scan(/'/)
            kind = :operator

          elsif scan(/(?:\d+)(?![.eEfF])/)
            kind = :integer

          elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
            kind = :float

          else
            getch
            kind = :error

          end

        when :string
          if scan(/[^\\\n"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/ \\ | $ /x)
            # Lone backslash or end of line inside a string: close the
            # string and flag the error.
            tokens << [:close, :string]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        match ||= matched
        if $DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string: emit the missing close marker.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end