enhanced CodeRay interface

updated demo_css.rb Rakefile: rdoc_small task added
author: murphy <murphy@rubychan.de> 2005-09-28 01:39:48 +0000
committer: murphy <murphy@rubychan.de> 2005-09-28 01:39:48 +0000
commit: f9bb65b346b27fe507c1e1bd31c9451b99b2a9a5 (patch)
tree: 464e7008663f0b3e99ea41cebc2dbdc5fe6c72fc /lib
parent: 763e5b8b4d27fae528097263007d8113d444e69d (diff)
download: coderay-f9bb65b346b27fe507c1e1bd31c9451b99b2a9a5.tar.gz
3 files changed, 133 insertions, 11 deletions
diff --git a/lib/coderay.rb b/lib/coderay.rb
index 17c315d..012668b 100644
--- a/lib/coderay.rb
+++ b/lib/coderay.rb
@@ -56,6 +56,76 @@
 #
 # and look at the file it created.
 # 
+
+# = CodeRay
+#
+# The CodeRay module provides convenience methods for the engine.
+#
+# * The +lang+ and +format+ arguments select Scanner and Encoder to use. These are
+#   simply lower-case symbols, like <tt>:python</tt> or <tt>:html</tt>.
+# * All methods take an optional hash as last parameter, +options+, that is sent to
+#   the Encoder / Scanner.
+# * Input and language are always sorted in this order: +code+, +lang+.
+#   (This is in alphabetical order, if you need a mnemonic ;)
+# 
+# You should be able to highlight everything you want just using these methods;
+# so there is no need to dive into CodeRay's deep class hierarchy.
+#
+# The examples in the demo/ directory demonstrate common cases using this interface.
+#	
+# = Basic Access Ways
+#
+# Read this to get a general view of what CodeRay provides.
+# 
+# == Scanning
+#	
+# Scanning means analysing an input string, splitting it up into Tokens.
+# Each Token knows what type it is: string, comment, class name, etc.
+#
+# Each +lang+ (language) has its own Scanner; for example, <tt>:ruby</tt> code is
+# handled by CodeRay::Scanners::RubyScanner.
+# 
+# CodeRay.scan:: Scan a string in a given language into Tokens.
+#                This is the most common method to use.
+# CodeRay.scan_file:: Scan a file and guess the language using FileType.
+# 
+# The Tokens object you get from these methods can encode itself; see Tokens.
+# 
+# == Encoding
+#
+# Encoding means compiling Tokens into an output. This can be colored HTML or
+# LaTeX, a textual statistic or just the number of non-whitespace tokens.
+# 
+# Each Encoder provides output in a specific +format+, so you select Encoders via
+# formats like <tt>:html</tt> or <tt>:statistic</tt>.
+# 
+# CodeRay.encode:: Scan and encode a string in a given language.
+# CodeRay.encode_tokens:: Encode the given tokens.
+# CodeRay.encode_file:: Scan a file, guess the language using FileType and encode it.
+#
+# == Streaming
+#
+# Streaming saves RAM by running Scanner and Encoder in some sort of
+# pipe mode; see TokenStream.
+# 
+# CodeRay.scan_stream:: Scan in stream mode.
+# 
+# == All-in-One Encoding
+#
+# CodeRay.encode:: Highlight a string with a given input and output format.
+#
+# == Instantiating
+#
+# You can use an Encoder instance to highlight multiple inputs. This way, the setup
+# for this Encoder must only be done once.
+#	
+# CodeRay.encoder:: Create an Encoder instance with format and options.
+#
+# There is no CodeRay.scanner method because Scanners are bound to an input string
+# on creation; you can't re-use them with another string.
+#
+# The scanning methods provide more flexibility; we recommend using these.
+#	
 module CodeRay
 	
 	Version = '0.4.2'
@@ -108,7 +178,10 @@ module CodeRay
 			scan code, lang, options, &block
 		end
 
-		# Encode +code+ with the Encoder for +format+ and the Scanner for +lang+.
+		# Encode a string in Streaming mode.
+		# 
+		# This starts scanning +code+ with the Scanner for +lang+
+		# while encoding the output with the Encoder for +format+.
 		# +options+ will be passed to the Encoder.
 		#
 		# See CodeRay::Encoder.encode_stream
@@ -116,17 +189,48 @@ module CodeRay
 			encoder(format, options).encode_stream code, lang, options
 		end
 
+		# Encode a string.
+		# 
+		# This scans +code+ with the Scanner for +lang+ and then
+		# encodes it with the Encoder for +format+.
+		# +options+ will be passed to the Encoder.
+		#
+		# See CodeRay::Encoder.encode
 		def encode code, lang, format, options = {}
 			encoder(format, options).encode code, lang, options
 		end
+		
+		# Encode pre-scanned Tokens.
+		# Use this together with CodeRay.scan:
+		# 
+		#  require 'coderay'
+		#  
+		#  # Highlight a short Ruby code example in an HTML span
+		#  tokens = CodeRay.scan '1 + 2', :ruby
+		#  puts CodeRay.encode_tokens(tokens, :span)
+		# +options+ is passed to the Encoder for +format+.
+		def encode_tokens tokens, format, options = {}
+			encoder(format, options).encode_tokens tokens, options
+		end
+
+		# Scans the file +filename+ (language is guessed) and encodes it as +format+.
+		# 
+		# See CodeRay.scan_file; <tt>options[:scanner_options]</tt> is handed to it.
+		# Notice that the second argument is the output +format+, not the input language.
+		# 
+		# Example:
+		#  require 'coderay'
+		#  page = CodeRay.encode_file 'some_c_code.c', :html
+		def encode_file filename, format, options = {}
+			tokens = scan_file filename, :auto, get_scanner_options(options)  # :auto, not bare +auto+ (NameError)
+			encode_tokens tokens, format, options
+		end
 
 		# Finds the Encoder class for +format+ and creates an instance, passing
 		# +options+ to it.
 		# 
 		# Example:
 		#  require 'coderay'
-		#  token_count = CodeRay.encoder(:count).encodea("puts 17 + 4\n", :ruby).to_i  #-> 8
-		#  require 'coderay'
 		#  
 		#  stats = CodeRay.encoder(:statistic)
 		#  stats.encode("puts 17 + 4\n", :ruby)
@@ -140,6 +244,16 @@ module CodeRay
 			Encoders[format].new options
 		end
 
+		# Extract the options for the scanner from the +options+ hash.
+		#
+		# Returns an empty Hash if <tt>:scanner_options</tt> is not set.
+		#
+		# This is used if a method like CodeRay.encode has to provide options
+		# for Encoder _and_ scanner.
+		def get_scanner_options options
+			options.fetch :scanner_options, {}
+		end
+
 	end
 
 	# This Exception is raised when you try to stream with something that is not
diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb
index b6a22f0..53febc2 100644
--- a/lib/coderay/encoder.rb
+++ b/lib/coderay/encoder.rb
@@ -165,7 +165,7 @@ module CodeRay
         raise NotStreamableError, self unless kind_of? Streamable
         options = @options.merge options
         setup options
-        scanner_options = options.fetch :scanner_options, {}
+        scanner_options = CodeRay.get_scanner_options options
         @token_stream = CodeRay.scan_stream code, lang, scanner_options, &self
         finish options
       end
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 71ad33a..988008e 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -151,9 +151,15 @@ module CodeRay
 		# This can not be undone, but should yield the same output
 		# in most Encoders.  It basically makes the output smaller.
 		#
-		# Combined with dump, it saves database space.
+		# Combined with dump, it saves space at the cost of
+    # calculation time.
+    #
+    # If the scanner is written carefully, this is not required - 
+    # for example, consecutive //-comment lines can already be 
+    # joined in one token by the Scanner.
 		def optimize
-			last_kind, last_text = nil, nil
+      print ' Tokens#optimize: before: %d - ' % size if $DEBUG
+			last_kind = last_text = nil
 			new = self.class.new
 			each do |text, kind|
 				if text.is_a? String
@@ -166,15 +172,17 @@ module CodeRay
 					end
 				else
 					new << [last_text, last_kind] if last_kind
-					last_kind, last_text = nil, nil
+					last_kind = last_text = nil
 					new << [text, kind]
 				end
 			end
 			new << [last_text, last_kind] if last_kind
+      print 'after: %d (%d saved = %2.0f%%)' % 
+        [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
 			new
 		end
 
-		# Compact the object itself; see compact.
+		# Optimize the object itself; see optimize.
 		def optimize!
 			replace optimize
 		end
@@ -290,9 +298,9 @@ module CodeRay
 			raise NotImplementedError, 'A TokenStream cannot be dumped.'
 		end
 
-		# A TokenStream cannot be compacted. Use Tokens.
-		def compact
-			raise NotImplementedError, 'A TokenStream cannot be compacted.'
+		# A TokenStream cannot be optimized. Use Tokens.
+		def optimize
+			raise NotImplementedError, 'A TokenStream cannot be optimized.'
 		end
 
 	end
author	murphy <murphy@rubychan.de>	2005-09-28 01:39:48 +0000
committer	murphy <murphy@rubychan.de>	2005-09-28 01:39:48 +0000
commit	f9bb65b346b27fe507c1e1bd31c9451b99b2a9a5 (patch)
tree	464e7008663f0b3e99ea41cebc2dbdc5fe6c72fc /lib
parent	763e5b8b4d27fae528097263007d8113d444e69d (diff)
download	coderay-f9bb65b346b27fe507c1e1bd31c9451b99b2a9a5.tar.gz