| field | value | date |
|---|---|---|
| author | Guido van Rossum <guido@python.org> | 1992-12-08 14:37:55 +0000 |
| committer | Guido van Rossum <guido@python.org> | 1992-12-08 14:37:55 +0000 |
| commit | 95cd2ef1623d8f3ac574d617964e5b4e097bc54d (patch) | |
| tree | c763f4b5a345daddf1ac94020f2e5c227fc3b9dc /Doc/partparse.py | |
| parent | 2a7178efe1353aed277c0714e92a0790c84f7b88 (diff) | |
| download | cpython-git-95cd2ef1623d8f3ac574d617964e5b4e097bc54d.tar.gz | |
Incorporated Jan-Hein's changes and texinfo conversion.
Diffstat (limited to 'Doc/partparse.py')
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | Doc/partparse.py | 2137 |
1 file changed, 2137 insertions, 0 deletions
| diff --git a/Doc/partparse.py b/Doc/partparse.py new file mode 100644 index 0000000000..83b58bd89d --- /dev/null +++ b/Doc/partparse.py @@ -0,0 +1,2137 @@ +# +# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file, +#     and generate texinfo source. +# +# This is *not* a good example of good programming practices. In fact, this +#     file could use a complete rewrite, in order to become faster, more +#     easy extensible and maintainable. +# +# However, I added some comments on a few places for the pityful person who +#     would ever need to take a look into this file. +# +# Have I been clear enough?? +# +# -jh + + +import sys, string, regex + +# Different parse modes for phase 1 +MODE_REGULAR = 0 +MODE_VERBATIM = 1 +MODE_CS_SCAN = 2 +MODE_COMMENT = 3 +MODE_MATH = 4 +MODE_DMATH = 5 +MODE_GOBBLEWHITE = 6 + +the_modes = MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, \ +	  MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE + +# Show the neighbourhood of the scanned buffer +def epsilon(buf, where): +	wmt, wpt = where - 10, where + 10 +	if wmt < 0: +		wmt = 0 +	if wpt > len(buf): +		wpt = len(buf) +	return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.' + +# Should return the line number. never worked +def lin(): +	global lineno +	return ' Line ' + `lineno` + '.' + +# Displays the recursion level. +def lv(lvl): +	return ' Level ' + `lvl` + '.' + +# Combine the three previous functions. Used often. +def lle(lvl, buf, where): +	return lv(lvl) + lin() + epsilon(buf, where) +	 +	 +# This class is only needed for _symbolic_ representation of the parse mode. +class Mode: +	def init(self, arg): +		if arg not in the_modes: +			raise ValueError, 'mode not in the_modes' +		self.mode = arg +		return self + +	def __cmp__(self, other): +		if type(self) != type(other): +			other = mode(other) +		return cmp(self.mode, other.mode) + +	def __repr__(self): +		if self.mode == MODE_REGULAR: +			return 'MODE_REGULAR' +		elif self.mode == MODE_VERBATIM: +			return 'MODE_VERBATIM' +		elif self.mode == MODE_CS_SCAN: +			return 'MODE_CS_SCAN' +		elif self.mode == MODE_COMMENT: +			return 'MODE_COMMENT' +		elif self.mode == MODE_MATH: +			return 'MODE_MATH' +		elif self.mode == MODE_DMATH: +			return 'MODE_DMATH' +		elif self.mode == MODE_GOBBLEWHITE: +			return 'MODE_GOBBLEWHITE' +		else: +			raise ValueError, 'mode not in the_modes' + +# just a wrapper around a class initialisation +def mode(arg): +	return Mode().init(arg) + + +# After phase 1, the text consists of chunks, with a certain type +# this type will be assigned to the chtype member of the chunk +# the where-field contains the file position where this is found +# and the data field contains (1): a tuple describing start- end end +# positions of the substring (can be used as slice for the buf-variable), +# (2) just a string, mostly generated by the changeit routine, +# or (3) a list, describing a (recursive) subgroup of chunks +PLAIN = 0			# ASSUME PLAINTEXT, data = the text +GROUP = 1			# GROUP ({}), data = [chunk, chunk,..] +CSNAME = 2			# CONTROL SEQ TOKEN, data = the command +COMMENT = 3			# data is the actual comment +DMATH = 4			# DISPLAYMATH, data = [chunk, chunk,..] 
+MATH = 5			# MATH, see DISPLAYMATH +OTHER = 6			# CHAR WITH CATCODE OTHER, data = char +ACTIVE = 7			# ACTIVE CHAR +GOBBLEDWHITE = 8		# Gobbled LWSP, after CSNAME +ENDLINE = 9			# END-OF-LINE, data = '\n' +DENDLINE = 10			# DOUBLE EOL, data='\n', indicates \par +ENV = 11			# LaTeX-environment +					# data =(envname,[ch,ch,ch,.]) +CSLINE = 12			# for texi: next chunk will be one group +					# of args. Will be set all on 1 line +IGNORE = 13			# IGNORE this data +ENDENV = 14			# TEMP END OF GROUP INDICATOR +IF = 15				# IF-directive +					# data = (flag,negate,[ch, ch, ch,...]) +the_types = PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE, \ +	  GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF + +# class, just to display symbolic name +class ChunkType: +	def init(self, chunk_type): +		if chunk_type not in the_types: +			raise 'ValueError', 'chunk_type not in the_types' +		self.chunk_type = chunk_type +		return self + +	def __cmp__(self, other): +		if type(self) != type(other): +			other = chunk_type(other) +		return cmp(self.chunk_type, other.chunk_type) + +	def __repr__(self): +		if self.chunk_type == PLAIN: +			return 'PLAIN' +		elif self.chunk_type == GROUP: +			return 'GROUP' +		elif self.chunk_type == CSNAME: +			return 'CSNAME' +		elif self.chunk_type == COMMENT: +			return 'COMMENT' +		elif self.chunk_type == DMATH: +			return 'DMATH' +		elif self.chunk_type == MATH: +			return 'MATH' +		elif self.chunk_type == OTHER: +			return 'OTHER' +		elif self.chunk_type == ACTIVE: +			return 'ACTIVE' +		elif self.chunk_type == GOBBLEDWHITE: +			return 'GOBBLEDWHITE' +		elif self.chunk_type == DENDLINE: +			return 'DENDLINE' +		elif self.chunk_type == ENDLINE: +			return 'ENDLINE' +		elif self.chunk_type == ENV: +			return 'ENV' +		elif self.chunk_type == CSLINE: +			return 'CSLINE' +		elif self.chunk_type == IGNORE: +			return 'IGNORE' +		elif self.chunk_type == ENDENV: +			return 'ENDENV' +		elif self.chunk_type == IF: +			return 'IF' +		else: +			raise ValueError, 'chunk_type not in the_types' + +# ...and the wrapper +def chunk_type(type): +	return ChunkType().init(type) + +# store a type object of the ChunkType-class-instance... +chunk_type_type = type(chunk_type(0)) +	 +# this class contains a part of the parsed buffer +class Chunk: +	def init(self, chtype, where, data): +		if type(chtype) != chunk_type_type: +			chtype = chunk_type(chtype) +		self.chtype = chtype +		if type(where) != type(0): +			raise TypeError, '\'where\' is not a number' +		self.where = where +		self.data = data +		##print 'CHUNK', self +		return self + +	def __repr__(self): +		return 'chunk' + `self.chtype, self.where, self.data` + +# and the wrapper +def chunk(chtype, where, data): +	 return Chunk().init(chtype, where, data) +	  + + +error = 'partparse.error' + +# +# TeX's catcodes... 
+# +CC_ESCAPE = 0 +CC_LBRACE = 1 +CC_RBRACE = 2 +CC_MATHSHIFT = 3 +CC_ALIGNMENT = 4 +CC_ENDLINE = 5 +CC_PARAMETER = 6 +CC_SUPERSCRIPT = 7 +CC_SUBSCRIPT = 8 +CC_IGNORE = 9 +CC_WHITE = 10 +CC_LETTER = 11 +CC_OTHER = 12 +CC_ACTIVE = 13 +CC_COMMENT = 14 +CC_INVALID = 15 + +# and the names +cc_names = [\ +	  'CC_ESCAPE', \ +	  'CC_LBRACE', \ +	  'CC_RBRACE', \ +	  'CC_MATHSHIFT', \ +	  'CC_ALIGNMENT', \ +	  'CC_ENDLINE', \ +	  'CC_PARAMETER', \ +	  'CC_SUPERSCRIPT', \ +	  'CC_SUBSCRIPT', \ +	  'CC_IGNORE', \ +	  'CC_WHITE', \ +	  'CC_LETTER', \ +	  'CC_OTHER', \ +	  'CC_ACTIVE', \ +	  'CC_COMMENT', \ +	  'CC_INVALID', \ +	  ] + +# Show a list of catcode-name-symbols +def pcl(codelist): +	result = '' +	for i in codelist: +		result = result + cc_names[i] + ', ' +	return '[' + result[:-2] + ']' + +# the name of the catcode (ACTIVE, OTHER, etc.) +def pc(code): +	return cc_names[code] +	 + +# Which catcodes make the parser stop parsing regular plaintext +regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT, \ +	  CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT, \ +	  CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE] + +# same for scanning a control sequence name +csname_scancodes = [CC_LETTER] + +# same for gobbling LWSP +white_scancodes = [CC_WHITE] +##white_scancodes = [CC_WHITE, CC_ENDLINE] + +# make a list of all catcode id's, except for catcode ``other'' +all_but_other_codes = range(16) +del all_but_other_codes[CC_OTHER] +##print all_but_other_codes + +# when does a comment end +comment_stopcodes = [CC_ENDLINE] + +# gather all characters together, specified by a list of catcodes +def code2string(cc, codelist): +	print 'code2string: codelist = ' + pcl(codelist), +	result = '' +	for catagory in codelist: +		if cc[catagory]: +			result = result + cc[catagory] +	print 'result = ' + `result` +	return result + +# automatically generate all characters of catcode other, being the +# complement set in the ASCII range (128 characters) +def make_other_codes(cc): +	otherchars = range(128)		# could be made 256, no problem +	for catagory in all_but_other_codes: +		if cc[catagory]: +			for c in cc[catagory]: +				otherchars[ord(c)] = None +	result = '' +	for i in otherchars: +		if i != None: +			result = result + chr(i) +	return result + +# catcode dump (which characters have which catcodes). +def dump_cc(name, cc): +	print '\t' + name +	print '=' * (8+len(name)) +	if len(cc) != 16: +		raise TypeError, 'cc not good cat class' +	for i in range(16): +		print pc(i) + '\t' + `cc[i]` +		 + +# In the beginning,.... 
+epoch_cc = [None] * 16 +##dump_cc('epoch_cc', epoch_cc) + + +# INITEX +initex_cc = epoch_cc[:] +initex_cc[CC_ESCAPE] = '\\' +initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \ +	  '\n', '\0', ' ' +initex_cc[CC_LETTER] = string.uppercase + string.lowercase +initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F' +#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway +##dump_cc('initex_cc', initex_cc) + +	 +# LPLAIN: LaTeX catcode setting (see lplain.tex) +lplain_cc = initex_cc[:] +lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}' +lplain_cc[CC_MATHSHIFT] = '$' +lplain_cc[CC_ALIGNMENT] = '&' +lplain_cc[CC_PARAMETER] = '#' +lplain_cc[CC_SUPERSCRIPT] = '^\x0B'	# '^' and C-k +lplain_cc[CC_SUBSCRIPT] = '_\x01'	# '_' and C-a +lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t' +lplain_cc[CC_ACTIVE] = '~\x0C'		# '~' and C-l +lplain_cc[CC_OTHER] = make_other_codes(lplain_cc) +##dump_cc('lplain_cc', lplain_cc) + +	 +# Guido's LaTeX environment catcoded '_' as ``other'' +# my own purpose catlist +my_cc = lplain_cc[:] +my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here +my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'	      # add it to OTHER list +dump_cc('my_cc', my_cc) + + + +# needed for un_re, my equivalent for regexp-quote in Emacs +re_meaning = '\\[]^$' + +def un_re(str): +	result = '' +	for i in str: +		if i in re_meaning: +			result = result + '\\' +		result = result + i +	return result +	 +# NOTE the negate ('^') operator in *some* of the regexps below +def make_rc_regular(cc): +	# problems here if '[]' are included!! +	return regex.compile('[' + code2string(cc, regular_stopcodes) + ']') + +def make_rc_cs_scan(cc): +	return regex.compile('[^' + code2string(cc, csname_scancodes) + ']') + +def make_rc_comment(cc): +	return regex.compile('[' + code2string(cc, comment_stopcodes) + ']') + +def make_rc_endwhite(cc): +	return regex.compile('[^' + code2string(cc, white_scancodes) + ']') +	 + + +# regular: normal mode:  +rc_regular = make_rc_regular(my_cc) + +# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$' +rc_cs_scan = make_rc_cs_scan(my_cc) +rc_comment = make_rc_comment(my_cc) +rc_endwhite = make_rc_endwhite(my_cc) + + +# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0) +#     RECURSION-LEVEL will is incremented on entry. +#     result contains the list of chunks returned +#     together with this list, the buffer position is returned + +#     RECURSION-LEVEL will be set to zero *again*, when recursively a +#     {,D}MATH-mode scan has been enetered. +#     This has been done in order to better check for environment-mismatches + +def parseit(buf, *rest): +	global lineno + +	if len(rest) == 3: +		parsemode, start, lvl = rest +	elif len(rest) == 2: +		parsemode, start, lvl = rest + (0, ) +	elif len(rest) == 1: +		parsemode, start, lvl = rest + (0, 0) +	elif len(rest) == 0: +		parsemode, start, lvl = mode(MODE_REGULAR), 0, 0 +	else: +		raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])' +	result = [] +	end = len(buf) +	if lvl == 0 and parsemode == mode(MODE_REGULAR): +		lineno = 1 +	lvl = lvl + 1 + +	##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')' + +	# +	# some of the more regular modes... 
+	# + +	if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)): +		cstate = [] +		newpos = start +		curpmode = parsemode +		while 1: +			where = newpos +			#print '\tnew round: ' + epsilon(buf, where) +			if where == end: +				if lvl > 1 or curpmode != mode(MODE_REGULAR): +					# not the way we started... +					raise EOFError, 'premature end of file.' + lle(lvl, buf, where) +				# the real ending of lvl-1 parse +				return end, result + +			pos = rc_regular.search(buf, where) + +			if pos < 0: +				pos = end + +			if pos != where: +				newpos, c = pos, chunk(PLAIN, where, (where, pos)) +				result.append(c) +				continue + + +			# +			# ok, pos == where and pos != end +			# +			foundchar = buf[where] +			if foundchar in my_cc[CC_LBRACE]: +				# recursive subgroup parse... +				newpos, data = parseit(buf, curpmode, where+1, lvl) +				result.append(chunk(GROUP, where, data)) +				 +			elif foundchar in my_cc[CC_RBRACE]: +				if lvl <= 1: +					raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where) +				if  lvl == 1 and mode != mode(MODE_REGULAR): +					raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)' +				return where + 1, result +				 +			elif foundchar in my_cc[CC_ESCAPE]: +				# +				# call the routine that actually deals with +				#     this problem. If do_ret is None, than +				#     return the value of do_ret +				# +				# Note that handle_cs might call this routine +				#     recursively again... +				# +				do_ret, newpos = handlecs(buf, where, \ +					  curpmode, lvl, result, end) +				if do_ret != None: +					return do_ret +					 +			elif foundchar in my_cc[CC_COMMENT]: +				newpos, data = parseit(buf, \ +					  mode(MODE_COMMENT), where+1, lvl) +				result.append(chunk(COMMENT, where, data)) +				 +			elif foundchar in my_cc[CC_MATHSHIFT]: +				# note that recursive calls to math-mode +				# scanning are called with recursion-level 0 +				# again, in order to check for bad mathend +				# +				if where + 1 != end and \ +					  buf[where + 1] in \ +					  my_cc[CC_MATHSHIFT]: +					# +					# double mathshift, e.g. '$$' +					# +					if curpmode == mode(MODE_REGULAR): +						newpos, data = parseit(buf, \ +							  mode(MODE_DMATH), \ +							  where+2, 0) +						result.append(chunk(DMATH, \ +							  where, data)) +					elif curpmode == mode(MODE_MATH): +						raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) +					elif lvl != 1: +						raise error, 'bad mathend.' + \ +							  lle(lvl, buf, where) +					else: +						return where + 2, result +				else: +					# +					# single math shift, e.g. '$' +					# +					if curpmode == mode(MODE_REGULAR): +						newpos, data = parseit(buf, \ +							  mode(MODE_MATH), \ +							  where+1, 0) +						result.append(chunk(MATH, \ +							  where, data)) +					elif curpmode == mode(MODE_DMATH): +						raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) +					elif lvl != 1: +						raise error, 'bad mathend.' 
+ \ +							  lv(lvl, buf, where) +					else: +						return where + 1, result +					 +			elif foundchar in my_cc[CC_IGNORE]: +				print 'warning: ignored char', `foundchar` +				newpos = where + 1 +				 +			elif foundchar in my_cc[CC_ACTIVE]: +				result.append(chunk(ACTIVE, where, foundchar)) +				newpos = where + 1 +				 +			elif foundchar in my_cc[CC_INVALID]: +				raise error, 'invalid char ' + `foundchar` +				newpos = where + 1 +				 +			elif foundchar in my_cc[CC_ENDLINE]: +				# +				# after an end of line, eat the rest of +				# whitespace on the beginning of the next line +				# this is what LaTeX more or less does +				# +				# also, try to indicate double newlines (\par) +				# +				lineno = lineno + 1 +				savedwhere = where +				newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl) +				if newpos != end and buf[newpos] in \ +					  my_cc[CC_ENDLINE]: +					result.append(chunk(DENDLINE, \ +						  savedwhere, foundchar)) +				else: +					result.append(chunk(ENDLINE, \ +						  savedwhere, foundchar)) +			else: +				result.append(chunk(OTHER, where, foundchar)) +				newpos = where + 1 +				 +	elif parsemode == mode(MODE_CS_SCAN): +		# +		# scan for a control sequence token. `\ape', `\nut' or `\%' +		# +		if start == end: +			raise EOFError, 'can\'t find end of csname' +		pos = rc_cs_scan.search(buf, start) +		if pos < 0: +			pos = end +		if pos == start: +			# first non-letter right where we started the search +			# ---> the control sequence name consists of one single +			# character. Also: don't eat white space... +			if buf[pos] in my_cc[CC_ENDLINE]: +				lineno = lineno + 1 +			pos = pos + 1 +			return pos, (start, pos) +		else: +			spos = pos +			if buf[pos] == '\n': +				lineno = lineno + 1 +				spos = pos + 1 +			pos2, dummy = parseit(buf, \ +				  mode(MODE_GOBBLEWHITE), spos, lvl) +			return pos2, (start, pos) + +	elif parsemode == mode(MODE_GOBBLEWHITE): +		if start == end: +			return start, '' +		pos = rc_endwhite.search(buf, start) +		if pos < 0: +			pos = start +		return pos, (start, pos) + +	elif parsemode == mode(MODE_COMMENT): +		pos = rc_comment.search(buf, start) +		lineno = lineno + 1 +		if pos < 0: +			print 'no newline perhaps?' +			raise EOFError, 'can\'t find end of comment' +		pos = pos + 1 +		pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl) +		return pos2, (start, pos) + + +	else: +		raise error, 'Unknown mode (' + `parsemode` + ')' + + +#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl) + +#boxcommands = 'mbox', 'fbox' +#defcommands = 'def', 'newcommand' + +endverbstr = '\\end{verbatim}' + +re_endverb = regex.compile(un_re(endverbstr)) + +# +# handlecs: helper function for parseit, for the special thing we might +#     wanna do after certain command control sequences +# returns: None or return_data, newpos +# +# in the latter case, the calling function is instructed to immediately +# return with the data in return_data +# +def handlecs(buf, where, curpmode, lvl, result, end): +	global lineno + +	# get the control sequence name... +	newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl) +	saveddata = data +	 +	if s(buf, data) in ('begin', 'end'): +		# skip the expected '{' and get the LaTeX-envname '}' +		newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl) +		if len(data) != 1: +			raise error, 'expected 1 chunk of data.' + \ +				  lle(lvl, buf, where) + +		# yucky, we've got an environment +		envname = s(buf, data[0].data) +		##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' 
+ lv(lvl) +		if s(buf, saveddata) == 'begin' and envname == 'verbatim': +			# verbatim deserves special treatment +			pos = re_endverb.search(buf, newpos) +			if pos < 0: +				raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where) +			result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))]))) +			newpos = pos + len(endverbstr) +			 +		elif s(buf, saveddata) == 'begin': +			# start parsing recursively... If that parse returns +			# from an '\end{...}', then should the last item of +			# the returned data be a string containing the ended +			# environment +			newpos, data = parseit(buf, curpmode, newpos, lvl) +			if not data or type(data[-1]) != type(''): +				raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos) +			retenv = data[-1] +			del data[-1] +			if retenv != envname: +				#[`retenv`, `envname`] +				raise error, 'environments do not match.' + \ +					  lle(lvl, buf, where) + \ +					  epsilon(buf, newpos) +			result.append(chunk(ENV, where, (retenv, data))) +		else: +			# 'end'... append the environment name, as just +			# pointed out, and order parsit to return... +			result.append(envname) +			##print 'POINT of return: ' + epsilon(buf, newpos) +			# the tuple will be returned by parseit +			return (newpos, result), newpos + +	# end of \begin ... \end handling +			 +	elif s(buf, data)[0:2] == 'if': +		# another scary monster: the 'if' directive +		flag = s(buf, data)[2:] + +		# recursively call parseit, just like environment above.. +		# the last item of data should contain the if-termination +		# e.g., 'else' of 'fi' +		newpos, data = parseit(buf, curpmode, newpos, lvl) +		if not data or data[-1] not in ('else', 'fi'): +			raise error, 'wrong if... termination' + \ +				  lle(lvl, buf, where) + epsilon(buf, newpos) + +		ifterm = data[-1] +		del data[-1] +		# 0 means dont_negate flag +		result.append(chunk(IF, where, (flag, 0, data))) +		if ifterm == 'else': +			# do the whole thing again, there is only one way +			# to end this one, by 'fi' +			newpos, data = parseit(buf, curpmode, newpos, lvl) +			if not data or data[-1] not in ('fi', ): +				raise error, 'wrong if...else... termination' \ +					  + lle(lvl, buf, where) \ +					  + epsilon(buf, newpos) + +			ifterm = data[-1] +			del data[-1] +			result.append(chunk(IF, where, (flag, 1, data))) +		#done implicitely: return None, newpos + +	elif s(buf, data) in ('else', 'fi'): +		result.append(s(buf, data)) +		# order calling party to return tuple +		return (newpos, result), newpos + +	# end of \if, \else, ... \fi handling + +	elif s(buf, saveddata) == 'verb': +		x2 = saveddata[1] +		result.append(chunk(CSNAME, where, data)) +		if x2 == end: +			raise error, 'premature end of command.' + lle(lvl, buf, where) +		delimchar = buf[x2] +		print 'VERB: delimchar ' + `delimchar` +		pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1) +		if pos < 0: +			raise error, 'end of \'verb\' argument (' + \ +				  `delimchar` + ') not found.' 
+ \ +				  lle(lvl, buf, where) +		result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))])) +		newpos = pos + 1 +	else: +		result.append(chunk(CSNAME, where, data)) +	return None, newpos + +# this is just a function to get the string value if the possible data-tuple +def s(buf, data): +	if type(data) == type(''): +		return data +	if len(data) != 2 or not (type(data[0]) == type(data[1]) == type(0)): +		raise TypeError, 'expected tuple of 2 integers' +	x1, x2 = data +	return buf[x1:x2] +	 + +##length, data1, i = getnextarg(length, buf, pp, i + 1) + +# make a deep-copy of some chunks +def crcopy(r): +	result = [] +	for x in r: +		result.append(chunkcopy(x)) +	return result +	 +		 + +# copy a chunk, would better be a method of class Chunk... +def chunkcopy(ch): +	if ch.chtype == chunk_type(GROUP): +		listc = ch.data[:] +		for i in range(len(listc)): +			listc[i] = chunkcopy(listc[i]) +		return chunk(GROUP, ch.where, listc) +	else: +		return chunk(ch.chtype, ch.where, ch.data) + + +# get next argument for TeX-macro, flatten a group (insert between) +# or return Command Sequence token, or give back one character +def getnextarg(length, buf, pp, item): + +	##wobj = Wobj().init() +	##dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) +	##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + +	while item < length and pp[item].chtype == chunk_type(ENDLINE): +		del pp[item] +		length = length - 1 +	if item >= length: +		raise error, 'no next arg.' + epsilon(buf, pp[-1].where) +	if pp[item].chtype == chunk_type(GROUP): +		newpp = pp[item].data +		del pp[item] +		length = length - 1 +		changeit(buf, newpp) +		length = length + len(newpp) +		pp[item:item] = newpp +		item = item + len(newpp) +		if len(newpp) < 10: +			wobj = Wobj().init() +			dumpit(buf, wobj.write, newpp) +			##print 'GETNEXTARG: inserted ' + `wobj.data` +		return length, item +	elif pp[item].chtype == chunk_type(PLAIN): +		#grab one char +		print 'WARNING: grabbing one char' +		if len(s(buf, pp[item].data)) > 1: +			pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1])) +			item, length = item+1, length+1 +			pp[item].data = s(buf, pp[item].data)[1:] +		else: +			item = item+1 +		return length, item +	else: +		try: +			str = `s(buf, ch.data)` +		except TypeError: +			str = `ch.data` +			if len(str) > 400: +				str = str[:400] + '...' +		print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str +		return length, item + + +# this one is needed to find the end of LaTeX's optional argument, like +# item[...] 
+re_endopt = regex.compile(']') + +# get a LaTeX-optional argument, you know, the square braces '[' and ']' +def getoptarg(length, buf, pp, item): + +	wobj = Wobj().init() +	dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) +	##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + +	if item >= length or \ +		  pp[item].chtype != chunk_type(PLAIN) or \ +		  s(buf, pp[item].data)[0] != '[': +		return length, item + +	pp[item].data = s(buf, pp[item].data)[1:] +	if len(pp[item].data) == 0: +		del pp[item] +		length = length-1 + +	while 1: +		if item == length: +			raise error, 'No end of optional arg found' +		if pp[item].chtype == chunk_type(PLAIN): +			text = s(buf, pp[item].data) +			pos = re_endopt.search(text) +			if pos >= 0: +				pp[item].data = text[:pos] +				if pos == 0: +					del pp[item] +					length = length-1 +				else: +					item=item+1 +				text = text[pos+1:] +				 +				while text and text[0] in ' \t': +					text = text[1:] +					 +				if text: +					pp.insert(item, chunk(PLAIN, 0, text)) +					length = length + 1 +				return length, item + +		item = item+1 + + +# Wobj just add write-requests to the ``data'' attribute +class Wobj: +	def init(self): +		self.data = '' +		return self +	def write(self, data): +		self.data = self.data + data +		 +# ignore these commands +ignoredcommands = ('bcode', 'ecode') +# map commands like these to themselves as plaintext +wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF') +# \{ --> {,  \} --> }, etc +themselves = ('{', '}', '.', '@') + wordsselves +# these ones also themselves (see argargs macro in myformat.sty) +inargsselves = (',', '[', ']', '(', ')') +# this is how *I* would show the difference between emph and strong +#  code 1 means: fold to uppercase +markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), \ +	  'strong': ('*', '*')} + +# recognise patter {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT} +fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'} + +# transparent for these commands +for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp', \ +	  'r', 'i', 't') + + +# try to remove macros and return flat text +def flattext(buf, pp): +	pp = crcopy(pp) +	print '---> FLATTEXT ' + `pp` +	wobj = Wobj().init() + +	i, length = 0, len(pp) +	while 1: +		if len(pp) != length: +			raise 'FATAL', 'inconsistent length' +		if i >= length: +			break +		ch = pp[i] +		i = i+1 +		if ch.chtype == chunk_type(PLAIN): +			pass +		elif ch.chtype == chunk_type(CSNAME): +			if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves: +				ch.chtype = chunk_type(PLAIN) +			elif s(buf, ch.data) == 'e': +				ch.chtype = chunk_type(PLAIN) +				ch.data = '\\' +			elif len(s(buf, ch.data)) == 1 \ +				  and s(buf, ch.data) in onlylatexspecial: +				ch.chtype = chunk_type(PLAIN) +				# if it is followed by an empty group, +				# remove that group, it was needed for +				# a true space +				if i < length \ +					  and pp[i].chtype==chunk_type(GROUP) \ +					  and len(pp[i].data) == 0: +					del pp[i] +					length = length-1 +						 +			elif s(buf, ch.data) in markcmds.keys(): +				length, newi = getnextarg(length, buf, pp, i) +				str = flattext(buf, pp[i:newi]) +				del pp[i:newi] +				length = length - (newi - i) +				ch.chtype = chunk_type(PLAIN) +				markcmd = s(buf, ch.data) +				x = markcmds[markcmd] +				if type(x) == type(()): +					pre, after = x +					str = pre+str+after +				elif x == 1: +					str = string.upper(str) +				else: +					raise 'FATAL', 
'corrupt markcmds' +				ch.data = str +			else: +				if s(buf, ch.data) not in ignoredcommands: +					print 'WARNING: deleting command ' + `s(buf, ch.data)` +					print 'PP' + `pp[i-1]` +				del pp[i-1] +				i, length = i-1, length-1 +		elif ch.chtype == chunk_type(GROUP): +			length, newi = getnextarg(length, buf, pp, i-1) +			i = i-1 +##			str = flattext(buf, crcopy(pp[i-1:newi])) +##			del pp[i:newi] +##			length = length - (newi - i) +##			ch.chtype = chunk_type(PLAIN) +##			ch.data = str +		else: +			pass +		 +	dumpit(buf, wobj.write, pp) +	print 'FLATTEXT: RETURNING ' + `wobj.data` +	return wobj.data + +# try to generate node names (a bit shorter than the chapter title) +# note that the \nodename command (see elsewhere) overules these efforts +def invent_node_names(text): +	words = string.split(text) + +	print 'WORDS ' + `words` + +	if len(words) == 2 \ +		  and string.lower(words[0]) == 'built-in' \ +		  and string.lower(words[1]) not in ('modules', 'functions'): +		return words[1] +	if len(words) == 3 and string.lower(words[1]) == 'module': +		return words[2] +	if len(words) == 3 and string.lower(words[1]) == 'object': +		return string.join(words[0:2]) +	if len(words) > 4 and string.lower(string.join(words[-4:])) == \ +		  'methods and data attributes': +		return string.join(words[:2]) +	return text +	 +re_commas_etc = regex.compile('[,`\'@{}]') + +re_whitespace = regex.compile('[ \t]*') + + +##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename') + +# look if the next non-white stuff is also a command, resulting in skipping +# double endlines (DENDLINE) too, and thus omitting \par's +# Sometimes this is too much, maybe consider DENDLINE's as stop +def next_command_p(length, buf, pp, i, cmdname): + +	while 1: +		if i >= len(pp): +			break +		ch = pp[i] +		i = i+1 +		if ch.chtype == chunk_type(ENDLINE): +			continue +		if ch.chtype == chunk_type(DENDLINE): +			continue +		if ch.chtype == chunk_type(PLAIN): +			if re_whitespace.search(s(buf, ch.data)) == 0 and \ +				  re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)): +				continue +			return -1 +		if ch.chtype == chunk_type(CSNAME): +			if s(buf, ch.data) == cmdname: +				return i # _after_ the command +			return -1 +		return -1 +			 +				 +# things that are special to LaTeX, but not to texi.. +onlylatexspecial = '_~^$#&%' + +class Struct(): pass + +hist = Struct() +out = Struct() + +def startchange(): +	global hist, out + +	hist.inenv = [] +	hist.nodenames = [] +	hist.cindex = [] +	hist.inargs = 0 +	hist.enumeratenesting, hist.itemizenesting = 0, 0 + +	out.doublenodes = [] +	out.doublecindeces = [] +	 + +spacech = [chunk(PLAIN, 0, ' ')] +commach = [chunk(PLAIN, 0, ', ')] +cindexch = [chunk(CSLINE, 0, 'cindex')] + +# the standard variation in symbols for itemize +itemizesymbols = ['bullet', 'minus', 'dots'] + +# same for enumerate +enumeratesymbols = ['1', 'A', 'a'] + +## +## \begin{ {func,data,exc}desc }{name}... 
+##   the resulting texi-code is dependent on the contents of indexsubitem +## + +# indexsubitem: `['XXX', 'function'] +# funcdesc: +#     deffn {`idxsi`} NAME (FUNCARGS) + +# indexsubitem: `['XXX', 'method']` +# funcdesc: +#     defmethod {`idxsi[0]`} NAME (FUNCARGS) + +# indexsubitem: `['in', 'module', 'MODNAME']' +# datadesc: +#     defcv data {`idxsi[1:]`} NAME +# excdesc: +#     defcv exception {`idxsi[1:]`} NAME +# funcdesc: +#     deffn {function of `idxsi[1:]`} NAME (FUNCARGS) + +# indexsubitem: `['OBJECT', 'attribute']' +# datadesc +#     defcv attribute {`OBJECT`} NAME + + +## this routine will be called on \begin{funcdesc}{NAME}{ARGS} +##   or \funcline{NAME}{ARGS} +## +def do_funcdesc(length, buf, pp, i): +	startpoint = i-1 +	ch = pp[startpoint] +	wh = ch.where +	length, newi = getnextarg(length, buf, pp, i) +	funcname = chunk(GROUP, wh, pp[i:newi]) +	del pp[i:newi] +	length = length - (newi-i) +	save = hist.inargs +	hist.inargs = 1 +	length, newi = getnextarg(length, buf, pp, i) +	hist.inargs = save +	del save +	the_args = [chunk(PLAIN, wh, '()'[0])] + \ +		  pp[i:newi] + \ +		  [chunk(PLAIN, wh, '()'[1])] +	del pp[i:newi] +	length = length - (newi-i) + +	idxsi = hist.indexsubitem	# words +	command = '' +	cat_class = '' +	if idxsi and idxsi[-1] == 'method': +		command = 'defmethod' +		cat_class = string.join(idxsi[:-1]) +	elif len(idxsi) == 2 and idxsi[1] == 'function': +		command = 'deffn' +		cat_class = string.join(idxsi) +	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: +		command = 'deffn' +		cat_class = 'function of ' + string.join(idxsi[1:]) + +	if not command: +		raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` +		 +	ch.chtype = chunk_type(CSLINE) +	ch.data = command +	 +	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] +	cslinearg.append(chunk(PLAIN, wh, ' ')) +	cslinearg.append(funcname) +	cslinearg.append(chunk(PLAIN, wh, ' ')) +	l = len(cslinearg) +	cslinearg[l:l] = the_args + +	pp.insert(i, chunk(GROUP, wh, cslinearg)) +	i, length = i+1, length+1 +	hist.command = command +	return length, i +	 + +## this routine will be called on \begin{excdesc}{NAME} +## or \excline{NAME} +##	 +def do_excdesc(length, buf, pp, i): +	startpoint = i-1 +	ch = pp[startpoint] +	wh = ch.where +	length, newi = getnextarg(length, buf, pp, i) +	excname = chunk(GROUP, wh, pp[i:newi]) +	del pp[i:newi] +	length = length - (newi-i) + +	idxsi = hist.indexsubitem	# words +	command = '' +	cat_class = '' +	class_class = '' +	if len(idxsi) == 2 and idxsi[1] == 'exception': +		command = 'defvr' +		cat_class = string.join(idxsi) +	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: +		command = 'defcv' +		cat_class = 'exception' +		class_class = string.join(idxsi[1:]) +	elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']: +		command = 'defcv' +		cat_class = 'exception' +		class_class = string.join(idxsi[2:]) +		 + +	if not command: +		raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` +		 +	ch.chtype = chunk_type(CSLINE) +	ch.data = command +	 +	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] +	cslinearg.append(chunk(PLAIN, wh, ' ')) +	if class_class: +		cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) +		cslinearg.append(chunk(PLAIN, wh, ' ')) +	cslinearg.append(excname) + +	pp.insert(i, chunk(GROUP, wh, cslinearg)) +	i, length = i+1, length+1 +	hist.command = command +	return length, i + +## same for datadesc or dataline... 
+def do_datadesc(length, buf, pp, i): +	startpoint = i-1 +	ch = pp[startpoint] +	wh = ch.where +	length, newi = getnextarg(length, buf, pp, i) +	dataname = chunk(GROUP, wh, pp[i:newi]) +	del pp[i:newi] +	length = length - (newi-i) + +	idxsi = hist.indexsubitem	# words +	command = '' +	cat_class = '' +	class_class = '' +	if len(idxsi) == 2 and idxsi[1] == 'attribute': +		command = 'defcv' +		cat_class = 'attribute' +		class_class = idxsi[0] +	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: +		command = 'defcv' +		cat_class = 'data' +		class_class = string.join(idxsi[1:]) +	elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']: +		command = 'defcv' +		cat_class = 'data' +		class_class = string.join(idxsi[2:]) +		 + +	if not command: +		raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` +		 +	ch.chtype = chunk_type(CSLINE) +	ch.data = command +	 +	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] +	cslinearg.append(chunk(PLAIN, wh, ' ')) +	if class_class: +		cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) +		cslinearg.append(chunk(PLAIN, wh, ' ')) +	cslinearg.append(dataname) + +	pp.insert(i, chunk(GROUP, wh, cslinearg)) +	i, length = i+1, length+1 +	hist.command = command +	return length, i +	 + +# regular indices: those that are not set in tt font by default.... +regindices = ('cindex', ) + +# remove illegal characters from node names +def rm_commas_etc(text): +	result = '' +	changed = 0 +	while 1: +		pos = re_commas_etc.search(text) +		if pos >= 0: +			changed = 1 +			result = result + text[:pos] +			text = text[pos+1:] +		else: +			result = result + text +			break +	if changed: +		print 'Warning: nodename changhed to ' + `result` + +	return result +			 +# boolean flags +flags = {'texi': 1} +	 + +## +## changeit: the actual routine, that changes the contents of the parsed +##           chunks +## + +def changeit(buf, pp): +	global onlylatexspecial, hist, out + +	i, length = 0, len(pp) +	while 1: +		# sanity check: length should always equal len(pp) +		if len(pp) != length: +			raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)` +		if i >= length: +			break +		ch = pp[i] +		i = i + 1 + +		if type(ch) == type(''): +			#normally, only chunks are present in pp, +			# but in some cases, some extra info +			# has been inserted, e.g., the \end{...} clauses +			raise 'FATAL', 'got string, probably too many ' + `end` + +		if ch.chtype == chunk_type(GROUP): +			# check for {\em ...} constructs +			if ch.data and \ +				  ch.data[0].chtype == chunk_type(CSNAME) and \ +				  s(buf, ch.data[0].data) in fontchanges.keys(): +				k = s(buf, ch.data[0].data) +				del ch.data[0] +				pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k])) +				length, i = length+1, i+1 + +			# recursively parse the contents of the group +			changeit(buf, ch.data) + +		elif ch.chtype == chunk_type(IF): +			# \if... +			flag, negate, data = ch.data +			print 'IF: flag, negate = ' + `flag, negate` +			if flag not in flags.keys(): +				raise error, 'unknown flag ' + `flag` +				 +			value = flags[flag] +			if negate: +				value = (not value) +			del pp[i-1] +			length, i = length-1, i-1 +			if value: +				pp[i:i] = data +				length = length + len(data) +					 +			 +		elif ch.chtype == chunk_type(ENV): +			# \begin{...} .... 
+			envname, data = ch.data + +			#push this environment name on stack +			hist.inenv.insert(0, envname) + +			#append an endenv chunk after grouped data +			data.append(chunk(ENDENV, ch.where, envname)) +			##[`data`] + +			#delete this object +			del pp[i-1] +			i, length = i-1, length-1 + +			#insert found data +			pp[i:i] = data +			length = length + len(data) + +			if envname == 'verbatim': +				pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \ +					  chunk(GROUP, ch.where, [])] +				length, i = length+2, i+2 + +			elif envname == 'itemize': +				if hist.itemizenesting > len(itemizesymbols): +					raise error, 'too deep itemize nesting' +				ingroupch = [chunk(CSNAME, ch.where,\ +					  itemizesymbols[hist.itemizenesting])] +				hist.itemizenesting = hist.itemizenesting + 1 +				pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\ +					  chunk(GROUP, ch.where, ingroupch)] +				length, i = length+2, i+2 +				 +			elif envname == 'enumerate': +				if hist.enumeratenesting > len(enumeratesymbols): +					raise error, 'too deep enumerate nesting' +				ingroupch = [chunk(PLAIN, ch.where,\ +					  enumeratesymbols[hist.enumeratenesting])] +				hist.enumeratenesting = hist.enumeratenesting + 1 +				pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\ +					  chunk(GROUP, ch.where, ingroupch)] +				length, i = length+2, i+2 +				 +			elif envname == 'description': +				ingroupch = [chunk(CSNAME, ch.where, 'b')] +				pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \ +					  chunk(GROUP, ch.where, ingroupch)] +				length, i = length+2, i+2 +				 +			elif envname == 'tableiii': +				wh = ch.where +				newcode = [] +				 +				#delete tabular format description +				# e.g., {|l|c|l|} +				length, newi = getnextarg(length, buf, pp, i) +				del pp[i:newi] +				length = length - (newi-i) + +				newcode.append(chunk(CSLINE, wh, 'table')) +				ingroupch = [chunk(CSNAME, wh, 'asis')] +				newcode.append(chunk(GROUP, wh, ingroupch)) +				newcode.append(chunk(CSLINE, wh, 'item')) + +				#get the name of macro for @item +				# e.g., {code} +				length, newi = getnextarg(length, buf, pp, i) +				 +				if newi-i != 1: +					raise error, 'Sorry, expected 1 chunk argument' +				if pp[i].chtype != chunk_type(PLAIN): +					raise error, 'Sorry, expected plain text argument' +				hist.itemargmacro = s(buf, pp[i].data) +				del pp[i:newi] +				length = length - (newi-i) + +				for count in range(3): +					length, newi = getnextarg(length, buf, pp, i) +					emphgroup = [\ +						  chunk(CSNAME, wh, 'emph'), \ +						  chunk(GROUP, 0, pp[i:newi])] +					del pp[i:newi] +					length = length - (newi-i) +					if count == 0: +						itemarg = emphgroup +					elif count == 2: +						itembody = itembody + \ +							  [chunk(PLAIN, wh, '  ---  ')] + \ +							  emphgroup +					else: +						itembody = emphgroup +				newcode.append(chunk(GROUP, wh, itemarg)) +				newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')] +				pp[i:i] = newcode +				l = len(newcode) +				length, i = length+l, i+l +				del newcode, l + +				if length != len(pp): +					raise 'STILL, SOMETHING wrong', `i` + + +			elif envname == 'funcdesc': +				pp.insert(i, chunk(PLAIN, ch.where, '')) +				i, length = i+1, length+1 +				length, i = do_funcdesc(length, buf, pp, i) +					   +			elif envname == 'excdesc': +				pp.insert(i, chunk(PLAIN, ch.where, '')) +				i, length = i+1, length+1 +				length, i = do_excdesc(length, buf, pp, i) +					   +			elif envname == 'datadesc': +				pp.insert(i, chunk(PLAIN, ch.where, '')) +				i, length = i+1, length+1 +				length, i = 
do_datadesc(length, buf, pp, i) +					   +			else: +				print 'WARNING: don\'t know what to do with env ' + `envname` +				 +		elif ch.chtype == chunk_type(ENDENV): +			envname = ch.data +			if envname != hist.inenv[0]: +				raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]` +			del hist.inenv[0] +			del pp[i-1] +			i, length = i-1, length-1 + +			if envname == 'verbatim': +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'example')])] +				i, length = i+2, length+2 +			elif envname == 'itemize': +				hist.itemizenesting = hist.itemizenesting - 1 +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'itemize')])] +				i, length = i+2, length+2 +			elif envname == 'enumerate': +				hist.enumeratenesting = hist.enumeratenesting-1 +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'enumerate')])] +				i, length = i+2, length+2 +			elif envname == 'description': +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'table')])] +				i, length = i+2, length+2 +			elif envname == 'tableiii': +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'table')])] +				i, length = i+2, length + 2 +				pp.insert(i, chunk(DENDLINE, ch.where, '\n')) +				i, length = i+1, length+1 + +			elif envname in ('funcdesc', 'excdesc', 'datadesc'): +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, hist.command)])] +				i, length = i+2, length+2 +			else: +				print 'WARNING: ending env ' + `envname` + 'has no actions' +				 +		elif ch.chtype == chunk_type(CSNAME): +			# control name transformations +			if s(buf, ch.data) in ignoredcommands: +				del pp[i-1] +				i, length = i-1, length-1 +			elif s(buf, ch.data) == '@' and \ +				  i != length and \ +				  pp[i].chtype == chunk_type(PLAIN) and \ +				  s(buf, pp[i].data)[0] == '.': +				# \@. --> \. --> @. +				ch.data = '.' +				del pp[i] +				length = length-1 +			elif s(buf, ch.data) == '\\': +				# \\ --> \* --> @* +				ch.data = '*' +			elif len(s(buf, ch.data)) == 1 and \ +				  s(buf, ch.data) in onlylatexspecial: +				ch.chtype = chunk_type(PLAIN) +				# check if such a command is followed by +				# an empty group: e.g., `\%{}'.  If so, remove +				# this empty group too +				if i < length and \ +					  pp[i].chtype == chunk_type(GROUP) \ +					  and len(pp[i].data) == 0: +					del pp[i] +					length = length-1 + +			elif hist.inargs and s(buf, ch.data) in inargsselves: +				# This is the special processing of the +				# arguments of the \begin{funcdesc}... or +				# \funcline... arguments +				# \, --> , \[ --> [, \] --> ] +				ch.chtype = chunk_type(PLAIN) +				 +			elif s(buf, ch.data) == 'renewcommand': +				# \renewcommand{\indexsubitem}.... 
+				i, length = i-1, length-1 +				del pp[i] +				length, newi = getnextarg(length, buf, pp, i) +				if newi-i == 1 \ +					  and i < length \ +					  and pp[i].chtype == chunk_type(CSNAME) \ +					  and s(buf, pp[i].data) == 'indexsubitem': +					del pp[i:newi] +					length = length - (newi-i) +					length, newi = getnextarg(length, buf, pp, i) +					text = flattext(buf, pp[i:newi]) +					if text[0] != '(' or text[-1] != ')': +						raise error, 'expected indexsubitme enclosed in braces' +					words = string.split(text[1:-1]) +					hist.indexsubitem = words +					del text, words +				else: +					print 'WARNING: renewcommand with unsupported arg removed' +				del pp[i:newi] +				length = length - (newi-i) + +			elif s(buf, ch.data) == 'item': +				ch.chtype = chunk_type(CSLINE) +				length, newi = getoptarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				i, length = i+1, length+1 +				 +			elif s(buf, ch.data) == 'ttindex': +				idxsi = hist.indexsubitem + +				cat_class = '' +				if len(idxsi) >= 2 and idxsi[1] in \ +					  ('method', 'function'): +					command = 'findex' +				elif len(idxsi) >= 2 and idxsi[1] in \ +					  ('exception', 'object'): +					command = 'vindex' +				else: +					print 'WARNING: can\'t catagorize ' + `idxsi` + ' for \'ttindex\' command' +					command = 'cindex' + +				if not cat_class: +					cat_class = '('+string.join(idxsi)+')' + +				ch.chtype = chunk_type(CSLINE) +				ch.data = command + +				length, newi = getnextarg(length, buf, pp, i) +				arg = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				cat_arg = [chunk(PLAIN, ch.where, cat_class)] + +				# determine what should be set in roman, and +				# what in tt-font +				if command in regindices: + +					arg = [chunk(CSNAME, ch.where, 't'), \ +						  chunk(GROUP, ch.where, arg)] +				else: +					cat_arg = [chunk(CSNAME, ch.where, 'r'), \ +						  chunk(GROUP, ch.where, cat_arg)] + +				ingroupch = arg + \ +					  [chunk(PLAIN, ch.where, ' ')] + \ +					  cat_arg +				 +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + +				 +			elif s(buf, ch.data) == 'ldots': +				# \ldots --> \dots{} --> @dots{} +				ch.data = 'dots' +				if i == length \ +					  or pp[i].chtype != chunk_type(GROUP) \ +					  or pp[i].data != []: +					pp.insert(i, chunk(GROUP, ch.where, [])) +					i, length = i+1, length+1 +			elif s(buf, ch.data) in wordsselves: +				# \UNIX --> UNIX +				ch.chtype = chunk_type(PLAIN) +				if i != length \ +					  and pp[i].chtype == chunk_type(GROUP) \ +					  and pp[i].data == []: +					del pp[i] +					length = length-1 +			elif s(buf, ch.data) in for_texi: +				pass + +			elif s(buf, ch.data) == 'e': +				# \e --> \ +				ch.data = '\\' +				ch.chtype = chunk_type(PLAIN) +			elif s(buf, ch.data) == 'lineiii': +				# This is the most tricky one +				# \lineiii{a1}{a2}[{a3}] --> +				# @item @<cts. of itemargmacro>{a1} +				#  a2 [ -- a3] +				# +				##print 'LINEIIIIII!!!!!!!' 
+##				wobj = Wobj().init() +##				dumpit(buf, wobj.write, pp[i-1:i+5]) +##				print '--->' + wobj.data + '<----' +				if not hist.inenv: +					raise error, \ +						  'no environment for lineiii' +				if hist.inenv[0] != 'tableiii': +					raise error, \ +						  'wrong command (' + \ +						  `'lineiii'` + \ +						  ') in wrong environment (' \ +						  + `hist.inenv[0]` + ')' +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'item' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = [chunk(CSNAME, 0, \ +					  hist.itemargmacro), \ +					  chunk(GROUP, 0, pp[i:newi])] +				del pp[i:newi] +				length = length - (newi-i) +##				print 'ITEM ARG: --->', +##				wobj = Wobj().init() +##				dumpit(buf, wobj.write, ingroupch) +##				print wobj.data, '<---' +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				grouppos = i +				i, length = i+1, length+1 +				length, i = getnextarg(length, buf, pp, i) +				length, newi = getnextarg(length, buf, pp, i) +				if newi > i: +					# we have a 3rd arg +					pp.insert(i, chunk(PLAIN, ch.where, '  ---  ')) +					i = newi + 1 +					length = length + 1 +##					pp[grouppos].data = pp[grouppos].data \ +##						  + [chunk(PLAIN, ch.where, '  ')] \ +##						  + pp[i:newi] +##					del pp[i:newi] +##					length = length - (newi-i) +				if length != len(pp): +					raise 'IN LINEIII IS THE ERR', `i` + +			elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'): +				#\xxxsection{A} ----> +				# @node A, , , +				# @xxxsection A +				## also: remove commas and quotes +				ch.chtype = chunk_type(CSLINE) +				length, newi = getnextarg(length, buf, pp, i) +				afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename') +				if afternodenamecmd < 0: +					cp1 = crcopy(pp[i:newi]) +					pp[i:newi] = [\ +						  chunk(GROUP, ch.where, \ +						  pp[i:newi])] +					length, newi = length - (newi-i) + 1, \ +						  i+1 +					text = flattext(buf, cp1) +					text = invent_node_names(text) +				else: +					length, endarg = getnextarg(length, buf, pp, afternodenamecmd) +					cp1 = crcopy(pp[afternodenamecmd:endarg]) +					del pp[newi:endarg] +					length = length - (endarg-newi) + +					pp[i:newi] = [\ +						  chunk(GROUP, ch.where, \ +						  pp[i:newi])] +					length, newi = length - (newi-i) + 1, \ +						  i + 1 +					text = flattext(buf, cp1) +				if text[-1] == '.': +					text = text[:-1] +				print 'FLATTEXT:', `text` +				if text in hist.nodenames: +					print 'WARNING: node name ' + `text` + ' already used' +					out.doublenodes.append(text) +				else: +					hist.nodenames.append(text) +				text = rm_commas_etc(text) +				pp[i-1:i-1] = [\ +					  chunk(CSLINE, ch.where, 'node'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, text+', , ,')\ +					  ])] +				i, length = newi+2, length+2 + +			elif s(buf,ch.data) == 'funcline': +				# fold it to a very short environment +				pp[i-1:i-1] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, hist.command)])] +				i, length = i+2, length+2 +				length, i = do_funcdesc(length, buf, pp, i) +				 +			elif s(buf,ch.data) == 'dataline': +				pp[i-1:i-1] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, hist.command)])] +				i, length = i+2, length+2 +				length, i = do_datadesc(length, buf, pp, i) +				 +			elif s(buf,ch.data) == 'excline': +				pp[i-1:i-1] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +				
	  chunk(PLAIN, ch.where, hist.command)])] +				i, length = i+2, length+2 +				length, i = do_excdesc(length, buf, pp, i) +				 +				 +			elif s(buf, ch.data) == 'index': +				#\index{A} ---> +				# @cindex A +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				length, newi = getnextarg(length, buf, pp, i) + +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + +			elif s(buf, ch.data) == 'bifuncindex': +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'findex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				ingroupch.append(chunk(PLAIN, ch.where, ' ')) +				ingroupch.append(chunk(CSNAME, ch.where, 'r')) +				ingroupch.append(chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, \ +					  '(built-in function)')])) + +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 +				 +				 +			elif s(buf, ch.data) == 'opindex': +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'findex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				ingroupch.append(chunk(PLAIN, ch.where, ' ')) +				ingroupch.append(chunk(CSNAME, ch.where, 'r')) +				ingroupch.append(chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, \ +					  '(operator)')])) + +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + + +			elif s(buf, ch.data) == 'bimodindex': +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'pindex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				ingroupch.append(chunk(PLAIN, ch.where, ' ')) +				ingroupch.append(chunk(CSNAME, ch.where, 'r')) +				ingroupch.append(chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, \ +					  '(built-in)')])) + +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + +			elif s(buf, ch.data) == 'sectcode': +				ch.data = 'code' + + +			elif s(buf, ch.data) == 'stmodindex': +				ch.chtype = chunk_type(CSLINE) +				# use the program index as module index +				ch.data = 'pindex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				ingroupch.append(chunk(PLAIN, ch.where, ' ')) +				ingroupch.append(chunk(CSNAME, ch.where, 'r')) +				ingroupch.append(chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, \ +					  '(standard)')])) + +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + + +			elif s(buf, ch.data) == 'stindex': +				# XXX must actually go to newindex st +				wh = ch.where +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = [chunk(CSNAME, wh, 'code'), \ +					  chunk(GROUP, wh, pp[i:newi])] + +				del pp[i:newi] +				length = length - (newi-i) + +				t = ingroupch[:] +				t.append(chunk(PLAIN, wh, ' statement')) + +				pp.insert(i, chunk(GROUP, wh, t)) +				i, length = i+1, length+1 + +				pp.insert(i, chunk(CSLINE, wh, 'cindex')) +				i, length = i+1, length+1 + +				t = ingroupch[:] +				t.insert(0, chunk(PLAIN, wh, 'statement, ')) +				 +				pp.insert(i, chunk(GROUP, wh, t)) +				i, length = i+1, length+1 +				 + +			elif s(buf, ch.data) == 'indexii': +				#\indexii{A}{B} ---> +				# 
@cindex A B +				# @cindex B, A +				length, newi = getnextarg(length, buf, pp, i) +				cp11 = pp[i:newi] +				cp21 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp12 = pp[i:newi] +				cp22 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) + +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				pp.insert(i, chunk(GROUP, ch.where, cp11 + [\ +					  chunk(PLAIN, ch.where, ' ')] + cp12)) +				i, length = i+1, length+1 +				pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ +					  chunk(GROUP, ch.where, cp22 + [\ +					  chunk(PLAIN, ch.where, ', ')]+ cp21)] +				i, length = i+2, length+2 + +			elif s(buf, ch.data) == 'indexiii': +				length, newi = getnextarg(length, buf, pp, i) +				cp11 = pp[i:newi] +				cp21 = crcopy(pp[i:newi]) +				cp31 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp12 = pp[i:newi] +				cp22 = crcopy(pp[i:newi]) +				cp32 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp13 = pp[i:newi] +				cp23 = crcopy(pp[i:newi]) +				cp33 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) + +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				pp.insert(i, chunk(GROUP, ch.where, cp11 + [\ +					  chunk(PLAIN, ch.where, ' ')] + cp12 \ +					  + [chunk(PLAIN, ch.where, ' ')] \ +					  + cp13)) +				i, length = i+1, length+1 +				pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ +					  chunk(GROUP, ch.where, cp22 + [\ +					  chunk(PLAIN, ch.where, ' ')]+ cp23\ +					  + [chunk(PLAIN, ch.where, ', ')] +\ +					  cp21)] +				i, length = i+2, length+2 +				pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ +					  chunk(GROUP, ch.where, cp33 + [\ +					  chunk(PLAIN, ch.where, ', ')]+ cp31\ +					  + [chunk(PLAIN, ch.where, ' ')] +\ +					  cp32)] +				i, length = i+2, length+2 +				 +				 +			elif s(buf, ch.data) == 'indexiv': +				length, newi = getnextarg(length, buf, pp, i) +				cp11 = pp[i:newi] +				cp21 = crcopy(pp[i:newi]) +				cp31 = crcopy(pp[i:newi]) +				cp41 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp12 = pp[i:newi] +				cp22 = crcopy(pp[i:newi]) +				cp32 = crcopy(pp[i:newi]) +				cp42 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp13 = pp[i:newi] +				cp23 = crcopy(pp[i:newi]) +				cp33 = crcopy(pp[i:newi]) +				cp43 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp14 = pp[i:newi] +				cp24 = crcopy(pp[i:newi]) +				cp34 = crcopy(pp[i:newi]) +				cp44 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) + +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				ingroupch = cp11 + \ +					  spacech + cp12 + \ +					  spacech + cp13 + \ +					  spacech + cp14 +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				i, length = i+1, length+1 +				ingroupch = cp22 + \ +					  spacech + cp23 + \ +					  spacech + cp24 + \ +					  commach + cp21 +				pp[i:i] = cindexch + [\ +					  chunk(GROUP, ch.where, ingroupch)] +				i, length = i+2, length+2 +				ingroupch = cp33 + \ +					  spacech + cp34 + \ +					  commach + cp31 + \ +					  spacech + cp32 +				pp[i:i] = cindexch + [\ +					  chunk(GROUP, 
ch.where, ingroupch)] +				i, length = i+2, length+2 +				ingroupch = cp44 + \ +					  commach + cp41 + \ +					  spacech + cp42 + \ +					  spacech + cp43 +				pp[i:i] = cindexch + [\ +					  chunk(GROUP, ch.where, ingroupch)] +				i, length = i+2, length+2 +				 +				 + +			else: +				print 'don\'t know what to do with keyword ' + `s(buf, ch.data)` + + +	 +re_atsign = regex.compile('[@{}]') +re_newline = regex.compile('\n') + +def dumpit(buf, wm, pp): + +	global out +	 +	i, length = 0, len(pp) + +	addspace = 0 +	 +	while 1: +		if len(pp) != length: +			raise 'FATAL', 'inconsistent length' +		if i == length: +			break +		ch = pp[i] +		i = i + 1 + +		if addspace: +			dospace = 1 +			addspace = 0 +		else: +			dospace = 0 + +		if ch.chtype == chunk_type(CSNAME): +			wm('@' + s(buf, ch.data)) +			if s(buf, ch.data) == 'node' and \ +				  pp[i].chtype == chunk_type(PLAIN) and \ +				  s(buf, pp[i].data) in out.doublenodes: +				##XXX doesnt work yet?? +				wm(' ZZZ-' + zfill(`i`, 4)) +			if s(buf, ch.data)[0] in string.letters: +				addspace = 1 +		elif ch.chtype == chunk_type(PLAIN): +			if dospace and s(buf, ch.data) not in (' ', '\t'): +				wm(' ') +			text = s(buf, ch.data) +			while 1: +				pos = re_atsign.search(text) +				if pos < 0: +					break +				wm(text[:pos] + '@' + text[pos]) +				text = text[pos+1:] +			wm(text) +		elif ch.chtype == chunk_type(GROUP): +			wm('{') +			dumpit(buf, wm, ch.data) +			wm('}') +		elif ch.chtype == chunk_type(DENDLINE): +			wm('\n\n') +			while i != length and pp[i].chtype in \ +				  (chunk_type(DENDLINE), chunk_type(ENDLINE)): +				i = i + 1 +		elif ch.chtype == chunk_type(OTHER): +			wm(s(buf, ch.data)) +		elif ch.chtype == chunk_type(ACTIVE): +			wm(s(buf, ch.data)) +		elif ch.chtype == chunk_type(ENDLINE): +			wm('\n') +		elif ch.chtype == chunk_type(CSLINE): +			if i >= 2 and pp[i-2].chtype not in \ +				  (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ +				  and (pp[i-2].chtype != chunk_type(PLAIN) \ +				  or s(buf, pp[i-2].data)[-1] != '\n'): +				   +				wm('\n') +			wm('@' + s(buf, ch.data)) +			if i == length: +				raise error, 'CSLINE expected another chunk' +			if pp[i].chtype != chunk_type(GROUP): +				raise error, 'CSLINE expected GROUP' +			if type(pp[i].data) != type([]): +				raise error, 'GROUP chould contain []-data' +			 +			wobj = Wobj().init() +			dumpit(buf, wobj.write, pp[i].data) +			i = i + 1 +			text = wobj.data +			del wobj +			if text: +				wm(' ') +				while 1: +					pos = re_newline.search(text) +					if pos < 0: +						break +					print 'WARNING: found newline in csline arg' +					wm(text[:pos] + ' ') +					text = text[pos+1:] +				wm(text) +			if i >= length or \ +				  pp[i].chtype not in (chunk_type(CSLINE), \ +				  chunk_type(ENDLINE), chunk_type(DENDLINE)) \ +				  and (pp[i].chtype != chunk_type(PLAIN) \ +				  or s(buf, pp[i].data)[0] != '\n'): +				wm('\n') +			 +		elif ch.chtype == chunk_type(COMMENT): +			print 'COMMENT: previous chunk =', pp[i-2] +			if pp[i-2].chtype == chunk_type(PLAIN): +				print 'PLAINTEXT =', `s(buf, pp[i-2].data)` +			if s(buf, ch.data) and \ +				  regex.match('^[ \t]*$', s(buf, ch.data)) < 0: +				if i >= 2 and pp[i-2].chtype not in \ +					  (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ +					  and not (pp[i-2].chtype == chunk_type(PLAIN) \ +					  and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0): +					print 'ADDING NEWLINE' +					wm('\n') +				wm('@c ' + s(buf, ch.data)) +		elif ch.chtype == chunk_type(IGNORE): +			pass +		else: +			try: +				str = `s(buf, 
ch.data)` +			except TypeError: +				str = `ch.data` +			if len(str) > 400: +				str = str[:400] + '...' +			print 'warning:', ch.chtype, 'not handled, data ' + str + + + +from posix import popen + +def main(): + +	 +	buf = open(sys.argv[1], 'r').read() +	restargs = sys.argv[2:] + +	w, pp = parseit(buf) +	startchange() +##	try: +	while 1: +		changeit(buf, pp) +##		pass +		break + +##	finally: +	while 1: +		outf = open('@out.texi', 'w') +		preamble = open('texipre.dat', 'r') +		while 1: +			l = preamble.readline() +			if not l: +				preamble.close() +				break +			outf.write(l) +		 +		dumpit(buf, outf.write, pp) + +		while restargs: +			del buf, pp +			buf = open(restargs[0], 'r').read() +			del restargs[0] +			w, pp = parseit(buf) +			startchange() +			changeit(buf, pp) +			dumpit(buf, outf.write, pp) + +		postamble = open('texipost.dat', 'r') +		while 1: +			l = postamble.readline() +			if not l: +				postamble.close() +				break +			outf.write(l) +		 +		outf.close() + +##		pass +		break +	 +	 | 
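
The script itself is three passes: `parseit()` tokenizes the LaTeX source into typed chunks (PLAIN, CSNAME, GROUP, COMMENT, ENV, ...), `changeit()` rewrites the chunk list in place (environments, the `\index*` commands, the `funcdesc`/`excdesc`/`datadesc` blocks), and `dumpit()` serializes the result as Texinfo; `main()` wraps the output between `texipre.dat` and `texipost.dat` and writes it to `@out.texi`. For readers who only want the shape of that pipeline without the 1992 idioms (string exceptions, backquote repr, the long-gone `regex` module), here is a minimal, self-contained sketch in modern Python. The chunk type names mirror the constants above, but the scanner regex and the tiny command table are invented for illustration and are not part of the original script.

```python
# Illustrative sketch only (not the 1992 code): the same chunk-scan /
# transform / dump idea in modern Python 3.  Chunk type names mirror the
# constants in partparse.py; the regex and CSMAP table are invented here.
import re
from dataclasses import dataclass

TOKEN = re.compile(r"""
      \\(?P<cs>[A-Za-z]+|.)        # control sequence, e.g. \emph or \%
    | (?P<lbrace>\{)               # start of a {...} group
    | %(?P<comment>[^\n]*)         # comment runs to end of line
    | (?P<plain>[^\\{}%]+)         # anything else is plain text
""", re.VERBOSE | re.DOTALL)

@dataclass
class Chunk:
    chtype: str        # 'PLAIN' | 'CSNAME' | 'GROUP' | 'COMMENT'
    where: int         # offset into the source buffer
    data: object       # text, or a list of sub-chunks for GROUP

def parse(buf, pos=0, level=0):
    """Return (chunks, next_pos); recursion handles {...} groups."""
    chunks = []
    while pos < len(buf):
        if buf[pos] == '}':
            if level == 0:
                raise ValueError(f"unmatched '}}' at offset {pos}")
            return chunks, pos + 1
        m = TOKEN.match(buf, pos)
        if m is None:                       # e.g. a lone trailing backslash
            chunks.append(Chunk('PLAIN', pos, buf[pos]))
            pos += 1
            continue
        where, pos = pos, m.end()
        if m.group('cs'):
            chunks.append(Chunk('CSNAME', where, m.group('cs')))
        elif m.group('lbrace'):
            sub, pos = parse(buf, pos, level + 1)
            chunks.append(Chunk('GROUP', where, sub))
        elif m.group('comment') is not None:
            chunks.append(Chunk('COMMENT', where, m.group('comment')))
        else:
            chunks.append(Chunk('PLAIN', where, m.group('plain')))
    if level:
        raise ValueError('missing closing brace')
    return chunks, pos

# Toy LaTeX -> Texinfo command map, standing in for changeit()'s rewriting.
CSMAP = {'emph': 'emph', 'code': 'code', 'ldots': 'dots', 'item': 'item'}

def dump(chunks):
    """Serialize chunks as Texinfo-ish text, like a stripped-down dumpit()."""
    out = []
    for ch in chunks:
        if ch.chtype == 'PLAIN':
            out.append(ch.data.replace('@', '@@'))   # '@' is special in Texinfo
        elif ch.chtype == 'CSNAME':
            out.append('@' + CSMAP.get(ch.data, ch.data))
        elif ch.chtype == 'GROUP':
            out.append('{' + dump(ch.data) + '}')
        elif ch.chtype == 'COMMENT':
            out.append('@c ' + ch.data + '\n')
    return ''.join(out)

if __name__ == '__main__':
    src = r"\emph{LaTeX} source % only a caricature of partparse.py"
    chunks, _ = parse(src)
    print(dump(chunks))    # -> @emph{LaTeX} source @c  only a caricature ...
```

Judging from `main()` above, the real script is driven as `python partparse.py first.tex [more.tex ...]` (the file names here are placeholders): it parses and transforms each named file in turn and writes everything to `@out.texi`, sandwiched between the contents of `texipre.dat` and `texipost.dat`.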
