| field | value | date |
|---|---|---|
| author | Guido van Rossum <guido@python.org> | 1992-12-08 14:37:55 +0000 |
| committer | Guido van Rossum <guido@python.org> | 1992-12-08 14:37:55 +0000 |
| commit | 95cd2ef1623d8f3ac574d617964e5b4e097bc54d (patch) | |
| tree | c763f4b5a345daddf1ac94020f2e5c227fc3b9dc /Doc/partparse.py | |
| parent | 2a7178efe1353aed277c0714e92a0790c84f7b88 (diff) | |
| download | cpython-git-95cd2ef1623d8f3ac574d617964e5b4e097bc54d.tar.gz | |
Incorporated Jan-Hein's changes and texinfo conversion.
Diffstat (limited to 'Doc/partparse.py')
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | Doc/partparse.py | 2137 |
1 file changed, 2137 insertions, 0 deletions
| diff --git a/Doc/partparse.py b/Doc/partparse.py new file mode 100644 index 0000000000..83b58bd89d --- /dev/null +++ b/Doc/partparse.py @@ -0,0 +1,2137 @@ +# +# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file, +#     and generate texinfo source. +# +# This is *not* a good example of good programming practices. In fact, this +#     file could use a complete rewrite, in order to become faster, more +#     easy extensible and maintainable. +# +# However, I added some comments on a few places for the pityful person who +#     would ever need to take a look into this file. +# +# Have I been clear enough?? +# +# -jh + + +import sys, string, regex + +# Different parse modes for phase 1 +MODE_REGULAR = 0 +MODE_VERBATIM = 1 +MODE_CS_SCAN = 2 +MODE_COMMENT = 3 +MODE_MATH = 4 +MODE_DMATH = 5 +MODE_GOBBLEWHITE = 6 + +the_modes = MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, \ +	  MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE + +# Show the neighbourhood of the scanned buffer +def epsilon(buf, where): +	wmt, wpt = where - 10, where + 10 +	if wmt < 0: +		wmt = 0 +	if wpt > len(buf): +		wpt = len(buf) +	return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.' + +# Should return the line number. never worked +def lin(): +	global lineno +	return ' Line ' + `lineno` + '.' + +# Displays the recursion level. +def lv(lvl): +	return ' Level ' + `lvl` + '.' + +# Combine the three previous functions. Used often. +def lle(lvl, buf, where): +	return lv(lvl) + lin() + epsilon(buf, where) +	 +	 +# This class is only needed for _symbolic_ representation of the parse mode. +class Mode: +	def init(self, arg): +		if arg not in the_modes: +			raise ValueError, 'mode not in the_modes' +		self.mode = arg +		return self + +	def __cmp__(self, other): +		if type(self) != type(other): +			other = mode(other) +		return cmp(self.mode, other.mode) + +	def __repr__(self): +		if self.mode == MODE_REGULAR: +			return 'MODE_REGULAR' +		elif self.mode == MODE_VERBATIM: +			return 'MODE_VERBATIM' +		elif self.mode == MODE_CS_SCAN: +			return 'MODE_CS_SCAN' +		elif self.mode == MODE_COMMENT: +			return 'MODE_COMMENT' +		elif self.mode == MODE_MATH: +			return 'MODE_MATH' +		elif self.mode == MODE_DMATH: +			return 'MODE_DMATH' +		elif self.mode == MODE_GOBBLEWHITE: +			return 'MODE_GOBBLEWHITE' +		else: +			raise ValueError, 'mode not in the_modes' + +# just a wrapper around a class initialisation +def mode(arg): +	return Mode().init(arg) + + +# After phase 1, the text consists of chunks, with a certain type +# this type will be assigned to the chtype member of the chunk +# the where-field contains the file position where this is found +# and the data field contains (1): a tuple describing start- end end +# positions of the substring (can be used as slice for the buf-variable), +# (2) just a string, mostly generated by the changeit routine, +# or (3) a list, describing a (recursive) subgroup of chunks +PLAIN = 0			# ASSUME PLAINTEXT, data = the text +GROUP = 1			# GROUP ({}), data = [chunk, chunk,..] +CSNAME = 2			# CONTROL SEQ TOKEN, data = the command +COMMENT = 3			# data is the actual comment +DMATH = 4			# DISPLAYMATH, data = [chunk, chunk,..] 
+MATH = 5			# MATH, see DISPLAYMATH +OTHER = 6			# CHAR WITH CATCODE OTHER, data = char +ACTIVE = 7			# ACTIVE CHAR +GOBBLEDWHITE = 8		# Gobbled LWSP, after CSNAME +ENDLINE = 9			# END-OF-LINE, data = '\n' +DENDLINE = 10			# DOUBLE EOL, data='\n', indicates \par +ENV = 11			# LaTeX-environment +					# data =(envname,[ch,ch,ch,.]) +CSLINE = 12			# for texi: next chunk will be one group +					# of args. Will be set all on 1 line +IGNORE = 13			# IGNORE this data +ENDENV = 14			# TEMP END OF GROUP INDICATOR +IF = 15				# IF-directive +					# data = (flag,negate,[ch, ch, ch,...]) +the_types = PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE, \ +	  GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF + +# class, just to display symbolic name +class ChunkType: +	def init(self, chunk_type): +		if chunk_type not in the_types: +			raise 'ValueError', 'chunk_type not in the_types' +		self.chunk_type = chunk_type +		return self + +	def __cmp__(self, other): +		if type(self) != type(other): +			other = chunk_type(other) +		return cmp(self.chunk_type, other.chunk_type) + +	def __repr__(self): +		if self.chunk_type == PLAIN: +			return 'PLAIN' +		elif self.chunk_type == GROUP: +			return 'GROUP' +		elif self.chunk_type == CSNAME: +			return 'CSNAME' +		elif self.chunk_type == COMMENT: +			return 'COMMENT' +		elif self.chunk_type == DMATH: +			return 'DMATH' +		elif self.chunk_type == MATH: +			return 'MATH' +		elif self.chunk_type == OTHER: +			return 'OTHER' +		elif self.chunk_type == ACTIVE: +			return 'ACTIVE' +		elif self.chunk_type == GOBBLEDWHITE: +			return 'GOBBLEDWHITE' +		elif self.chunk_type == DENDLINE: +			return 'DENDLINE' +		elif self.chunk_type == ENDLINE: +			return 'ENDLINE' +		elif self.chunk_type == ENV: +			return 'ENV' +		elif self.chunk_type == CSLINE: +			return 'CSLINE' +		elif self.chunk_type == IGNORE: +			return 'IGNORE' +		elif self.chunk_type == ENDENV: +			return 'ENDENV' +		elif self.chunk_type == IF: +			return 'IF' +		else: +			raise ValueError, 'chunk_type not in the_types' + +# ...and the wrapper +def chunk_type(type): +	return ChunkType().init(type) + +# store a type object of the ChunkType-class-instance... +chunk_type_type = type(chunk_type(0)) +	 +# this class contains a part of the parsed buffer +class Chunk: +	def init(self, chtype, where, data): +		if type(chtype) != chunk_type_type: +			chtype = chunk_type(chtype) +		self.chtype = chtype +		if type(where) != type(0): +			raise TypeError, '\'where\' is not a number' +		self.where = where +		self.data = data +		##print 'CHUNK', self +		return self + +	def __repr__(self): +		return 'chunk' + `self.chtype, self.where, self.data` + +# and the wrapper +def chunk(chtype, where, data): +	 return Chunk().init(chtype, where, data) +	  + + +error = 'partparse.error' + +# +# TeX's catcodes... 
+# +CC_ESCAPE = 0 +CC_LBRACE = 1 +CC_RBRACE = 2 +CC_MATHSHIFT = 3 +CC_ALIGNMENT = 4 +CC_ENDLINE = 5 +CC_PARAMETER = 6 +CC_SUPERSCRIPT = 7 +CC_SUBSCRIPT = 8 +CC_IGNORE = 9 +CC_WHITE = 10 +CC_LETTER = 11 +CC_OTHER = 12 +CC_ACTIVE = 13 +CC_COMMENT = 14 +CC_INVALID = 15 + +# and the names +cc_names = [\ +	  'CC_ESCAPE', \ +	  'CC_LBRACE', \ +	  'CC_RBRACE', \ +	  'CC_MATHSHIFT', \ +	  'CC_ALIGNMENT', \ +	  'CC_ENDLINE', \ +	  'CC_PARAMETER', \ +	  'CC_SUPERSCRIPT', \ +	  'CC_SUBSCRIPT', \ +	  'CC_IGNORE', \ +	  'CC_WHITE', \ +	  'CC_LETTER', \ +	  'CC_OTHER', \ +	  'CC_ACTIVE', \ +	  'CC_COMMENT', \ +	  'CC_INVALID', \ +	  ] + +# Show a list of catcode-name-symbols +def pcl(codelist): +	result = '' +	for i in codelist: +		result = result + cc_names[i] + ', ' +	return '[' + result[:-2] + ']' + +# the name of the catcode (ACTIVE, OTHER, etc.) +def pc(code): +	return cc_names[code] +	 + +# Which catcodes make the parser stop parsing regular plaintext +regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT, \ +	  CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT, \ +	  CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE] + +# same for scanning a control sequence name +csname_scancodes = [CC_LETTER] + +# same for gobbling LWSP +white_scancodes = [CC_WHITE] +##white_scancodes = [CC_WHITE, CC_ENDLINE] + +# make a list of all catcode id's, except for catcode ``other'' +all_but_other_codes = range(16) +del all_but_other_codes[CC_OTHER] +##print all_but_other_codes + +# when does a comment end +comment_stopcodes = [CC_ENDLINE] + +# gather all characters together, specified by a list of catcodes +def code2string(cc, codelist): +	print 'code2string: codelist = ' + pcl(codelist), +	result = '' +	for catagory in codelist: +		if cc[catagory]: +			result = result + cc[catagory] +	print 'result = ' + `result` +	return result + +# automatically generate all characters of catcode other, being the +# complement set in the ASCII range (128 characters) +def make_other_codes(cc): +	otherchars = range(128)		# could be made 256, no problem +	for catagory in all_but_other_codes: +		if cc[catagory]: +			for c in cc[catagory]: +				otherchars[ord(c)] = None +	result = '' +	for i in otherchars: +		if i != None: +			result = result + chr(i) +	return result + +# catcode dump (which characters have which catcodes). +def dump_cc(name, cc): +	print '\t' + name +	print '=' * (8+len(name)) +	if len(cc) != 16: +		raise TypeError, 'cc not good cat class' +	for i in range(16): +		print pc(i) + '\t' + `cc[i]` +		 + +# In the beginning,.... 
+epoch_cc = [None] * 16 +##dump_cc('epoch_cc', epoch_cc) + + +# INITEX +initex_cc = epoch_cc[:] +initex_cc[CC_ESCAPE] = '\\' +initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \ +	  '\n', '\0', ' ' +initex_cc[CC_LETTER] = string.uppercase + string.lowercase +initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F' +#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway +##dump_cc('initex_cc', initex_cc) + +	 +# LPLAIN: LaTeX catcode setting (see lplain.tex) +lplain_cc = initex_cc[:] +lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}' +lplain_cc[CC_MATHSHIFT] = '$' +lplain_cc[CC_ALIGNMENT] = '&' +lplain_cc[CC_PARAMETER] = '#' +lplain_cc[CC_SUPERSCRIPT] = '^\x0B'	# '^' and C-k +lplain_cc[CC_SUBSCRIPT] = '_\x01'	# '_' and C-a +lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t' +lplain_cc[CC_ACTIVE] = '~\x0C'		# '~' and C-l +lplain_cc[CC_OTHER] = make_other_codes(lplain_cc) +##dump_cc('lplain_cc', lplain_cc) + +	 +# Guido's LaTeX environment catcoded '_' as ``other'' +# my own purpose catlist +my_cc = lplain_cc[:] +my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here +my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'	      # add it to OTHER list +dump_cc('my_cc', my_cc) + + + +# needed for un_re, my equivalent for regexp-quote in Emacs +re_meaning = '\\[]^$' + +def un_re(str): +	result = '' +	for i in str: +		if i in re_meaning: +			result = result + '\\' +		result = result + i +	return result +	 +# NOTE the negate ('^') operator in *some* of the regexps below +def make_rc_regular(cc): +	# problems here if '[]' are included!! +	return regex.compile('[' + code2string(cc, regular_stopcodes) + ']') + +def make_rc_cs_scan(cc): +	return regex.compile('[^' + code2string(cc, csname_scancodes) + ']') + +def make_rc_comment(cc): +	return regex.compile('[' + code2string(cc, comment_stopcodes) + ']') + +def make_rc_endwhite(cc): +	return regex.compile('[^' + code2string(cc, white_scancodes) + ']') +	 + + +# regular: normal mode:  +rc_regular = make_rc_regular(my_cc) + +# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$' +rc_cs_scan = make_rc_cs_scan(my_cc) +rc_comment = make_rc_comment(my_cc) +rc_endwhite = make_rc_endwhite(my_cc) + + +# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0) +#     RECURSION-LEVEL will is incremented on entry. +#     result contains the list of chunks returned +#     together with this list, the buffer position is returned + +#     RECURSION-LEVEL will be set to zero *again*, when recursively a +#     {,D}MATH-mode scan has been enetered. +#     This has been done in order to better check for environment-mismatches + +def parseit(buf, *rest): +	global lineno + +	if len(rest) == 3: +		parsemode, start, lvl = rest +	elif len(rest) == 2: +		parsemode, start, lvl = rest + (0, ) +	elif len(rest) == 1: +		parsemode, start, lvl = rest + (0, 0) +	elif len(rest) == 0: +		parsemode, start, lvl = mode(MODE_REGULAR), 0, 0 +	else: +		raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])' +	result = [] +	end = len(buf) +	if lvl == 0 and parsemode == mode(MODE_REGULAR): +		lineno = 1 +	lvl = lvl + 1 + +	##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')' + +	# +	# some of the more regular modes... 
+	# + +	if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)): +		cstate = [] +		newpos = start +		curpmode = parsemode +		while 1: +			where = newpos +			#print '\tnew round: ' + epsilon(buf, where) +			if where == end: +				if lvl > 1 or curpmode != mode(MODE_REGULAR): +					# not the way we started... +					raise EOFError, 'premature end of file.' + lle(lvl, buf, where) +				# the real ending of lvl-1 parse +				return end, result + +			pos = rc_regular.search(buf, where) + +			if pos < 0: +				pos = end + +			if pos != where: +				newpos, c = pos, chunk(PLAIN, where, (where, pos)) +				result.append(c) +				continue + + +			# +			# ok, pos == where and pos != end +			# +			foundchar = buf[where] +			if foundchar in my_cc[CC_LBRACE]: +				# recursive subgroup parse... +				newpos, data = parseit(buf, curpmode, where+1, lvl) +				result.append(chunk(GROUP, where, data)) +				 +			elif foundchar in my_cc[CC_RBRACE]: +				if lvl <= 1: +					raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where) +				if  lvl == 1 and mode != mode(MODE_REGULAR): +					raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)' +				return where + 1, result +				 +			elif foundchar in my_cc[CC_ESCAPE]: +				# +				# call the routine that actually deals with +				#     this problem. If do_ret is None, than +				#     return the value of do_ret +				# +				# Note that handle_cs might call this routine +				#     recursively again... +				# +				do_ret, newpos = handlecs(buf, where, \ +					  curpmode, lvl, result, end) +				if do_ret != None: +					return do_ret +					 +			elif foundchar in my_cc[CC_COMMENT]: +				newpos, data = parseit(buf, \ +					  mode(MODE_COMMENT), where+1, lvl) +				result.append(chunk(COMMENT, where, data)) +				 +			elif foundchar in my_cc[CC_MATHSHIFT]: +				# note that recursive calls to math-mode +				# scanning are called with recursion-level 0 +				# again, in order to check for bad mathend +				# +				if where + 1 != end and \ +					  buf[where + 1] in \ +					  my_cc[CC_MATHSHIFT]: +					# +					# double mathshift, e.g. '$$' +					# +					if curpmode == mode(MODE_REGULAR): +						newpos, data = parseit(buf, \ +							  mode(MODE_DMATH), \ +							  where+2, 0) +						result.append(chunk(DMATH, \ +							  where, data)) +					elif curpmode == mode(MODE_MATH): +						raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) +					elif lvl != 1: +						raise error, 'bad mathend.' + \ +							  lle(lvl, buf, where) +					else: +						return where + 2, result +				else: +					# +					# single math shift, e.g. '$' +					# +					if curpmode == mode(MODE_REGULAR): +						newpos, data = parseit(buf, \ +							  mode(MODE_MATH), \ +							  where+1, 0) +						result.append(chunk(MATH, \ +							  where, data)) +					elif curpmode == mode(MODE_DMATH): +						raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where) +					elif lvl != 1: +						raise error, 'bad mathend.' 
+ \ +							  lv(lvl, buf, where) +					else: +						return where + 1, result +					 +			elif foundchar in my_cc[CC_IGNORE]: +				print 'warning: ignored char', `foundchar` +				newpos = where + 1 +				 +			elif foundchar in my_cc[CC_ACTIVE]: +				result.append(chunk(ACTIVE, where, foundchar)) +				newpos = where + 1 +				 +			elif foundchar in my_cc[CC_INVALID]: +				raise error, 'invalid char ' + `foundchar` +				newpos = where + 1 +				 +			elif foundchar in my_cc[CC_ENDLINE]: +				# +				# after an end of line, eat the rest of +				# whitespace on the beginning of the next line +				# this is what LaTeX more or less does +				# +				# also, try to indicate double newlines (\par) +				# +				lineno = lineno + 1 +				savedwhere = where +				newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl) +				if newpos != end and buf[newpos] in \ +					  my_cc[CC_ENDLINE]: +					result.append(chunk(DENDLINE, \ +						  savedwhere, foundchar)) +				else: +					result.append(chunk(ENDLINE, \ +						  savedwhere, foundchar)) +			else: +				result.append(chunk(OTHER, where, foundchar)) +				newpos = where + 1 +				 +	elif parsemode == mode(MODE_CS_SCAN): +		# +		# scan for a control sequence token. `\ape', `\nut' or `\%' +		# +		if start == end: +			raise EOFError, 'can\'t find end of csname' +		pos = rc_cs_scan.search(buf, start) +		if pos < 0: +			pos = end +		if pos == start: +			# first non-letter right where we started the search +			# ---> the control sequence name consists of one single +			# character. Also: don't eat white space... +			if buf[pos] in my_cc[CC_ENDLINE]: +				lineno = lineno + 1 +			pos = pos + 1 +			return pos, (start, pos) +		else: +			spos = pos +			if buf[pos] == '\n': +				lineno = lineno + 1 +				spos = pos + 1 +			pos2, dummy = parseit(buf, \ +				  mode(MODE_GOBBLEWHITE), spos, lvl) +			return pos2, (start, pos) + +	elif parsemode == mode(MODE_GOBBLEWHITE): +		if start == end: +			return start, '' +		pos = rc_endwhite.search(buf, start) +		if pos < 0: +			pos = start +		return pos, (start, pos) + +	elif parsemode == mode(MODE_COMMENT): +		pos = rc_comment.search(buf, start) +		lineno = lineno + 1 +		if pos < 0: +			print 'no newline perhaps?' +			raise EOFError, 'can\'t find end of comment' +		pos = pos + 1 +		pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl) +		return pos2, (start, pos) + + +	else: +		raise error, 'Unknown mode (' + `parsemode` + ')' + + +#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl) + +#boxcommands = 'mbox', 'fbox' +#defcommands = 'def', 'newcommand' + +endverbstr = '\\end{verbatim}' + +re_endverb = regex.compile(un_re(endverbstr)) + +# +# handlecs: helper function for parseit, for the special thing we might +#     wanna do after certain command control sequences +# returns: None or return_data, newpos +# +# in the latter case, the calling function is instructed to immediately +# return with the data in return_data +# +def handlecs(buf, where, curpmode, lvl, result, end): +	global lineno + +	# get the control sequence name... +	newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl) +	saveddata = data +	 +	if s(buf, data) in ('begin', 'end'): +		# skip the expected '{' and get the LaTeX-envname '}' +		newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl) +		if len(data) != 1: +			raise error, 'expected 1 chunk of data.' + \ +				  lle(lvl, buf, where) + +		# yucky, we've got an environment +		envname = s(buf, data[0].data) +		##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' 
+ lv(lvl) +		if s(buf, saveddata) == 'begin' and envname == 'verbatim': +			# verbatim deserves special treatment +			pos = re_endverb.search(buf, newpos) +			if pos < 0: +				raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where) +			result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))]))) +			newpos = pos + len(endverbstr) +			 +		elif s(buf, saveddata) == 'begin': +			# start parsing recursively... If that parse returns +			# from an '\end{...}', then should the last item of +			# the returned data be a string containing the ended +			# environment +			newpos, data = parseit(buf, curpmode, newpos, lvl) +			if not data or type(data[-1]) != type(''): +				raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos) +			retenv = data[-1] +			del data[-1] +			if retenv != envname: +				#[`retenv`, `envname`] +				raise error, 'environments do not match.' + \ +					  lle(lvl, buf, where) + \ +					  epsilon(buf, newpos) +			result.append(chunk(ENV, where, (retenv, data))) +		else: +			# 'end'... append the environment name, as just +			# pointed out, and order parsit to return... +			result.append(envname) +			##print 'POINT of return: ' + epsilon(buf, newpos) +			# the tuple will be returned by parseit +			return (newpos, result), newpos + +	# end of \begin ... \end handling +			 +	elif s(buf, data)[0:2] == 'if': +		# another scary monster: the 'if' directive +		flag = s(buf, data)[2:] + +		# recursively call parseit, just like environment above.. +		# the last item of data should contain the if-termination +		# e.g., 'else' of 'fi' +		newpos, data = parseit(buf, curpmode, newpos, lvl) +		if not data or data[-1] not in ('else', 'fi'): +			raise error, 'wrong if... termination' + \ +				  lle(lvl, buf, where) + epsilon(buf, newpos) + +		ifterm = data[-1] +		del data[-1] +		# 0 means dont_negate flag +		result.append(chunk(IF, where, (flag, 0, data))) +		if ifterm == 'else': +			# do the whole thing again, there is only one way +			# to end this one, by 'fi' +			newpos, data = parseit(buf, curpmode, newpos, lvl) +			if not data or data[-1] not in ('fi', ): +				raise error, 'wrong if...else... termination' \ +					  + lle(lvl, buf, where) \ +					  + epsilon(buf, newpos) + +			ifterm = data[-1] +			del data[-1] +			result.append(chunk(IF, where, (flag, 1, data))) +		#done implicitely: return None, newpos + +	elif s(buf, data) in ('else', 'fi'): +		result.append(s(buf, data)) +		# order calling party to return tuple +		return (newpos, result), newpos + +	# end of \if, \else, ... \fi handling + +	elif s(buf, saveddata) == 'verb': +		x2 = saveddata[1] +		result.append(chunk(CSNAME, where, data)) +		if x2 == end: +			raise error, 'premature end of command.' + lle(lvl, buf, where) +		delimchar = buf[x2] +		print 'VERB: delimchar ' + `delimchar` +		pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1) +		if pos < 0: +			raise error, 'end of \'verb\' argument (' + \ +				  `delimchar` + ') not found.' 
+ \ +				  lle(lvl, buf, where) +		result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))])) +		newpos = pos + 1 +	else: +		result.append(chunk(CSNAME, where, data)) +	return None, newpos + +# this is just a function to get the string value if the possible data-tuple +def s(buf, data): +	if type(data) == type(''): +		return data +	if len(data) != 2 or not (type(data[0]) == type(data[1]) == type(0)): +		raise TypeError, 'expected tuple of 2 integers' +	x1, x2 = data +	return buf[x1:x2] +	 + +##length, data1, i = getnextarg(length, buf, pp, i + 1) + +# make a deep-copy of some chunks +def crcopy(r): +	result = [] +	for x in r: +		result.append(chunkcopy(x)) +	return result +	 +		 + +# copy a chunk, would better be a method of class Chunk... +def chunkcopy(ch): +	if ch.chtype == chunk_type(GROUP): +		listc = ch.data[:] +		for i in range(len(listc)): +			listc[i] = chunkcopy(listc[i]) +		return chunk(GROUP, ch.where, listc) +	else: +		return chunk(ch.chtype, ch.where, ch.data) + + +# get next argument for TeX-macro, flatten a group (insert between) +# or return Command Sequence token, or give back one character +def getnextarg(length, buf, pp, item): + +	##wobj = Wobj().init() +	##dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) +	##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + +	while item < length and pp[item].chtype == chunk_type(ENDLINE): +		del pp[item] +		length = length - 1 +	if item >= length: +		raise error, 'no next arg.' + epsilon(buf, pp[-1].where) +	if pp[item].chtype == chunk_type(GROUP): +		newpp = pp[item].data +		del pp[item] +		length = length - 1 +		changeit(buf, newpp) +		length = length + len(newpp) +		pp[item:item] = newpp +		item = item + len(newpp) +		if len(newpp) < 10: +			wobj = Wobj().init() +			dumpit(buf, wobj.write, newpp) +			##print 'GETNEXTARG: inserted ' + `wobj.data` +		return length, item +	elif pp[item].chtype == chunk_type(PLAIN): +		#grab one char +		print 'WARNING: grabbing one char' +		if len(s(buf, pp[item].data)) > 1: +			pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1])) +			item, length = item+1, length+1 +			pp[item].data = s(buf, pp[item].data)[1:] +		else: +			item = item+1 +		return length, item +	else: +		try: +			str = `s(buf, ch.data)` +		except TypeError: +			str = `ch.data` +			if len(str) > 400: +				str = str[:400] + '...' +		print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str +		return length, item + + +# this one is needed to find the end of LaTeX's optional argument, like +# item[...] 
+re_endopt = regex.compile(']') + +# get a LaTeX-optional argument, you know, the square braces '[' and ']' +def getoptarg(length, buf, pp, item): + +	wobj = Wobj().init() +	dumpit(buf, wobj.write, pp[item:min(length, item + 5)]) +	##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---' + +	if item >= length or \ +		  pp[item].chtype != chunk_type(PLAIN) or \ +		  s(buf, pp[item].data)[0] != '[': +		return length, item + +	pp[item].data = s(buf, pp[item].data)[1:] +	if len(pp[item].data) == 0: +		del pp[item] +		length = length-1 + +	while 1: +		if item == length: +			raise error, 'No end of optional arg found' +		if pp[item].chtype == chunk_type(PLAIN): +			text = s(buf, pp[item].data) +			pos = re_endopt.search(text) +			if pos >= 0: +				pp[item].data = text[:pos] +				if pos == 0: +					del pp[item] +					length = length-1 +				else: +					item=item+1 +				text = text[pos+1:] +				 +				while text and text[0] in ' \t': +					text = text[1:] +					 +				if text: +					pp.insert(item, chunk(PLAIN, 0, text)) +					length = length + 1 +				return length, item + +		item = item+1 + + +# Wobj just add write-requests to the ``data'' attribute +class Wobj: +	def init(self): +		self.data = '' +		return self +	def write(self, data): +		self.data = self.data + data +		 +# ignore these commands +ignoredcommands = ('bcode', 'ecode') +# map commands like these to themselves as plaintext +wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF') +# \{ --> {,  \} --> }, etc +themselves = ('{', '}', '.', '@') + wordsselves +# these ones also themselves (see argargs macro in myformat.sty) +inargsselves = (',', '[', ']', '(', ')') +# this is how *I* would show the difference between emph and strong +#  code 1 means: fold to uppercase +markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), \ +	  'strong': ('*', '*')} + +# recognise patter {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT} +fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'} + +# transparent for these commands +for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp', \ +	  'r', 'i', 't') + + +# try to remove macros and return flat text +def flattext(buf, pp): +	pp = crcopy(pp) +	print '---> FLATTEXT ' + `pp` +	wobj = Wobj().init() + +	i, length = 0, len(pp) +	while 1: +		if len(pp) != length: +			raise 'FATAL', 'inconsistent length' +		if i >= length: +			break +		ch = pp[i] +		i = i+1 +		if ch.chtype == chunk_type(PLAIN): +			pass +		elif ch.chtype == chunk_type(CSNAME): +			if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves: +				ch.chtype = chunk_type(PLAIN) +			elif s(buf, ch.data) == 'e': +				ch.chtype = chunk_type(PLAIN) +				ch.data = '\\' +			elif len(s(buf, ch.data)) == 1 \ +				  and s(buf, ch.data) in onlylatexspecial: +				ch.chtype = chunk_type(PLAIN) +				# if it is followed by an empty group, +				# remove that group, it was needed for +				# a true space +				if i < length \ +					  and pp[i].chtype==chunk_type(GROUP) \ +					  and len(pp[i].data) == 0: +					del pp[i] +					length = length-1 +						 +			elif s(buf, ch.data) in markcmds.keys(): +				length, newi = getnextarg(length, buf, pp, i) +				str = flattext(buf, pp[i:newi]) +				del pp[i:newi] +				length = length - (newi - i) +				ch.chtype = chunk_type(PLAIN) +				markcmd = s(buf, ch.data) +				x = markcmds[markcmd] +				if type(x) == type(()): +					pre, after = x +					str = pre+str+after +				elif x == 1: +					str = string.upper(str) +				else: +					raise 'FATAL', 
'corrupt markcmds' +				ch.data = str +			else: +				if s(buf, ch.data) not in ignoredcommands: +					print 'WARNING: deleting command ' + `s(buf, ch.data)` +					print 'PP' + `pp[i-1]` +				del pp[i-1] +				i, length = i-1, length-1 +		elif ch.chtype == chunk_type(GROUP): +			length, newi = getnextarg(length, buf, pp, i-1) +			i = i-1 +##			str = flattext(buf, crcopy(pp[i-1:newi])) +##			del pp[i:newi] +##			length = length - (newi - i) +##			ch.chtype = chunk_type(PLAIN) +##			ch.data = str +		else: +			pass +		 +	dumpit(buf, wobj.write, pp) +	print 'FLATTEXT: RETURNING ' + `wobj.data` +	return wobj.data + +# try to generate node names (a bit shorter than the chapter title) +# note that the \nodename command (see elsewhere) overules these efforts +def invent_node_names(text): +	words = string.split(text) + +	print 'WORDS ' + `words` + +	if len(words) == 2 \ +		  and string.lower(words[0]) == 'built-in' \ +		  and string.lower(words[1]) not in ('modules', 'functions'): +		return words[1] +	if len(words) == 3 and string.lower(words[1]) == 'module': +		return words[2] +	if len(words) == 3 and string.lower(words[1]) == 'object': +		return string.join(words[0:2]) +	if len(words) > 4 and string.lower(string.join(words[-4:])) == \ +		  'methods and data attributes': +		return string.join(words[:2]) +	return text +	 +re_commas_etc = regex.compile('[,`\'@{}]') + +re_whitespace = regex.compile('[ \t]*') + + +##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename') + +# look if the next non-white stuff is also a command, resulting in skipping +# double endlines (DENDLINE) too, and thus omitting \par's +# Sometimes this is too much, maybe consider DENDLINE's as stop +def next_command_p(length, buf, pp, i, cmdname): + +	while 1: +		if i >= len(pp): +			break +		ch = pp[i] +		i = i+1 +		if ch.chtype == chunk_type(ENDLINE): +			continue +		if ch.chtype == chunk_type(DENDLINE): +			continue +		if ch.chtype == chunk_type(PLAIN): +			if re_whitespace.search(s(buf, ch.data)) == 0 and \ +				  re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)): +				continue +			return -1 +		if ch.chtype == chunk_type(CSNAME): +			if s(buf, ch.data) == cmdname: +				return i # _after_ the command +			return -1 +		return -1 +			 +				 +# things that are special to LaTeX, but not to texi.. +onlylatexspecial = '_~^$#&%' + +class Struct(): pass + +hist = Struct() +out = Struct() + +def startchange(): +	global hist, out + +	hist.inenv = [] +	hist.nodenames = [] +	hist.cindex = [] +	hist.inargs = 0 +	hist.enumeratenesting, hist.itemizenesting = 0, 0 + +	out.doublenodes = [] +	out.doublecindeces = [] +	 + +spacech = [chunk(PLAIN, 0, ' ')] +commach = [chunk(PLAIN, 0, ', ')] +cindexch = [chunk(CSLINE, 0, 'cindex')] + +# the standard variation in symbols for itemize +itemizesymbols = ['bullet', 'minus', 'dots'] + +# same for enumerate +enumeratesymbols = ['1', 'A', 'a'] + +## +## \begin{ {func,data,exc}desc }{name}... 
+##   the resulting texi-code is dependent on the contents of indexsubitem +## + +# indexsubitem: `['XXX', 'function'] +# funcdesc: +#     deffn {`idxsi`} NAME (FUNCARGS) + +# indexsubitem: `['XXX', 'method']` +# funcdesc: +#     defmethod {`idxsi[0]`} NAME (FUNCARGS) + +# indexsubitem: `['in', 'module', 'MODNAME']' +# datadesc: +#     defcv data {`idxsi[1:]`} NAME +# excdesc: +#     defcv exception {`idxsi[1:]`} NAME +# funcdesc: +#     deffn {function of `idxsi[1:]`} NAME (FUNCARGS) + +# indexsubitem: `['OBJECT', 'attribute']' +# datadesc +#     defcv attribute {`OBJECT`} NAME + + +## this routine will be called on \begin{funcdesc}{NAME}{ARGS} +##   or \funcline{NAME}{ARGS} +## +def do_funcdesc(length, buf, pp, i): +	startpoint = i-1 +	ch = pp[startpoint] +	wh = ch.where +	length, newi = getnextarg(length, buf, pp, i) +	funcname = chunk(GROUP, wh, pp[i:newi]) +	del pp[i:newi] +	length = length - (newi-i) +	save = hist.inargs +	hist.inargs = 1 +	length, newi = getnextarg(length, buf, pp, i) +	hist.inargs = save +	del save +	the_args = [chunk(PLAIN, wh, '()'[0])] + \ +		  pp[i:newi] + \ +		  [chunk(PLAIN, wh, '()'[1])] +	del pp[i:newi] +	length = length - (newi-i) + +	idxsi = hist.indexsubitem	# words +	command = '' +	cat_class = '' +	if idxsi and idxsi[-1] == 'method': +		command = 'defmethod' +		cat_class = string.join(idxsi[:-1]) +	elif len(idxsi) == 2 and idxsi[1] == 'function': +		command = 'deffn' +		cat_class = string.join(idxsi) +	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: +		command = 'deffn' +		cat_class = 'function of ' + string.join(idxsi[1:]) + +	if not command: +		raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` +		 +	ch.chtype = chunk_type(CSLINE) +	ch.data = command +	 +	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] +	cslinearg.append(chunk(PLAIN, wh, ' ')) +	cslinearg.append(funcname) +	cslinearg.append(chunk(PLAIN, wh, ' ')) +	l = len(cslinearg) +	cslinearg[l:l] = the_args + +	pp.insert(i, chunk(GROUP, wh, cslinearg)) +	i, length = i+1, length+1 +	hist.command = command +	return length, i +	 + +## this routine will be called on \begin{excdesc}{NAME} +## or \excline{NAME} +##	 +def do_excdesc(length, buf, pp, i): +	startpoint = i-1 +	ch = pp[startpoint] +	wh = ch.where +	length, newi = getnextarg(length, buf, pp, i) +	excname = chunk(GROUP, wh, pp[i:newi]) +	del pp[i:newi] +	length = length - (newi-i) + +	idxsi = hist.indexsubitem	# words +	command = '' +	cat_class = '' +	class_class = '' +	if len(idxsi) == 2 and idxsi[1] == 'exception': +		command = 'defvr' +		cat_class = string.join(idxsi) +	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: +		command = 'defcv' +		cat_class = 'exception' +		class_class = string.join(idxsi[1:]) +	elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']: +		command = 'defcv' +		cat_class = 'exception' +		class_class = string.join(idxsi[2:]) +		 + +	if not command: +		raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` +		 +	ch.chtype = chunk_type(CSLINE) +	ch.data = command +	 +	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] +	cslinearg.append(chunk(PLAIN, wh, ' ')) +	if class_class: +		cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) +		cslinearg.append(chunk(PLAIN, wh, ' ')) +	cslinearg.append(excname) + +	pp.insert(i, chunk(GROUP, wh, cslinearg)) +	i, length = i+1, length+1 +	hist.command = command +	return length, i + +## same for datadesc or dataline... 
+def do_datadesc(length, buf, pp, i): +	startpoint = i-1 +	ch = pp[startpoint] +	wh = ch.where +	length, newi = getnextarg(length, buf, pp, i) +	dataname = chunk(GROUP, wh, pp[i:newi]) +	del pp[i:newi] +	length = length - (newi-i) + +	idxsi = hist.indexsubitem	# words +	command = '' +	cat_class = '' +	class_class = '' +	if len(idxsi) == 2 and idxsi[1] == 'attribute': +		command = 'defcv' +		cat_class = 'attribute' +		class_class = idxsi[0] +	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']: +		command = 'defcv' +		cat_class = 'data' +		class_class = string.join(idxsi[1:]) +	elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']: +		command = 'defcv' +		cat_class = 'data' +		class_class = string.join(idxsi[2:]) +		 + +	if not command: +		raise error, 'don\'t know what to do with indexsubitem ' + `idxsi` +		 +	ch.chtype = chunk_type(CSLINE) +	ch.data = command +	 +	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])] +	cslinearg.append(chunk(PLAIN, wh, ' ')) +	if class_class: +		cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)])) +		cslinearg.append(chunk(PLAIN, wh, ' ')) +	cslinearg.append(dataname) + +	pp.insert(i, chunk(GROUP, wh, cslinearg)) +	i, length = i+1, length+1 +	hist.command = command +	return length, i +	 + +# regular indices: those that are not set in tt font by default.... +regindices = ('cindex', ) + +# remove illegal characters from node names +def rm_commas_etc(text): +	result = '' +	changed = 0 +	while 1: +		pos = re_commas_etc.search(text) +		if pos >= 0: +			changed = 1 +			result = result + text[:pos] +			text = text[pos+1:] +		else: +			result = result + text +			break +	if changed: +		print 'Warning: nodename changhed to ' + `result` + +	return result +			 +# boolean flags +flags = {'texi': 1} +	 + +## +## changeit: the actual routine, that changes the contents of the parsed +##           chunks +## + +def changeit(buf, pp): +	global onlylatexspecial, hist, out + +	i, length = 0, len(pp) +	while 1: +		# sanity check: length should always equal len(pp) +		if len(pp) != length: +			raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)` +		if i >= length: +			break +		ch = pp[i] +		i = i + 1 + +		if type(ch) == type(''): +			#normally, only chunks are present in pp, +			# but in some cases, some extra info +			# has been inserted, e.g., the \end{...} clauses +			raise 'FATAL', 'got string, probably too many ' + `end` + +		if ch.chtype == chunk_type(GROUP): +			# check for {\em ...} constructs +			if ch.data and \ +				  ch.data[0].chtype == chunk_type(CSNAME) and \ +				  s(buf, ch.data[0].data) in fontchanges.keys(): +				k = s(buf, ch.data[0].data) +				del ch.data[0] +				pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k])) +				length, i = length+1, i+1 + +			# recursively parse the contents of the group +			changeit(buf, ch.data) + +		elif ch.chtype == chunk_type(IF): +			# \if... +			flag, negate, data = ch.data +			print 'IF: flag, negate = ' + `flag, negate` +			if flag not in flags.keys(): +				raise error, 'unknown flag ' + `flag` +				 +			value = flags[flag] +			if negate: +				value = (not value) +			del pp[i-1] +			length, i = length-1, i-1 +			if value: +				pp[i:i] = data +				length = length + len(data) +					 +			 +		elif ch.chtype == chunk_type(ENV): +			# \begin{...} .... 
+			envname, data = ch.data + +			#push this environment name on stack +			hist.inenv.insert(0, envname) + +			#append an endenv chunk after grouped data +			data.append(chunk(ENDENV, ch.where, envname)) +			##[`data`] + +			#delete this object +			del pp[i-1] +			i, length = i-1, length-1 + +			#insert found data +			pp[i:i] = data +			length = length + len(data) + +			if envname == 'verbatim': +				pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \ +					  chunk(GROUP, ch.where, [])] +				length, i = length+2, i+2 + +			elif envname == 'itemize': +				if hist.itemizenesting > len(itemizesymbols): +					raise error, 'too deep itemize nesting' +				ingroupch = [chunk(CSNAME, ch.where,\ +					  itemizesymbols[hist.itemizenesting])] +				hist.itemizenesting = hist.itemizenesting + 1 +				pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\ +					  chunk(GROUP, ch.where, ingroupch)] +				length, i = length+2, i+2 +				 +			elif envname == 'enumerate': +				if hist.enumeratenesting > len(enumeratesymbols): +					raise error, 'too deep enumerate nesting' +				ingroupch = [chunk(PLAIN, ch.where,\ +					  enumeratesymbols[hist.enumeratenesting])] +				hist.enumeratenesting = hist.enumeratenesting + 1 +				pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\ +					  chunk(GROUP, ch.where, ingroupch)] +				length, i = length+2, i+2 +				 +			elif envname == 'description': +				ingroupch = [chunk(CSNAME, ch.where, 'b')] +				pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \ +					  chunk(GROUP, ch.where, ingroupch)] +				length, i = length+2, i+2 +				 +			elif envname == 'tableiii': +				wh = ch.where +				newcode = [] +				 +				#delete tabular format description +				# e.g., {|l|c|l|} +				length, newi = getnextarg(length, buf, pp, i) +				del pp[i:newi] +				length = length - (newi-i) + +				newcode.append(chunk(CSLINE, wh, 'table')) +				ingroupch = [chunk(CSNAME, wh, 'asis')] +				newcode.append(chunk(GROUP, wh, ingroupch)) +				newcode.append(chunk(CSLINE, wh, 'item')) + +				#get the name of macro for @item +				# e.g., {code} +				length, newi = getnextarg(length, buf, pp, i) +				 +				if newi-i != 1: +					raise error, 'Sorry, expected 1 chunk argument' +				if pp[i].chtype != chunk_type(PLAIN): +					raise error, 'Sorry, expected plain text argument' +				hist.itemargmacro = s(buf, pp[i].data) +				del pp[i:newi] +				length = length - (newi-i) + +				for count in range(3): +					length, newi = getnextarg(length, buf, pp, i) +					emphgroup = [\ +						  chunk(CSNAME, wh, 'emph'), \ +						  chunk(GROUP, 0, pp[i:newi])] +					del pp[i:newi] +					length = length - (newi-i) +					if count == 0: +						itemarg = emphgroup +					elif count == 2: +						itembody = itembody + \ +							  [chunk(PLAIN, wh, '  ---  ')] + \ +							  emphgroup +					else: +						itembody = emphgroup +				newcode.append(chunk(GROUP, wh, itemarg)) +				newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')] +				pp[i:i] = newcode +				l = len(newcode) +				length, i = length+l, i+l +				del newcode, l + +				if length != len(pp): +					raise 'STILL, SOMETHING wrong', `i` + + +			elif envname == 'funcdesc': +				pp.insert(i, chunk(PLAIN, ch.where, '')) +				i, length = i+1, length+1 +				length, i = do_funcdesc(length, buf, pp, i) +					   +			elif envname == 'excdesc': +				pp.insert(i, chunk(PLAIN, ch.where, '')) +				i, length = i+1, length+1 +				length, i = do_excdesc(length, buf, pp, i) +					   +			elif envname == 'datadesc': +				pp.insert(i, chunk(PLAIN, ch.where, '')) +				i, length = i+1, length+1 +				length, i = 
do_datadesc(length, buf, pp, i) +					   +			else: +				print 'WARNING: don\'t know what to do with env ' + `envname` +				 +		elif ch.chtype == chunk_type(ENDENV): +			envname = ch.data +			if envname != hist.inenv[0]: +				raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]` +			del hist.inenv[0] +			del pp[i-1] +			i, length = i-1, length-1 + +			if envname == 'verbatim': +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'example')])] +				i, length = i+2, length+2 +			elif envname == 'itemize': +				hist.itemizenesting = hist.itemizenesting - 1 +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'itemize')])] +				i, length = i+2, length+2 +			elif envname == 'enumerate': +				hist.enumeratenesting = hist.enumeratenesting-1 +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'enumerate')])] +				i, length = i+2, length+2 +			elif envname == 'description': +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'table')])] +				i, length = i+2, length+2 +			elif envname == 'tableiii': +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, 'table')])] +				i, length = i+2, length + 2 +				pp.insert(i, chunk(DENDLINE, ch.where, '\n')) +				i, length = i+1, length+1 + +			elif envname in ('funcdesc', 'excdesc', 'datadesc'): +				pp[i:i] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, hist.command)])] +				i, length = i+2, length+2 +			else: +				print 'WARNING: ending env ' + `envname` + 'has no actions' +				 +		elif ch.chtype == chunk_type(CSNAME): +			# control name transformations +			if s(buf, ch.data) in ignoredcommands: +				del pp[i-1] +				i, length = i-1, length-1 +			elif s(buf, ch.data) == '@' and \ +				  i != length and \ +				  pp[i].chtype == chunk_type(PLAIN) and \ +				  s(buf, pp[i].data)[0] == '.': +				# \@. --> \. --> @. +				ch.data = '.' +				del pp[i] +				length = length-1 +			elif s(buf, ch.data) == '\\': +				# \\ --> \* --> @* +				ch.data = '*' +			elif len(s(buf, ch.data)) == 1 and \ +				  s(buf, ch.data) in onlylatexspecial: +				ch.chtype = chunk_type(PLAIN) +				# check if such a command is followed by +				# an empty group: e.g., `\%{}'.  If so, remove +				# this empty group too +				if i < length and \ +					  pp[i].chtype == chunk_type(GROUP) \ +					  and len(pp[i].data) == 0: +					del pp[i] +					length = length-1 + +			elif hist.inargs and s(buf, ch.data) in inargsselves: +				# This is the special processing of the +				# arguments of the \begin{funcdesc}... or +				# \funcline... arguments +				# \, --> , \[ --> [, \] --> ] +				ch.chtype = chunk_type(PLAIN) +				 +			elif s(buf, ch.data) == 'renewcommand': +				# \renewcommand{\indexsubitem}.... 
+				i, length = i-1, length-1 +				del pp[i] +				length, newi = getnextarg(length, buf, pp, i) +				if newi-i == 1 \ +					  and i < length \ +					  and pp[i].chtype == chunk_type(CSNAME) \ +					  and s(buf, pp[i].data) == 'indexsubitem': +					del pp[i:newi] +					length = length - (newi-i) +					length, newi = getnextarg(length, buf, pp, i) +					text = flattext(buf, pp[i:newi]) +					if text[0] != '(' or text[-1] != ')': +						raise error, 'expected indexsubitme enclosed in braces' +					words = string.split(text[1:-1]) +					hist.indexsubitem = words +					del text, words +				else: +					print 'WARNING: renewcommand with unsupported arg removed' +				del pp[i:newi] +				length = length - (newi-i) + +			elif s(buf, ch.data) == 'item': +				ch.chtype = chunk_type(CSLINE) +				length, newi = getoptarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				i, length = i+1, length+1 +				 +			elif s(buf, ch.data) == 'ttindex': +				idxsi = hist.indexsubitem + +				cat_class = '' +				if len(idxsi) >= 2 and idxsi[1] in \ +					  ('method', 'function'): +					command = 'findex' +				elif len(idxsi) >= 2 and idxsi[1] in \ +					  ('exception', 'object'): +					command = 'vindex' +				else: +					print 'WARNING: can\'t catagorize ' + `idxsi` + ' for \'ttindex\' command' +					command = 'cindex' + +				if not cat_class: +					cat_class = '('+string.join(idxsi)+')' + +				ch.chtype = chunk_type(CSLINE) +				ch.data = command + +				length, newi = getnextarg(length, buf, pp, i) +				arg = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				cat_arg = [chunk(PLAIN, ch.where, cat_class)] + +				# determine what should be set in roman, and +				# what in tt-font +				if command in regindices: + +					arg = [chunk(CSNAME, ch.where, 't'), \ +						  chunk(GROUP, ch.where, arg)] +				else: +					cat_arg = [chunk(CSNAME, ch.where, 'r'), \ +						  chunk(GROUP, ch.where, cat_arg)] + +				ingroupch = arg + \ +					  [chunk(PLAIN, ch.where, ' ')] + \ +					  cat_arg +				 +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + +				 +			elif s(buf, ch.data) == 'ldots': +				# \ldots --> \dots{} --> @dots{} +				ch.data = 'dots' +				if i == length \ +					  or pp[i].chtype != chunk_type(GROUP) \ +					  or pp[i].data != []: +					pp.insert(i, chunk(GROUP, ch.where, [])) +					i, length = i+1, length+1 +			elif s(buf, ch.data) in wordsselves: +				# \UNIX --> UNIX +				ch.chtype = chunk_type(PLAIN) +				if i != length \ +					  and pp[i].chtype == chunk_type(GROUP) \ +					  and pp[i].data == []: +					del pp[i] +					length = length-1 +			elif s(buf, ch.data) in for_texi: +				pass + +			elif s(buf, ch.data) == 'e': +				# \e --> \ +				ch.data = '\\' +				ch.chtype = chunk_type(PLAIN) +			elif s(buf, ch.data) == 'lineiii': +				# This is the most tricky one +				# \lineiii{a1}{a2}[{a3}] --> +				# @item @<cts. of itemargmacro>{a1} +				#  a2 [ -- a3] +				# +				##print 'LINEIIIIII!!!!!!!' 
+##				wobj = Wobj().init() +##				dumpit(buf, wobj.write, pp[i-1:i+5]) +##				print '--->' + wobj.data + '<----' +				if not hist.inenv: +					raise error, \ +						  'no environment for lineiii' +				if hist.inenv[0] != 'tableiii': +					raise error, \ +						  'wrong command (' + \ +						  `'lineiii'` + \ +						  ') in wrong environment (' \ +						  + `hist.inenv[0]` + ')' +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'item' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = [chunk(CSNAME, 0, \ +					  hist.itemargmacro), \ +					  chunk(GROUP, 0, pp[i:newi])] +				del pp[i:newi] +				length = length - (newi-i) +##				print 'ITEM ARG: --->', +##				wobj = Wobj().init() +##				dumpit(buf, wobj.write, ingroupch) +##				print wobj.data, '<---' +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				grouppos = i +				i, length = i+1, length+1 +				length, i = getnextarg(length, buf, pp, i) +				length, newi = getnextarg(length, buf, pp, i) +				if newi > i: +					# we have a 3rd arg +					pp.insert(i, chunk(PLAIN, ch.where, '  ---  ')) +					i = newi + 1 +					length = length + 1 +##					pp[grouppos].data = pp[grouppos].data \ +##						  + [chunk(PLAIN, ch.where, '  ')] \ +##						  + pp[i:newi] +##					del pp[i:newi] +##					length = length - (newi-i) +				if length != len(pp): +					raise 'IN LINEIII IS THE ERR', `i` + +			elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'): +				#\xxxsection{A} ----> +				# @node A, , , +				# @xxxsection A +				## also: remove commas and quotes +				ch.chtype = chunk_type(CSLINE) +				length, newi = getnextarg(length, buf, pp, i) +				afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename') +				if afternodenamecmd < 0: +					cp1 = crcopy(pp[i:newi]) +					pp[i:newi] = [\ +						  chunk(GROUP, ch.where, \ +						  pp[i:newi])] +					length, newi = length - (newi-i) + 1, \ +						  i+1 +					text = flattext(buf, cp1) +					text = invent_node_names(text) +				else: +					length, endarg = getnextarg(length, buf, pp, afternodenamecmd) +					cp1 = crcopy(pp[afternodenamecmd:endarg]) +					del pp[newi:endarg] +					length = length - (endarg-newi) + +					pp[i:newi] = [\ +						  chunk(GROUP, ch.where, \ +						  pp[i:newi])] +					length, newi = length - (newi-i) + 1, \ +						  i + 1 +					text = flattext(buf, cp1) +				if text[-1] == '.': +					text = text[:-1] +				print 'FLATTEXT:', `text` +				if text in hist.nodenames: +					print 'WARNING: node name ' + `text` + ' already used' +					out.doublenodes.append(text) +				else: +					hist.nodenames.append(text) +				text = rm_commas_etc(text) +				pp[i-1:i-1] = [\ +					  chunk(CSLINE, ch.where, 'node'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, text+', , ,')\ +					  ])] +				i, length = newi+2, length+2 + +			elif s(buf,ch.data) == 'funcline': +				# fold it to a very short environment +				pp[i-1:i-1] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, hist.command)])] +				i, length = i+2, length+2 +				length, i = do_funcdesc(length, buf, pp, i) +				 +			elif s(buf,ch.data) == 'dataline': +				pp[i-1:i-1] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, hist.command)])] +				i, length = i+2, length+2 +				length, i = do_datadesc(length, buf, pp, i) +				 +			elif s(buf,ch.data) == 'excline': +				pp[i-1:i-1] = [\ +					  chunk(CSLINE, ch.where, 'end'), \ +					  chunk(GROUP, ch.where, [\ +				
	  chunk(PLAIN, ch.where, hist.command)])] +				i, length = i+2, length+2 +				length, i = do_excdesc(length, buf, pp, i) +				 +				 +			elif s(buf, ch.data) == 'index': +				#\index{A} ---> +				# @cindex A +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				length, newi = getnextarg(length, buf, pp, i) + +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + +			elif s(buf, ch.data) == 'bifuncindex': +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'findex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				ingroupch.append(chunk(PLAIN, ch.where, ' ')) +				ingroupch.append(chunk(CSNAME, ch.where, 'r')) +				ingroupch.append(chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, \ +					  '(built-in function)')])) + +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 +				 +				 +			elif s(buf, ch.data) == 'opindex': +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'findex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				ingroupch.append(chunk(PLAIN, ch.where, ' ')) +				ingroupch.append(chunk(CSNAME, ch.where, 'r')) +				ingroupch.append(chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, \ +					  '(operator)')])) + +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + + +			elif s(buf, ch.data) == 'bimodindex': +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'pindex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				ingroupch.append(chunk(PLAIN, ch.where, ' ')) +				ingroupch.append(chunk(CSNAME, ch.where, 'r')) +				ingroupch.append(chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, \ +					  '(built-in)')])) + +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + +			elif s(buf, ch.data) == 'sectcode': +				ch.data = 'code' + + +			elif s(buf, ch.data) == 'stmodindex': +				ch.chtype = chunk_type(CSLINE) +				# use the program index as module index +				ch.data = 'pindex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = pp[i:newi] +				del pp[i:newi] +				length = length - (newi-i) + +				ingroupch.append(chunk(PLAIN, ch.where, ' ')) +				ingroupch.append(chunk(CSNAME, ch.where, 'r')) +				ingroupch.append(chunk(GROUP, ch.where, [\ +					  chunk(PLAIN, ch.where, \ +					  '(standard)')])) + +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				length, i = length+1, i+1 + + +			elif s(buf, ch.data) == 'stindex': +				# XXX must actually go to newindex st +				wh = ch.where +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				length, newi = getnextarg(length, buf, pp, i) +				ingroupch = [chunk(CSNAME, wh, 'code'), \ +					  chunk(GROUP, wh, pp[i:newi])] + +				del pp[i:newi] +				length = length - (newi-i) + +				t = ingroupch[:] +				t.append(chunk(PLAIN, wh, ' statement')) + +				pp.insert(i, chunk(GROUP, wh, t)) +				i, length = i+1, length+1 + +				pp.insert(i, chunk(CSLINE, wh, 'cindex')) +				i, length = i+1, length+1 + +				t = ingroupch[:] +				t.insert(0, chunk(PLAIN, wh, 'statement, ')) +				 +				pp.insert(i, chunk(GROUP, wh, t)) +				i, length = i+1, length+1 +				 + +			elif s(buf, ch.data) == 'indexii': +				#\indexii{A}{B} ---> +				# 
@cindex A B +				# @cindex B, A +				length, newi = getnextarg(length, buf, pp, i) +				cp11 = pp[i:newi] +				cp21 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp12 = pp[i:newi] +				cp22 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) + +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				pp.insert(i, chunk(GROUP, ch.where, cp11 + [\ +					  chunk(PLAIN, ch.where, ' ')] + cp12)) +				i, length = i+1, length+1 +				pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ +					  chunk(GROUP, ch.where, cp22 + [\ +					  chunk(PLAIN, ch.where, ', ')]+ cp21)] +				i, length = i+2, length+2 + +			elif s(buf, ch.data) == 'indexiii': +				length, newi = getnextarg(length, buf, pp, i) +				cp11 = pp[i:newi] +				cp21 = crcopy(pp[i:newi]) +				cp31 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp12 = pp[i:newi] +				cp22 = crcopy(pp[i:newi]) +				cp32 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp13 = pp[i:newi] +				cp23 = crcopy(pp[i:newi]) +				cp33 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) + +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				pp.insert(i, chunk(GROUP, ch.where, cp11 + [\ +					  chunk(PLAIN, ch.where, ' ')] + cp12 \ +					  + [chunk(PLAIN, ch.where, ' ')] \ +					  + cp13)) +				i, length = i+1, length+1 +				pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ +					  chunk(GROUP, ch.where, cp22 + [\ +					  chunk(PLAIN, ch.where, ' ')]+ cp23\ +					  + [chunk(PLAIN, ch.where, ', ')] +\ +					  cp21)] +				i, length = i+2, length+2 +				pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \ +					  chunk(GROUP, ch.where, cp33 + [\ +					  chunk(PLAIN, ch.where, ', ')]+ cp31\ +					  + [chunk(PLAIN, ch.where, ' ')] +\ +					  cp32)] +				i, length = i+2, length+2 +				 +				 +			elif s(buf, ch.data) == 'indexiv': +				length, newi = getnextarg(length, buf, pp, i) +				cp11 = pp[i:newi] +				cp21 = crcopy(pp[i:newi]) +				cp31 = crcopy(pp[i:newi]) +				cp41 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp12 = pp[i:newi] +				cp22 = crcopy(pp[i:newi]) +				cp32 = crcopy(pp[i:newi]) +				cp42 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp13 = pp[i:newi] +				cp23 = crcopy(pp[i:newi]) +				cp33 = crcopy(pp[i:newi]) +				cp43 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) +				length, newi = getnextarg(length, buf, pp, i) +				cp14 = pp[i:newi] +				cp24 = crcopy(pp[i:newi]) +				cp34 = crcopy(pp[i:newi]) +				cp44 = crcopy(pp[i:newi]) +				del pp[i:newi] +				length = length - (newi-i) + +				ch.chtype = chunk_type(CSLINE) +				ch.data = 'cindex' +				ingroupch = cp11 + \ +					  spacech + cp12 + \ +					  spacech + cp13 + \ +					  spacech + cp14 +				pp.insert(i, chunk(GROUP, ch.where, ingroupch)) +				i, length = i+1, length+1 +				ingroupch = cp22 + \ +					  spacech + cp23 + \ +					  spacech + cp24 + \ +					  commach + cp21 +				pp[i:i] = cindexch + [\ +					  chunk(GROUP, ch.where, ingroupch)] +				i, length = i+2, length+2 +				ingroupch = cp33 + \ +					  spacech + cp34 + \ +					  commach + cp31 + \ +					  spacech + cp32 +				pp[i:i] = cindexch + [\ +					  chunk(GROUP, 
ch.where, ingroupch)] +				i, length = i+2, length+2 +				ingroupch = cp44 + \ +					  commach + cp41 + \ +					  spacech + cp42 + \ +					  spacech + cp43 +				pp[i:i] = cindexch + [\ +					  chunk(GROUP, ch.where, ingroupch)] +				i, length = i+2, length+2 +				 +				 + +			else: +				print 'don\'t know what to do with keyword ' + `s(buf, ch.data)` + + +	 +re_atsign = regex.compile('[@{}]') +re_newline = regex.compile('\n') + +def dumpit(buf, wm, pp): + +	global out +	 +	i, length = 0, len(pp) + +	addspace = 0 +	 +	while 1: +		if len(pp) != length: +			raise 'FATAL', 'inconsistent length' +		if i == length: +			break +		ch = pp[i] +		i = i + 1 + +		if addspace: +			dospace = 1 +			addspace = 0 +		else: +			dospace = 0 + +		if ch.chtype == chunk_type(CSNAME): +			wm('@' + s(buf, ch.data)) +			if s(buf, ch.data) == 'node' and \ +				  pp[i].chtype == chunk_type(PLAIN) and \ +				  s(buf, pp[i].data) in out.doublenodes: +				##XXX doesnt work yet?? +				wm(' ZZZ-' + zfill(`i`, 4)) +			if s(buf, ch.data)[0] in string.letters: +				addspace = 1 +		elif ch.chtype == chunk_type(PLAIN): +			if dospace and s(buf, ch.data) not in (' ', '\t'): +				wm(' ') +			text = s(buf, ch.data) +			while 1: +				pos = re_atsign.search(text) +				if pos < 0: +					break +				wm(text[:pos] + '@' + text[pos]) +				text = text[pos+1:] +			wm(text) +		elif ch.chtype == chunk_type(GROUP): +			wm('{') +			dumpit(buf, wm, ch.data) +			wm('}') +		elif ch.chtype == chunk_type(DENDLINE): +			wm('\n\n') +			while i != length and pp[i].chtype in \ +				  (chunk_type(DENDLINE), chunk_type(ENDLINE)): +				i = i + 1 +		elif ch.chtype == chunk_type(OTHER): +			wm(s(buf, ch.data)) +		elif ch.chtype == chunk_type(ACTIVE): +			wm(s(buf, ch.data)) +		elif ch.chtype == chunk_type(ENDLINE): +			wm('\n') +		elif ch.chtype == chunk_type(CSLINE): +			if i >= 2 and pp[i-2].chtype not in \ +				  (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ +				  and (pp[i-2].chtype != chunk_type(PLAIN) \ +				  or s(buf, pp[i-2].data)[-1] != '\n'): +				   +				wm('\n') +			wm('@' + s(buf, ch.data)) +			if i == length: +				raise error, 'CSLINE expected another chunk' +			if pp[i].chtype != chunk_type(GROUP): +				raise error, 'CSLINE expected GROUP' +			if type(pp[i].data) != type([]): +				raise error, 'GROUP chould contain []-data' +			 +			wobj = Wobj().init() +			dumpit(buf, wobj.write, pp[i].data) +			i = i + 1 +			text = wobj.data +			del wobj +			if text: +				wm(' ') +				while 1: +					pos = re_newline.search(text) +					if pos < 0: +						break +					print 'WARNING: found newline in csline arg' +					wm(text[:pos] + ' ') +					text = text[pos+1:] +				wm(text) +			if i >= length or \ +				  pp[i].chtype not in (chunk_type(CSLINE), \ +				  chunk_type(ENDLINE), chunk_type(DENDLINE)) \ +				  and (pp[i].chtype != chunk_type(PLAIN) \ +				  or s(buf, pp[i].data)[0] != '\n'): +				wm('\n') +			 +		elif ch.chtype == chunk_type(COMMENT): +			print 'COMMENT: previous chunk =', pp[i-2] +			if pp[i-2].chtype == chunk_type(PLAIN): +				print 'PLAINTEXT =', `s(buf, pp[i-2].data)` +			if s(buf, ch.data) and \ +				  regex.match('^[ \t]*$', s(buf, ch.data)) < 0: +				if i >= 2 and pp[i-2].chtype not in \ +					  (chunk_type(ENDLINE), chunk_type(DENDLINE)) \ +					  and not (pp[i-2].chtype == chunk_type(PLAIN) \ +					  and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0): +					print 'ADDING NEWLINE' +					wm('\n') +				wm('@c ' + s(buf, ch.data)) +		elif ch.chtype == chunk_type(IGNORE): +			pass +		else: +			try: +				str = `s(buf, 
ch.data)` +			except TypeError: +				str = `ch.data` +			if len(str) > 400: +				str = str[:400] + '...' +			print 'warning:', ch.chtype, 'not handled, data ' + str + + + +from posix import popen + +def main(): + +	 +	buf = open(sys.argv[1], 'r').read() +	restargs = sys.argv[2:] + +	w, pp = parseit(buf) +	startchange() +##	try: +	while 1: +		changeit(buf, pp) +##		pass +		break + +##	finally: +	while 1: +		outf = open('@out.texi', 'w') +		preamble = open('texipre.dat', 'r') +		while 1: +			l = preamble.readline() +			if not l: +				preamble.close() +				break +			outf.write(l) +		 +		dumpit(buf, outf.write, pp) + +		while restargs: +			del buf, pp +			buf = open(restargs[0], 'r').read() +			del restargs[0] +			w, pp = parseit(buf) +			startchange() +			changeit(buf, pp) +			dumpit(buf, outf.write, pp) + +		postamble = open('texipost.dat', 'r') +		while 1: +			l = postamble.readline() +			if not l: +				postamble.close() +				break +			outf.write(l) +		 +		outf.close() + +##		pass +		break +	 +	 | 
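
The script itself is three passes: `parseit()` tokenizes the LaTeX source into typed chunks (PLAIN, CSNAME, GROUP, COMMENT, ENV, ...), `changeit()` rewrites the chunk list in place (environments, the `\index*` commands, the `funcdesc`/`excdesc`/`datadesc` blocks), and `dumpit()` serializes the result as Texinfo; `main()` wraps the output between `texipre.dat` and `texipost.dat` and writes it to `@out.texi`. For readers who only want the shape of that pipeline without the 1992 idioms (string exceptions, backquote repr, the long-gone `regex` module), here is a minimal, self-contained sketch in modern Python. The chunk type names mirror the constants above, but the scanner regex and the tiny command table are invented for illustration and are not part of the original script.

```python
# Illustrative sketch only (not the 1992 code): the same chunk-scan /
# transform / dump idea in modern Python 3.  Chunk type names mirror the
# constants in partparse.py; the regex and CSMAP table are invented here.
import re
from dataclasses import dataclass

TOKEN = re.compile(r"""
      \\(?P<cs>[A-Za-z]+|.)        # control sequence, e.g. \emph or \%
    | (?P<lbrace>\{)               # start of a {...} group
    | %(?P<comment>[^\n]*)         # comment runs to end of line
    | (?P<plain>[^\\{}%]+)         # anything else is plain text
""", re.VERBOSE | re.DOTALL)

@dataclass
class Chunk:
    chtype: str        # 'PLAIN' | 'CSNAME' | 'GROUP' | 'COMMENT'
    where: int         # offset into the source buffer
    data: object       # text, or a list of sub-chunks for GROUP

def parse(buf, pos=0, level=0):
    """Return (chunks, next_pos); recursion handles {...} groups."""
    chunks = []
    while pos < len(buf):
        if buf[pos] == '}':
            if level == 0:
                raise ValueError(f"unmatched '}}' at offset {pos}")
            return chunks, pos + 1
        m = TOKEN.match(buf, pos)
        if m is None:                       # e.g. a lone trailing backslash
            chunks.append(Chunk('PLAIN', pos, buf[pos]))
            pos += 1
            continue
        where, pos = pos, m.end()
        if m.group('cs'):
            chunks.append(Chunk('CSNAME', where, m.group('cs')))
        elif m.group('lbrace'):
            sub, pos = parse(buf, pos, level + 1)
            chunks.append(Chunk('GROUP', where, sub))
        elif m.group('comment') is not None:
            chunks.append(Chunk('COMMENT', where, m.group('comment')))
        else:
            chunks.append(Chunk('PLAIN', where, m.group('plain')))
    if level:
        raise ValueError('missing closing brace')
    return chunks, pos

# Toy LaTeX -> Texinfo command map, standing in for changeit()'s rewriting.
CSMAP = {'emph': 'emph', 'code': 'code', 'ldots': 'dots', 'item': 'item'}

def dump(chunks):
    """Serialize chunks as Texinfo-ish text, like a stripped-down dumpit()."""
    out = []
    for ch in chunks:
        if ch.chtype == 'PLAIN':
            out.append(ch.data.replace('@', '@@'))   # '@' is special in Texinfo
        elif ch.chtype == 'CSNAME':
            out.append('@' + CSMAP.get(ch.data, ch.data))
        elif ch.chtype == 'GROUP':
            out.append('{' + dump(ch.data) + '}')
        elif ch.chtype == 'COMMENT':
            out.append('@c ' + ch.data + '\n')
    return ''.join(out)

if __name__ == '__main__':
    src = r"\emph{LaTeX} source % only a caricature of partparse.py"
    chunks, _ = parse(src)
    print(dump(chunks))    # -> @emph{LaTeX} source @c  only a caricature ...
```

Judging from `main()` above, the real script is driven as `python partparse.py first.tex [more.tex ...]` (the file names here are placeholders): it parses and transforms each named file in turn and writes everything to `@out.texi`, sandwiched between the contents of `texipre.dat` and `texipost.dat`.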
