#!/usr/bin/env python2.7 # WARNING! This a Python 2 script. Read README.rst for rationale. import os import re import sys from plex import Scanner, Str, Lexicon, Opt, Bol, State, AnyChar, TEXT, IGNORE from plex.traditional import re as Re try: from io import BytesIO as UStringIO # Python 2 except ImportError: from io import StringIO as UStringIO # Python 3 class MyScanner(Scanner): def __init__(self, info, name=''): Scanner.__init__(self, self.lexicon, info, name) def begin(self, state_name): Scanner.begin(self, state_name) def sep_seq(sequence, sep): pat = Str(sequence[0]) for s in sequence[1:]: pat += sep + Str(s) return pat def runScanner(data, scanner_class, lexicon=None): info = UStringIO(data) outfo = UStringIO() if lexicon is not None: scanner = scanner_class(lexicon, info) else: scanner = scanner_class(info) while True: value, text = scanner.read() if value is None: break elif value is IGNORE: pass else: outfo.write(value) return outfo.getvalue(), scanner class LenSubsScanner(MyScanner): """Following clapack, we remove ftnlen arguments, which f2c puts after a char * argument to hold the length of the passed string. This is just a nuisance in C. """ def __init__(self, info, name=''): MyScanner.__init__(self, info, name) self.paren_count = 0 def beginArgs(self, text): if self.paren_count == 0: self.begin('args') self.paren_count += 1 return text def endArgs(self, text): self.paren_count -= 1 if self.paren_count == 0: self.begin('') return text digits = Re('[0-9]+') iofun = Re(r'\([^;]*;') decl = Re(r'\([^)]*\)[,;'+'\n]') any = Re('[.]*') S = Re('[ \t\n]*') cS = Str(',') + S len_ = Re('[a-z][a-z0-9]*_len') iofunctions = Str("s_cat", "s_copy", "s_stop", "s_cmp", "i_len", "do_fio", "do_lio") + iofun # Routines to not scrub the ftnlen argument from keep_ftnlen = (Str('ilaenv_') | Str('iparmq_') | Str('s_rnge')) + Str('(') lexicon = Lexicon([ (iofunctions, TEXT), (keep_ftnlen, beginArgs), State('args', [ (Str(')'), endArgs), (Str('('), beginArgs), (AnyChar, TEXT), ]), (cS+Re(r'[1-9][0-9]*L'), IGNORE), (cS+Str('ftnlen')+Opt(S+len_), IGNORE), (cS+sep_seq(['(', 'ftnlen', ')'], S)+S+digits, IGNORE), (Bol+Str('ftnlen ')+len_+Str(';\n'), IGNORE), (cS+len_, TEXT), (AnyChar, TEXT), ]) def scrubFtnlen(source): return runScanner(source, LenSubsScanner)[0] def cleanSource(source): # remove whitespace at end of lines source = re.sub(r'[\t ]+\n', '\n', source) # remove comments like .. Scalar Arguments .. source = re.sub(r'(?m)^[\t ]*/\* *\.\. .*?\n', '', source) # collapse blanks of more than two in-a-row to two source = re.sub(r'\n\n\n\n+', r'\n\n\n', source) return source class LineQueue: def __init__(self): object.__init__(self) self._queue = [] def add(self, line): self._queue.append(line) def clear(self): self._queue = [] def flushTo(self, other_queue): for line in self._queue: other_queue.add(line) self.clear() def getValue(self): q = LineQueue() self.flushTo(q) s = ''.join(q._queue) self.clear() return s class CommentQueue(LineQueue): def __init__(self): LineQueue.__init__(self) def add(self, line): if line.strip() == '': LineQueue.add(self, '\n') else: line = ' ' + line[2:-3].rstrip() + '\n' LineQueue.add(self, line) def flushTo(self, other_queue): if len(self._queue) == 0: pass elif len(self._queue) == 1: other_queue.add('/*' + self._queue[0][2:].rstrip() + ' */\n') else: other_queue.add('/*\n') LineQueue.flushTo(self, other_queue) other_queue.add('*/\n') self.clear() # This really seems to be about 4x longer than it needs to be def cleanComments(source): lines = LineQueue() comments = CommentQueue() def isCommentLine(line): return line.startswith('/*') and line.endswith('*/\n') blanks = LineQueue() def isBlank(line): return line.strip() == '' def SourceLines(line): if isCommentLine(line): comments.add(line) return HaveCommentLines else: lines.add(line) return SourceLines def HaveCommentLines(line): if isBlank(line): blanks.add('\n') return HaveBlankLines elif isCommentLine(line): comments.add(line) return HaveCommentLines else: comments.flushTo(lines) lines.add(line) return SourceLines def HaveBlankLines(line): if isBlank(line): blanks.add('\n') return HaveBlankLines elif isCommentLine(line): blanks.flushTo(comments) comments.add(line) return HaveCommentLines else: comments.flushTo(lines) blanks.flushTo(lines) lines.add(line) return SourceLines state = SourceLines for line in UStringIO(source): state = state(line) comments.flushTo(lines) return lines.getValue() def removeHeader(source): lines = LineQueue() def LookingForHeader(line): m = re.match(r'/\*[^\n]*-- translated', line) if m: return InHeader else: lines.add(line) return LookingForHeader def InHeader(line): if line.startswith('*/'): return OutOfHeader else: return InHeader def OutOfHeader(line): if line.startswith('#include "f2c.h"'): pass else: lines.add(line) return OutOfHeader state = LookingForHeader for line in UStringIO(source): state = state(line) return lines.getValue() def removeSubroutinePrototypes(source): # This function has never worked as advertised by its name: # - "/* Subroutine */" declarations may span multiple lines and # cannot be matched by a line by line approach. # - The caret in the initial regex would prevent any match, even # of single line "/* Subroutine */" declarations. # # While we could "fix" this function to do what the name implies # it should do, we have no hint of what it should really do. # # Therefore we keep the existing (non-)functionaity, documenting # this function as doing nothing at all. return source def removeBuiltinFunctions(source): lines = LineQueue() def LookingForBuiltinFunctions(line): if line.strip() == '/* Builtin functions */': return InBuiltInFunctions else: lines.add(line) return LookingForBuiltinFunctions def InBuiltInFunctions(line): if line.strip() == '': return LookingForBuiltinFunctions else: return InBuiltInFunctions state = LookingForBuiltinFunctions for line in UStringIO(source): state = state(line) return lines.getValue() def replaceDlamch(source): """Replace dlamch_ calls with appropriate macros""" def repl(m): s = m.group(1) return dict(E='EPSILON', P='PRECISION', S='SAFEMINIMUM', B='BASE')[s[0]] source = re.sub(r'dlamch_\("(.*?)"\)', repl, source) source = re.sub(r'^\s+extern.*? dlamch_.*?;$(?m)', '', source) return source # do it def scrubSource(source, nsteps=None, verbose=False): steps = [ ('scrubbing ftnlen', scrubFtnlen), ('remove header', removeHeader), ('clean source', cleanSource), ('clean comments', cleanComments), ('replace dlamch_() calls', replaceDlamch), ('remove prototypes', removeSubroutinePrototypes), ('remove builtin function prototypes', removeBuiltinFunctions), ] if nsteps is not None: steps = steps[:nsteps] for msg, step in steps: if verbose: print(msg) source = step(source) return source if __name__ == '__main__': filename = sys.argv[1] outfilename = os.path.join(sys.argv[2], os.path.basename(filename)) with open(filename) as fo: source = fo.read() if len(sys.argv) > 3: nsteps = int(sys.argv[3]) else: nsteps = None source = scrub_source(source, nsteps, verbose=True) with open(outfilename, 'w') as writefo: writefo.write(source)