diff options
Diffstat (limited to 'scripts/utility.py')
-rw-r--r-- | scripts/utility.py | 47 |
1 files changed, 32 insertions, 15 deletions
diff --git a/scripts/utility.py b/scripts/utility.py
index d816e3fd..066775f1 100644
--- a/scripts/utility.py
+++ b/scripts/utility.py
@@ -7,41 +7,58 @@
 """
 
 import os
+import os.path
 
 
 def unpack_output_file(path):
     """
     Unpack an output file into objects contining the line number, the text,
-    and the token name.
+    and the token name. The output file can be either a ``.output`` file
+    containing a token stream, or a ``.txt`` with input and tokens.
     """
     from collections import namedtuple
     entry = namedtuple('OutputEntry', ['text', 'token', 'linenumber'])
+
+    skip_until_tokens = path.endswith('.txt')
+
     for linenumber, line in enumerate(open(path).readlines()):
         line = line.strip()
-        if line:
-            # Line can start with ' or ", so let's check which one it is
-            # and find the matching one
-            quotation_start = 0
-            quotation_end = line.rfind(line[0])
-            text = line[quotation_start+1:quotation_end]
-            token = line.split()[-1]
-            text = text.replace('\\n', '\n')
-            text = text.replace('\\t', '\t')
-            yield entry(text, token, linenumber + 1)
+        if not line:
+            continue
+
+        if skip_until_tokens:
+            if line != '---tokens---':
+                continue
+            else:
+                skip_until_tokens = False
+
+        # Line can start with ' or ", so let's check which one it is
+        # and find the matching one
+        quotation_start = 0
+        quotation_end = line.rfind(line[0])
+        text = line[quotation_start+1:quotation_end]
+        token = line.split()[-1]
+        text = text.replace('\\n', '\n')
+        text = text.replace('\\t', '\t')
+        yield entry(text, token, linenumber + 1)
 
 
 def process_output_files(root_directory, callback):
     """
-    Process all output files in a directory using the provided callback.
-    The callback should return `True` in case of success, `False` otherwise.
+    Process all output (i.e. .output and .txt files for snippets) files
+    in a directory tree using the provided callback.
+    The callback should return ``True`` in case of success, ``False``
+    otherwise.
 
     The function returns the number of files for which the callback returned
-    `False`.
+    ``False``.
     """
     errors = 0
     for dir, _, files in os.walk(root_directory):
         for file in files:
-            if not file.endswith('.output'):
+            _, ext = os.path.splitext(file)
+
+            if ext not in {'.txt', '.output'}:
                 continue
 
             path = os.path.join(dir, file)