diff options
Diffstat (limited to 'cmd2/parsing.py')
-rw-r--r-- | cmd2/parsing.py | 292 |
1 file changed, 292 insertions, 0 deletions
#
# -*- coding: utf-8 -*-
"""Command parsing classes for cmd2"""

import itertools
import re
import shlex

# NOTE(review): the original file also did ``import cmd2`` here.  This module
# lives *inside* the cmd2 package, so that was a circular self-import, and the
# name is never used anywhere in this file; it has been removed.

# Hard-coded terminator representing a blank input line (used to finish a
# multiline command).
BLANK_LINE = '\n'


class Statement(str):
    """String subclass with additional attributes to store the results of parsing.

    The cmd module in the standard library passes commands around as a
    string. To retain backwards compatibility, cmd2 does the same. However, we
    need a place to capture the additional output of the command parsing, so we add
    our own attributes to this subclass.

    The string portion of the class contains the arguments, but not the command, nor
    the output redirection clauses.
    """

    def __init__(self, object):
        """Initialize every parse-result attribute to its empty default.

        :param object: the value the string portion was built from (the
            argument text).  The name shadows the builtin but is kept for
            signature compatibility.
        """
        self.raw = str(object)        # the unmodified input this was parsed from
        self.command = None           # name of the command
        self.multilineCommand = None  # set to the command name if it is a multiline command
        # has to be an empty string for compatibility with standard library cmd
        self.args = ''
        self.terminator = None        # the terminator that ended the command, if any
        self.suffix = None            # text between the terminator and any redirectors
        self.pipeTo = None            # shell command to pipe output to, if any
        self.output = None            # output redirection operator ('>' or '>>'), if any
        self.outputTo = None          # file to redirect output to, if any
        # BUG FIX: parseString() assigns this attribute on every result, but
        # the original __init__ never initialized it.
        self.inputFrom = None         # file to redirect input from, if any


class CommandParser():
    """Parse raw text into command components.

    :param quotes: characters that delimit quoted strings
    :param allow_redirection: when True, ``redirection_chars`` act as word
        breaks and redirection clauses are recognized
    :param redirection_chars: characters used for pipes and redirection
    :param terminators: strings which terminate a command
    :param multilineCommands: commands that may span multiple input lines
    :param aliases: mapping of alias name to its replacement command
    :param shortcuts: list of tuples with each tuple containing the shortcut
        and the expansion.  A shortcut can expand into multiple tokens,
        e.g. ``'!'`` -> ``'shell'``.
    """

    def __init__(
            self,
            quotes=None,
            allow_redirection=True,
            redirection_chars=None,
            terminators=None,
            multilineCommands=None,
            aliases=None,
            shortcuts=None,
    ):
        # BUG FIX: the original signature used mutable default arguments
        # (lists and a dict), which are shared across every CommandParser
        # instance — mutating one parser's defaults silently affected all
        # others.  Use None sentinels and build fresh defaults per instance.
        self.quotes = ['"', "'"] if quotes is None else quotes
        self.allow_redirection = allow_redirection
        self.redirection_chars = ['|', '<', '>'] if redirection_chars is None else redirection_chars
        self.terminators = [';'] if terminators is None else terminators
        self.multilineCommands = [] if multilineCommands is None else multilineCommands
        self.aliases = {} if aliases is None else aliases
        self.shortcuts = [] if shortcuts is None else shortcuts

    def _strip_c_comments(self, rawinput):
        """Remove C-style ``/* */`` comments, leaving quoted strings intact.

        shlex will handle the python/shell style (#) comments for us.
        """
        def replacer(match):
            s = match.group(0)
            if s.startswith('/'):
                # treat the removed comment as an empty string
                return ''
            # a quoted string matched — keep it verbatim so comment
            # delimiters inside quotes survive
            return s

        pattern = re.compile(
            r'/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
            re.DOTALL | re.MULTILINE
        )
        return re.sub(pattern, replacer, rawinput)

    def _expand_shortcuts(self, line):
        """Expand the first matching shortcut at the start of ``line``.

        Must run before tokenization because a shortcut can expand into
        multiple tokens, i.e. '!ls' becomes 'shell ls'.
        """
        for (shortcut, expansion) in self.shortcuts:
            if line.startswith(shortcut):
                # If the next character after the shortcut isn't a space, insert one
                shortcut_len = len(shortcut)
                if len(line) == shortcut_len or line[shortcut_len] != ' ':
                    expansion += ' '
                # expand only the first occurrence
                line = line.replace(shortcut, expansion, 1)
                break
        return line

    def _expand_aliases(self, command):
        """Repeatedly expand ``command`` through ``self.aliases``.

        Each alias may be consumed at most once, so a cycle of aliases
        cannot loop forever.
        """
        remaining = list(self.aliases.keys())
        expanded = True
        while expanded and remaining:
            expanded = False
            for name in remaining:
                if command == name:
                    command = self.aliases[name]
                    remaining.remove(name)
                    expanded = True
                    break
        return command

    def parseString(self, rawinput):
        """Parse ``rawinput`` into a :class:`Statement`.

        Recognizes, in order: C-style comments, shortcuts, the blank-line
        terminator, configured terminators, input redirection (``<``),
        output redirection (``>`` / ``>>``), pipes (``|``), aliases, and
        multiline commands.

        :param rawinput: the raw command line typed by the user
        :return: a Statement whose string value is the argument text
        """
        # strip C-style comments first; shlex handles '#' comments itself
        rawinput = self._strip_c_comments(rawinput)
        line = self._expand_shortcuts(rawinput)

        # handle the special case/hardcoded terminator of a blank line;
        # we have to do this before shlex splits on whitespace because it
        # destroys all unquoted whitespace in the input
        terminator = BLANK_LINE if line[-1:] == BLANK_LINE else None

        lexer = shlex.shlex(line, posix=False)
        lexer.whitespace_split = True
        tokens = self.split_on_punctuation(list(lexer))

        # of the valid terminators, find the first one to occur in the input
        terminator_pos = len(tokens) + 1
        for test_terminator in self.terminators:
            try:
                pos = tokens.index(test_terminator)
                if pos < terminator_pos:
                    terminator_pos = pos
                    terminator = test_terminator
                    # BUG FIX: the original code had a ``break`` here, which
                    # stopped after the first terminator found *in
                    # self.terminators* rather than the earliest terminator
                    # occurring *in the input*, contradicting the intent
                    # stated above.  All terminators are now considered.
            except ValueError:
                # this terminator is not in the tokens
                pass

        if terminator:
            if terminator == BLANK_LINE:
                # a blank-line terminator has no token of its own; every
                # token belongs to the command and args
                terminator_pos = len(tokens) + 1
            else:
                terminator_pos = tokens.index(terminator)
            # everything before the first terminator is the command and the args
            (command, args) = self._command_and_args(tokens[:terminator_pos])
            # drop the terminator and everything before it; what remains is
            # the suffix and any redirection clauses
            tokens = tokens[terminator_pos + 1:]

        # check for input redirection from a file
        inputFrom = None
        try:
            input_pos = tokens.index('<')
            inputFrom = ' '.join(tokens[input_pos + 1:])
            tokens = tokens[:input_pos]
        except ValueError:
            pass

        # check for output redirection.  '>' is checked before '>>' as in the
        # original; the two cannot collide because split_on_punctuation keeps
        # a run of identical punctuation chars ('>>') as a single token.
        output = None
        outputTo = None
        for redirect in ('>', '>>'):
            try:
                output_pos = tokens.index(redirect)
                output = redirect
                outputTo = ' '.join(tokens[output_pos + 1:])
                # remove all the tokens after the output redirect
                tokens = tokens[:output_pos]
            except ValueError:
                pass

        # check for pipes
        try:
            # find the first pipe if it exists
            pipe_pos = tokens.index('|')
            # everything after the first pipe becomes the piped-to command
            pipeTo = ' '.join(tokens[pipe_pos + 1:])
            tokens = tokens[:pipe_pos]
        except ValueError:
            # no pipe in the tokens
            pipeTo = None

        if terminator:
            # whatever is left is the suffix
            suffix = ' '.join(tokens)
        else:
            # no terminator, so whatever is left is the command and the args
            suffix = None
            (command, args) = self._command_and_args(tokens)

        command = self._expand_aliases(command)

        # set multiline: a multiline command without its terminator yet is
        # "partial", so report the command but no arguments
        if command in self.multilineCommands:
            multilineCommand = command
            if not terminator:
                args = ''
        else:
            multilineCommand = None

        # build Statement object
        result = Statement(args)
        result.raw = rawinput
        result.command = command
        result.args = args
        result.terminator = terminator
        result.inputFrom = inputFrom
        result.output = output
        result.outputTo = outputTo
        result.pipeTo = pipeTo
        result.suffix = suffix
        result.multilineCommand = multilineCommand
        return result

    def _command_and_args(self, tokens):
        """Given a list of tokens, return a tuple of the command
        and the args as a string.

        Returns ``(None, '')`` when ``tokens`` is empty.
        """
        command = tokens[0] if tokens else None
        args = ' '.join(tokens[1:]) if len(tokens) > 1 else ''
        return (command, args)

    def split_on_punctuation(self, initial_tokens):
        """Further split tokens from a command line using punctuation characters
        as word breaks when they are in unquoted strings.

        Each run of *identical* punctuation characters is treated as a single
        token (so ``'>>'`` survives as one token), while any run of
        non-punctuation characters stays together.

        :param initial_tokens: the tokens as parsed by shlex
        :return: the punctuated tokens
        """
        punctuation = set(self.terminators)
        if self.allow_redirection:
            punctuation.update(self.redirection_chars)

        punctuated_tokens = []
        for token in initial_tokens:
            # tokens up to 1 character long, and quoted tokens, need no
            # further parsing
            if len(token) <= 1 or token[0] in self.quotes:
                punctuated_tokens.append(token)
                continue

            # group consecutive characters: each distinct punctuation char is
            # its own groupby key (so identical punctuation chars run
            # together), while every non-punctuation char shares the key ''
            # (so plain text runs together)
            for _, run in itertools.groupby(
                    token, key=lambda c: c if c in punctuation else ''):
                punctuated_tokens.append(''.join(run))

        return punctuated_tokens