#
# -*- coding: utf-8 -*-
"""Statement parsing classes for cmd2"""

import itertools
import re
import shlex
from typing import List, Optional, Tuple

try:
    # Nothing in this module references cmd2 directly.  The import is retained
    # from the original file, but tolerated when missing so the parser can be
    # imported (and tested) on its own.
    import cmd2  # noqa: F401
except ImportError:  # pragma: no cover
    cmd2 = None

BLANK_LINE = '\n'

# Matches either a C-style comment (terminated by ``*/`` or, failing that, by
# the end of a line) or a complete single/double quoted string.  Quoted strings
# are matched only so that comment markers inside them are left untouched.
_COMMENT_OR_QUOTED = re.compile(
    r'/\*.*?(\*/|$)|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
    re.DOTALL | re.MULTILINE
)


def _strip_comment(match):
    """Return '' for a matched C-style comment, or the match unchanged otherwise."""
    text = match.group(0)
    # only the comment alternative of the pattern can start with a slash
    return '' if text.startswith('/') else text


class Statement(str):
    """String subclass with additional attributes to store the results of parsing.

    The cmd module in the standard library passes commands around as a
    string. To retain backwards compatibility, cmd2 does the same. However, we
    need a place to capture the additional output of the command parsing, so we add
    our own attributes to this subclass.

    The string portion of the class contains the arguments, but not the command, nor
    the output redirection clauses.

    Attributes (all filled in by ``StatementParser.parse``):

    - ``raw``: the input text
    - ``command``: the first token of the statement, or None
    - ``multilineCommand``: the command if it is a multiline command, else None
    - ``args``: the remaining tokens joined by single spaces
    - ``terminator``: the terminator that ended the statement, or None
    - ``suffix``: what is left after the terminator once redirection is removed
    - ``inputFrom``: text following a ``<`` token, or None
    - ``output`` / ``outputTo``: the ``>`` or ``>>`` token and the text after it
    - ``pipeTo``: text following a ``|`` token, or None
    """
    # NOTE: the parameter name shadows the ``object`` builtin, but it mirrors the
    # signature of ``str`` and is kept so keyword callers continue to work.
    def __init__(self, object):
        self.raw = str(object)
        self.command = None
        self.multilineCommand = None
        # has to be an empty string for compatibility with standard library cmd
        self.args = ''
        self.terminator = None
        self.suffix = None
        self.pipeTo = None
        # parse() assigns inputFrom, so make sure it exists on every instance
        self.inputFrom = None
        self.output = None
        self.outputTo = None


class StatementParser:
    """Parse raw text into command components.

    Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion.
    """
    def __init__(
            self,
            quotes=None,
            allow_redirection=True,
            redirection_chars=None,
            terminators=None,
            multilineCommands=None,
            aliases=None,
            shortcuts=None,
    ):
        """Store the parser configuration.

        Every container argument defaults to ``None``, which means "use a fresh
        default" (shown in brackets).  Containers passed by the caller are stored
        by reference, not copied.

        :param quotes: characters that start a quoted token  [``"`` and ``'``]
        :param allow_redirection: whether redirection characters split tokens
        :param redirection_chars: redirection characters  [``|``, ``<``, ``>``]
        :param terminators: tokens that end a statement  [``;``]
        :param multilineCommands: names of multiline commands  [none]
        :param aliases: mapping of alias name to replacement command  [none]
        :param shortcuts: list of (shortcut, expansion) tuples  [none]
        """
        # None sentinels instead of list/dict literals: a literal default would
        # be one object shared by every parser built without that argument.
        self.quotes = ['"', "'"] if quotes is None else quotes
        self.allow_redirection = allow_redirection
        self.redirection_chars = ['|', '<', '>'] if redirection_chars is None else redirection_chars
        self.terminators = [';'] if terminators is None else terminators
        self.multilineCommands = [] if multilineCommands is None else multilineCommands
        self.aliases = {} if aliases is None else aliases
        self.shortcuts = [] if shortcuts is None else shortcuts

    def parse(self, rawinput: str) -> Statement:
        """Parse a line of input into a Statement.

        The steps, in order: strip C-style comments, expand a leading shortcut,
        tokenize with shlex, split tokens on punctuation, expand aliases on the
        first token, locate the terminator, then peel off input redirection,
        output redirection and a pipe from the remaining tokens.

        :param rawinput: the text to parse
        :return: a Statement whose string value is the args and whose ``raw``
                 attribute is the input with C-style comments removed
        :raises ValueError: propagated from shlex when it rejects the input
        """
        # strip C-style comments
        # shlex will handle the python/shell style comments for us
        rawinput = _COMMENT_OR_QUOTED.sub(_strip_comment, rawinput)
        line = rawinput
        command = None
        args = ''

        # expand shortcuts, have to do this first because
        # a shortcut can expand into multiple tokens, ie '!ls' becomes
        # 'shell ls'
        for (shortcut, expansion) in self.shortcuts:
            if line.startswith(shortcut):
                # If the next character after the shortcut isn't a space, then insert one
                shortcut_len = len(shortcut)
                if len(line) == shortcut_len or line[shortcut_len] != ' ':
                    expansion += ' '

                # Expand the shortcut
                line = line.replace(shortcut, expansion, 1)
                break

        # handle the special case/hardcoded terminator of a blank line
        # we have to do this before we shlex on whitespace because it
        # destroys all unquoted whitespace in the input
        terminator = None
        if line[-1:] == BLANK_LINE:
            terminator = BLANK_LINE

        lexer = shlex.shlex(line, posix=False)
        lexer.whitespace_split = True
        tokens = self.split_on_punctuation(list(lexer))

        # only the first token (the command) is subject to alias expansion
        if tokens:
            tokens[0] = self.expand_aliases(tokens[0])

        # of the valid terminators, find the first one to occur in the input.
        # Every configured terminator is checked and the smallest position wins;
        # stopping at the first one found would pick by configuration order.
        terminator_pos = len(tokens) + 1
        for test_terminator in self.terminators:
            try:
                pos = tokens.index(test_terminator)
            except ValueError:
                # the terminator is not in the tokens
                continue
            if pos < terminator_pos:
                terminator_pos = pos
                terminator = test_terminator

        if terminator:
            if terminator == BLANK_LINE:
                # a blank line terminator is not a token; it follows all of them
                terminator_pos = len(tokens) + 1
            # everything before the first terminator is the command and the args
            (command, args) = self._command_and_args(tokens[:terminator_pos])
            # we will set the suffix later
            # remove all the tokens before and including the terminator
            tokens = tokens[terminator_pos + 1:]
        else:
            (testcommand, testargs) = self._command_and_args(tokens)
            if testcommand in self.multilineCommands:
                # no terminator on this line but we have a multiline command
                # everything else on the line is part of the args
                # because redirectors can only be after a terminator
                command = testcommand
                args = testargs
                tokens = []

        # check for input from file
        tokens, inputFrom = self._split_at(tokens, '<')

        # check for output redirect; '>' is examined before '>>', and a later
        # match replaces an earlier one
        output = None
        outputTo = None
        for redirector in ('>', '>>'):
            tokens, target = self._split_at(tokens, redirector)
            if target is not None:
                output = redirector
                outputTo = target

        # check for pipes: everything after the first pipe goes to pipeTo
        tokens, pipeTo = self._split_at(tokens, '|')

        if terminator:
            # whatever is left is the suffix
            suffix = ' '.join(tokens)
        else:
            # no terminator, so whatever is left is the command and the args
            suffix = None
            if not command:
                # command could already have been set, if so, don't set it again
                (command, args) = self._command_and_args(tokens)

        # set multiline
        multilineCommand = command if command in self.multilineCommands else None

        # build Statement object
        result = Statement(args)
        result.raw = rawinput
        result.command = command
        result.args = args
        result.terminator = terminator
        result.inputFrom = inputFrom
        result.output = output
        result.outputTo = outputTo
        result.pipeTo = pipeTo
        result.suffix = suffix
        result.multilineCommand = multilineCommand
        return result

    @staticmethod
    def _split_at(tokens: List[str], marker: str) -> Tuple[List[str], Optional[str]]:
        """Cut tokens at the first occurrence of marker.

        :param tokens: the tokens to search
        :param marker: the token to cut at
        :return: (tokens before the marker, tokens after it joined by spaces),
                 or (tokens, None) when the marker is not present
        """
        try:
            pos = tokens.index(marker)
        except ValueError:
            return tokens, None
        return tokens[:pos], ' '.join(tokens[pos + 1:])

    def expand_aliases(self, command: str) -> str:
        """Given a command, expand any aliases for the command.

        Expansion repeats on the result, but each alias is applied at most once,
        so aliases that refer to each other cannot loop forever.

        :param command: the command to expand
        :return: the command after alias expansion
        """
        unused_aliases = set(self.aliases)
        while command in unused_aliases:
            unused_aliases.discard(command)
            command = self.aliases[command]
        return command

    def _command_and_args(self, tokens: List[str]) -> Tuple[Optional[str], str]:
        """Given a list of tokens, return a tuple of the command
        and the args as a string.

        :param tokens: the tokens making up the command and its arguments
        :return: (first token, remaining tokens joined by spaces); the command
                 is None and the args are '' when there are no tokens
        """
        if not tokens:
            return (None, '')
        return (tokens[0], ' '.join(tokens[1:]))

    def split_on_punctuation(self, tokens: List[str]) -> List[str]:
        """Further split tokens from a command line using punctuation characters
        as word breaks when they are in unquoted strings. Each run of the same
        punctuation character is treated as a single token.

        The punctuation characters are the terminators plus, when redirection is
        allowed, the redirection characters.

        :param tokens: the tokens as parsed by shlex
        :return: the punctuated tokens
        """
        punctuation = []
        punctuation.extend(self.terminators)
        if self.allow_redirection:
            punctuation.extend(self.redirection_chars)

        def run_key(char):
            # each punctuation character forms its own run; all other
            # characters share a single key so they stay together
            return char if char in punctuation else None

        punctuated_tokens = []
        for cur_initial_token in tokens:
            # Save tokens up to 1 character in length or quoted tokens. No need to parse these.
            if len(cur_initial_token) <= 1 or cur_initial_token[0] in self.quotes:
                punctuated_tokens.append(cur_initial_token)
                continue

            # Break the token into consecutive runs of characters sharing a key
            for _, run in itertools.groupby(cur_initial_token, key=run_key):
                punctuated_tokens.append(''.join(run))

        return punctuated_tokens