summaryrefslogtreecommitdiff
path: root/cmd2/parsing.py
diff options
context:
space:
mode:
Diffstat (limited to 'cmd2/parsing.py')
-rw-r--r--cmd2/parsing.py292
1 files changed, 292 insertions, 0 deletions
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
new file mode 100644
index 00000000..ffeb8bbe
--- /dev/null
+++ b/cmd2/parsing.py
@@ -0,0 +1,292 @@
+#
+# -*- coding: utf-8 -*-
+"""Command parsing classes for cmd2"""
+
import itertools
import re
import shlex

import cmd2
+
+BLANK_LINE = '\n'
+
class Statement(str):
    """String subclass with additional attributes to store the results of parsing.

    The cmd module in the standard library passes commands around as a
    string. To retain backwards compatibility, cmd2 does the same. However, we
    need a place to capture the additional output of the command parsing, so we add
    our own attributes to this subclass.

    The string portion of the class contains the arguments, but not the command, nor
    the output redirection clauses.
    """
    def __init__(self, object):
        """Initialize parse-result attributes to their empty defaults.

        :param object: the value passed to str() to build the string portion
                       (name kept as ``object`` to mirror ``str(object=...)``)
        """
        # the input as given to the parser (parseString stores the
        # comment-stripped text here)
        self.raw = str(object)
        # the command name, e.g. 'help'
        self.command = None
        # set to the command name when the command is a multiline command
        self.multilineCommand = None
        # has to be an empty string for compatibility with standard library cmd
        self.args = ''
        # the terminator character/token that ended the command, if any
        self.terminator = None
        # any text appearing after the terminator but before redirectors/pipes
        self.suffix = None
        # text to pipe the output to, i.e. everything after '|'
        self.pipeTo = None
        # BUGFIX: parseString() assigns result.inputFrom, but this attribute
        # was never initialized here like its siblings, so a Statement built
        # directly raised AttributeError on .inputFrom. Default it to None.
        # filename to read input from, i.e. everything after '<'
        self.inputFrom = None
        # the output redirection token used: '>' or '>>'
        self.output = None
        # filename to redirect output to
        self.outputTo = None
+
class CommandParser():
    """Parse raw text into command components.

    Configuration supplied at construction time:

    :param quotes: characters which delimit quoted strings
    :param allow_redirection: when True, treat redirection_chars specially
    :param redirection_chars: characters used for pipes and file redirection
    :param terminators: tokens which terminate a command
    :param multilineCommands: commands which may span multiple input lines
    :param aliases: mapping of alias name -> replacement command name
    :param shortcuts: list of (shortcut, expansion) tuples, each tuple
                      containing the shortcut and the expansion
    """

    # Strips C-style /* ... */ comments while leaving quoted strings intact.
    # Compiled once at class level instead of on every parseString() call.
    _COMMENT_PATTERN = re.compile(
        r'/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.DOTALL | re.MULTILINE
    )

    def __init__(
        self,
        quotes=None,
        allow_redirection=True,
        redirection_chars=None,
        terminators=None,
        multilineCommands=None,
        aliases=None,
        shortcuts=None,
    ):
        # BUGFIX: the original signature used mutable default arguments
        # (lists/dicts), which are created once and shared by every instance,
        # so a caller mutating e.g. parser.terminators silently changed the
        # defaults of all future parsers. Use None sentinels instead and
        # build a fresh default per instance.
        self.quotes = ['"', "'"] if quotes is None else quotes
        self.allow_redirection = allow_redirection
        if redirection_chars is None:
            redirection_chars = ['|', '<', '>']
        self.redirection_chars = redirection_chars
        self.terminators = [';'] if terminators is None else terminators
        self.multilineCommands = [] if multilineCommands is None else multilineCommands
        self.aliases = {} if aliases is None else aliases
        self.shortcuts = [] if shortcuts is None else shortcuts

    def parseString(self, rawinput):
        """Parse a raw input line into a Statement.

        :param rawinput: the raw command line typed by the user
        :return: a Statement whose string value is the arguments and whose
                 attributes carry the command, terminator, redirection and
                 pipe components
        """
        def replacer(match):
            matched = match.group(0)
            if matched.startswith('/'):
                # treat the removed /* ... */ comment as an empty string
                return ''
            # it was a quoted string: keep it untouched
            return matched

        # strip C-style comments; shlex handles python/shell '#' comments
        # for us. NOTE: result.raw below holds this comment-stripped text,
        # not the pristine input (original behavior, preserved).
        rawinput = self._COMMENT_PATTERN.sub(replacer, rawinput)
        line = rawinput

        # expand shortcuts first, because a shortcut can expand into
        # multiple tokens, i.e. '!ls' becomes 'shell ls'
        for (shortcut, expansion) in self.shortcuts:
            if line.startswith(shortcut):
                # if the character after the shortcut isn't a space, insert one
                shortcut_len = len(shortcut)
                if len(line) == shortcut_len or line[shortcut_len] != ' ':
                    expansion += ' '
                # expand only the leading occurrence
                line = line.replace(shortcut, expansion, 1)
                break

        # handle the special case/hardcoded terminator of a blank line;
        # must happen before shlex destroys all unquoted whitespace
        terminator = None
        if line[-1:] == BLANK_LINE:
            terminator = BLANK_LINE

        lexer = shlex.shlex(line, posix=False)
        lexer.whitespace_split = True
        tokens = self.split_on_punctuation(list(lexer))

        # of the valid terminators, find the one occurring earliest in the
        # input. BUGFIX: the original broke out of this loop as soon as any
        # terminator *type* matched, so with several terminator types it
        # could report one appearing later in the line; scan them all and
        # keep the smallest position.
        terminator_pos = len(tokens) + 1
        for test_terminator in self.terminators:
            try:
                pos = tokens.index(test_terminator)
                if pos < terminator_pos:
                    terminator_pos = pos
                    terminator = test_terminator
            except ValueError:
                # this terminator type is not in the tokens
                pass

        if terminator:
            if terminator == BLANK_LINE:
                # the blank-line terminator never appears as a token
                terminator_pos = len(tokens) + 1
            else:
                terminator_pos = tokens.index(terminator)
            # everything before the first terminator is the command and args
            (command, args) = self._command_and_args(tokens[:terminator_pos])
            # the suffix is set later; drop the terminator and what preceded it
            tokens = tokens[terminator_pos + 1:]

        # check for input redirection from a file
        inputFrom = None
        try:
            input_pos = tokens.index('<')
            inputFrom = ' '.join(tokens[input_pos + 1:])
            tokens = tokens[:input_pos]
        except ValueError:
            pass

        # check for output redirection: truncating '>' first, then appending '>>'
        output = None
        outputTo = None
        try:
            output_pos = tokens.index('>')
            output = '>'
            outputTo = ' '.join(tokens[output_pos + 1:])
            # remove all the tokens after the output redirect
            tokens = tokens[:output_pos]
        except ValueError:
            pass

        try:
            output_pos = tokens.index('>>')
            output = '>>'
            outputTo = ' '.join(tokens[output_pos + 1:])
            # remove all tokens after the output redirect
            tokens = tokens[:output_pos]
        except ValueError:
            pass

        # everything after the first pipe becomes pipeTo
        try:
            pipe_pos = tokens.index('|')
            pipeTo = ' '.join(tokens[pipe_pos + 1:])
            tokens = tokens[:pipe_pos]
        except ValueError:
            # no pipe in the tokens
            pipeTo = None

        if terminator:
            # whatever is left after the terminator is the suffix
            suffix = ' '.join(tokens)
        else:
            # no terminator, so whatever is left is the command and the args
            suffix = None
            (command, args) = self._command_and_args(tokens)

        # expand aliases; iterate over a shrinking copy of the alias names so
        # each alias fires at most once, preventing infinite expansion loops
        tmp_aliases = list(self.aliases.keys())
        keep_expanding = len(tmp_aliases) > 0
        while keep_expanding:
            for cur_alias in tmp_aliases:
                keep_expanding = False
                if command == cur_alias:
                    command = self.aliases[cur_alias]
                    tmp_aliases.remove(cur_alias)
                    keep_expanding = len(tmp_aliases) > 0
                    break

        # set multiline: a "partial" multiline command (no terminator yet)
        # reports no arguments
        if command in self.multilineCommands:
            multilineCommand = command
            if not terminator:
                args = ''
        else:
            multilineCommand = None

        # build the Statement object
        result = Statement(args)
        result.raw = rawinput
        result.command = command
        result.args = args
        result.terminator = terminator
        result.inputFrom = inputFrom
        result.output = output
        result.outputTo = outputTo
        result.pipeTo = pipeTo
        result.suffix = suffix
        result.multilineCommand = multilineCommand
        return result

    def _command_and_args(self, tokens):
        """Given a list of tokens, return a (command, args) tuple.

        command is None when there are no tokens; args is always a string
        (empty when absent) for compatibility with the standard library cmd.
        """
        command = None
        args = ''

        if tokens:
            command = tokens[0]

        if len(tokens) > 1:
            args = ' '.join(tokens[1:])

        return (command, args)

    def split_on_punctuation(self, initial_tokens):
        """Further split shlex tokens on unquoted punctuation characters.

        Punctuation characters act as word breaks inside unquoted tokens;
        each run of the *same* punctuation character (e.g. '>>') is kept
        together as a single token, as is each run of non-punctuation
        characters.

        :param initial_tokens: the tokens as parsed by shlex
        :return: the punctuated tokens
        """
        punctuation = list(self.terminators)
        if self.allow_redirection:
            punctuation.extend(self.redirection_chars)

        punctuated_tokens = []
        for token in initial_tokens:
            # tokens of at most one character, and quoted tokens, pass
            # through unchanged - nothing to split
            if len(token) <= 1 or token[0] in self.quotes:
                punctuated_tokens.append(token)
                continue
            # groupby keys punctuation chars by themselves (so identical
            # punctuation runs stay together) and all other chars by '' (so
            # consecutive ordinary chars stay together)
            for _, run in itertools.groupby(
                    token, key=lambda ch: ch if ch in punctuation else ''):
                punctuated_tokens.append(''.join(run))

        return punctuated_tokens