summaryrefslogtreecommitdiff
path: root/cmd2/parsing.py
diff options
context:
space:
mode:
authorkotfu <kotfu@kotfu.net>2018-04-30 00:01:47 -0600
committerkotfu <kotfu@kotfu.net>2018-04-30 00:01:47 -0600
commit119a6e49f1c37ea43059c74f7a2441a275731554 (patch)
tree63d09de62683e518791fa963379960a1d2bfec10 /cmd2/parsing.py
parent0140cf9425d2512935e2c53913efc83d383ba79d (diff)
downloadcmd2-git-119a6e49f1c37ea43059c74f7a2441a275731554.tar.gz
Fix nested alias and shortcut expansion
Diffstat (limited to 'cmd2/parsing.py')
-rw-r--r--cmd2/parsing.py168
1 files changed, 80 insertions, 88 deletions
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
index 4da8d814..7046b674 100644
--- a/cmd2/parsing.py
+++ b/cmd2/parsing.py
@@ -8,7 +8,7 @@ from typing import List, Tuple
from . import constants
-BLANK_LINE = '\n'
+LINE_FEED = '\n'
class Statement(str):
@@ -129,10 +129,35 @@ class StatementParser():
re.DOTALL | re.MULTILINE
)
+ # aliases have to be a word, so make a regular expression
+ # that matches the first word in the line. This regex has two
+ # parts, the first parenthesis enclosed group matches one
+ # or more non-whitespace characters, and the second group
+ # matches either a whitespace character or the end of the
+ # string. We use \A and \Z to ensure we always match the
+ # beginning and end of a string that may have multiple
+ # lines
+ self.command_pattern = re.compile(r'\A(\S+)(\s|\Z)')
+
+
def tokenize(self, line: str) -> List[str]:
- """Tokenize a string into a list"""
+ """Lex a string into a list of tokens.
+
+ Comments are removed, and shortcuts and aliases are expanded.
+ """
+
+ # strip C-style comments
+ # shlex will handle the python/shell style comments for us
+ line = re.sub(self.comment_pattern, self._comment_replacer, line)
+
+ # expand shortcuts and aliases
+ line = self._expand(line)
+
+ # split on whitespace
lexer = shlex.shlex(line, posix=False)
lexer.whitespace_split = True
+
+ # custom lexing
tokens = self._split_on_punctuation(list(lexer))
return tokens
@@ -141,34 +166,19 @@ class StatementParser():
comments, expanding aliases and shortcuts, and extracting output
redirection directives.
"""
- # strip C-style comments
- # shlex will handle the python/shell style comments for us
- # save rawinput for later
- rawinput = re.sub(self.comment_pattern, self._comment_replacer, rawinput)
- # we are going to modify line, so create a copy of the raw input
- line = rawinput
-
- command = None
- args = ''
-
- # expand shortcuts, have to do this first because
- # a shortcut can expand into multiple tokens, ie '!ls' becomes
- # 'shell ls'
- line = self.expand_shortcuts(line)
# handle the special case/hardcoded terminator of a blank line
- # we have to do this before we shlex on whitespace because it
+ # we have to do this before we tokenize because tokenizing
# destroys all unquoted whitespace in the input
terminator = None
- if line[-1:] == BLANK_LINE:
- terminator = BLANK_LINE
+ if rawinput[-1:] == LINE_FEED:
+ terminator = LINE_FEED
- tokens = self.tokenize(line)
+ command = None
+ args = ''
- # expand aliases
- if tokens:
- command_to_expand = tokens[0]
- tokens[0] = self.expand_aliases(command_to_expand)
+ # lex the input into a list of tokens
+ tokens = self.tokenize(rawinput)
# of the valid terminators, find the first one to occur in the input
terminator_pos = len(tokens)+1
@@ -184,7 +194,7 @@ class StatementParser():
pass
if terminator:
- if terminator == BLANK_LINE:
+ if terminator == LINE_FEED:
terminator_pos = len(tokens)+1
else:
terminator_pos = tokens.index(terminator)
@@ -228,7 +238,7 @@ class StatementParser():
try:
# find the first pipe if it exists
pipe_pos = tokens.index('|')
- # set everything after the first pipe to result.pipe_to
+ # save everything after the first pipe
pipe_to = ' '.join(tokens[pipe_pos+1:])
# remove all the tokens after the pipe
tokens = tokens[:pipe_pos]
@@ -252,18 +262,18 @@ class StatementParser():
else:
multiline_command = None
- # build Statement object
- result = Statement(args)
- result.raw = rawinput
- result.command = command
- result.args = args
- result.terminator = terminator
- result.output = output
- result.output_to = output_to
- result.pipe_to = pipe_to
- result.suffix = suffix
- result.multiline_command = multiline_command
- return result
+ # build the statement
+ statement = Statement(args)
+ statement.raw = rawinput
+ statement.command = command
+ statement.args = args
+ statement.terminator = terminator
+ statement.output = output
+ statement.output_to = output_to
+ statement.pipe_to = pipe_to
+ statement.suffix = suffix
+ statement.multiline_command = multiline_command
+ return statement
def parse_command_only(self, rawinput: str) -> Statement:
"""Partially parse input into a Statement object. The command is
@@ -271,41 +281,42 @@ class StatementParser():
Terminators, multiline commands, and output redirection are not
parsed.
"""
- # strip C-style comments
- # shlex will handle the python/shell style comments for us
- # save rawinput for later
- rawinput = re.sub(self.comment_pattern, self._comment_replacer, rawinput)
- # we are going to modify line, so create a copy of the raw input
- line = rawinput
- command = None
- args = ''
-
- # expand shortcuts, have to do this first because
- # a shortcut can expand into multiple tokens, ie '!ls' becomes
- # 'shell ls'
- line = self.expand_shortcuts(line)
-
- # split the input on whitespace
- lexer = shlex.shlex(line, posix=False)
- lexer.whitespace_split = True
- tokens = self._split_on_punctuation(list(lexer))
-
- # expand aliases
- if tokens:
- command_to_expand = tokens[0]
- tokens[0] = self.expand_aliases(command_to_expand)
+ # lex the input into a list of tokens
+ tokens = self.tokenize(rawinput)
+ # parse out the command and everything else
(command, args) = self._command_and_args(tokens)
- # build Statement object
- result = Statement(args)
- result.raw = rawinput
- result.command = command
- result.args = args
- return result
+ # build the statement
+ statement = Statement(args)
+ statement.raw = rawinput
+ statement.command = command
+ statement.args = args
+ return statement
+
+ def _expand(self, line: str) -> str:
+ """Expand shortcuts and aliases"""
- def expand_shortcuts(self, line: str) -> str:
- """Expand shortcuts at the beginning of input."""
+ # expand aliases
+ # make a copy of aliases so we can edit it
+ tmp_aliases = list(self.aliases.keys())
+ keep_expanding = bool(tmp_aliases)
+ while keep_expanding:
+ for cur_alias in tmp_aliases:
+ keep_expanding = False
+ # apply our regex to line
+ match = self.command_pattern.search(line)
+ if match:
+ # we got a match, extract the command
+ command = match.group(1)
+ if command == cur_alias:
+ # rebuild line with the expanded alias
+ line = self.aliases[cur_alias] + match.group(2) + line[match.end(2):]
+ tmp_aliases.remove(cur_alias)
+ keep_expanding = bool(tmp_aliases)
+ break
+
+ # expand shortcuts
for (shortcut, expansion) in self.shortcuts:
if line.startswith(shortcut):
# If the next character after the shortcut isn't a space, then insert one
@@ -318,25 +329,6 @@ class StatementParser():
break
return line
- def expand_aliases(self, command: str) -> str:
- """Given a command, expand any aliases for the command.
-
- If an alias contains shortcuts, the shortcuts will be expanded too.
- """
- # make a copy of aliases so we can edit it
- tmp_aliases = list(self.aliases.keys())
- keep_expanding = bool(tmp_aliases)
-
- while keep_expanding:
- for cur_alias in tmp_aliases:
- keep_expanding = False
- if command == cur_alias:
- command = self.aliases[cur_alias]
- tmp_aliases.remove(cur_alias)
- keep_expanding = bool(tmp_aliases)
- break
- return self.expand_shortcuts(command)
-
@staticmethod
def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
"""given a list of tokens, and return a tuple of the command