diff options
author | kotfu <kotfu@kotfu.net> | 2018-04-29 20:05:16 -0600 |
---|---|---|
committer | kotfu <kotfu@kotfu.net> | 2018-04-29 20:05:16 -0600 |
commit | 0140cf9425d2512935e2c53913efc83d383ba79d (patch) | |
tree | e0b53f4ea9ab82af4c8660da7a7273b334bc158b /cmd2/parsing.py | |
parent | 4b903e0cc868a5410691f7c655efbad9d427124f (diff) | |
download | cmd2-git-0140cf9425d2512935e2c53913efc83d383ba79d.tar.gz |
Internal refactoring of parsing and tokenizing code
Diffstat (limited to 'cmd2/parsing.py')
-rw-r--r-- | cmd2/parsing.py | 50 |
1 file changed, 31 insertions, 19 deletions
diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 3c56bcc6..4da8d814 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -10,13 +10,6 @@ from . import constants BLANK_LINE = '\n' -def _comment_replacer(match): - matched_string = match.group(0) - if matched_string.startswith('/'): - # the matched string was a comment, so remove it - return '' - # the matched string was a quoted string, return the match - return matched_string class Statement(str): """String subclass with additional attributes to store the results of parsing. @@ -75,6 +68,7 @@ class Statement(str): """ return '{} {}'.format('' if self.command is None else self.command, self.args).strip() + class StatementParser(): """Parse raw text into command components. @@ -135,16 +129,25 @@ class StatementParser(): re.DOTALL | re.MULTILINE ) + def tokenize(self, line: str) -> List[str]: + """Tokenize a string into a list""" + lexer = shlex.shlex(line, posix=False) + lexer.whitespace_split = True + tokens = self._split_on_punctuation(list(lexer)) + return tokens + def parse(self, rawinput: str) -> Statement: - """Parse input into a Statement object, stripping comments, expanding - aliases and shortcuts, and extracting output redirection directives. + """Tokenize the input and parse it into a Statement object, stripping + comments, expanding aliases and shortcuts, and extracting output + redirection directives. 
""" # strip C-style comments # shlex will handle the python/shell style comments for us # save rawinput for later - rawinput = re.sub(self.comment_pattern, _comment_replacer, rawinput) + rawinput = re.sub(self.comment_pattern, self._comment_replacer, rawinput) # we are going to modify line, so create a copy of the raw input line = rawinput + command = None args = '' @@ -160,10 +163,7 @@ class StatementParser(): if line[-1:] == BLANK_LINE: terminator = BLANK_LINE - # split the input on whitespace - lexer = shlex.shlex(line, posix=False) - lexer.whitespace_split = True - tokens = self.split_on_punctuation(list(lexer)) + tokens = self.tokenize(line) # expand aliases if tokens: @@ -274,7 +274,7 @@ class StatementParser(): # strip C-style comments # shlex will handle the python/shell style comments for us # save rawinput for later - rawinput = re.sub(self.comment_pattern, _comment_replacer, rawinput) + rawinput = re.sub(self.comment_pattern, self._comment_replacer, rawinput) # we are going to modify line, so create a copy of the raw input line = rawinput command = None @@ -288,7 +288,7 @@ class StatementParser(): # split the input on whitespace lexer = shlex.shlex(line, posix=False) lexer.whitespace_split = True - tokens = self.split_on_punctuation(list(lexer)) + tokens = self._split_on_punctuation(list(lexer)) # expand aliases if tokens: @@ -319,7 +319,10 @@ class StatementParser(): return line def expand_aliases(self, command: str) -> str: - """Given a command, expand any aliases for the command""" + """Given a command, expand any aliases for the command. + + If an alias contains shortcuts, the shortcuts will be expanded too. 
+ """ # make a copy of aliases so we can edit it tmp_aliases = list(self.aliases.keys()) keep_expanding = bool(tmp_aliases) @@ -332,7 +335,7 @@ class StatementParser(): tmp_aliases.remove(cur_alias) keep_expanding = bool(tmp_aliases) break - return command + return self.expand_shortcuts(command) @staticmethod def _command_and_args(tokens: List[str]) -> Tuple[str, str]: @@ -350,7 +353,16 @@ class StatementParser(): return (command, args) - def split_on_punctuation(self, tokens: List[str]) -> List[str]: + @staticmethod + def _comment_replacer(match): + matched_string = match.group(0) + if matched_string.startswith('/'): + # the matched string was a comment, so remove it + return '' + # the matched string was a quoted string, return the match + return matched_string + + def _split_on_punctuation(self, tokens: List[str]) -> List[str]: """ # Further splits tokens from a command line using punctuation characters # as word breaks when they are in unquoted strings. Each run of punctuation |