diff options
author | kotfu <jared@kotfu.net> | 2018-05-06 09:27:03 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-05-06 09:27:03 -0600 |
commit | bfdb482341efae823016ae7b1799cad06104309d (patch) | |
tree | d14932e0756fba6e5eca2c1ff0f0457622eadbbc | |
parent | 7f33f04c6f0c1f873dc1adba41bb3cc97d066a46 (diff) | |
parent | 2f102b911f1ec87d58039c9a5beca9b23a6c5474 (diff) | |
download | cmd2-git-bfdb482341efae823016ae7b1799cad06104309d.tar.gz |
Merge pull request #390 from python-cmd2/refactor_parseline
Refactor parseline()
-rwxr-xr-x | cmd2/cmd2.py | 70 | ||||
-rw-r--r-- | cmd2/parsing.py | 64 | ||||
-rw-r--r-- | tests/test_cmd2.py | 18 | ||||
-rw-r--r-- | tests/test_parsing.py | 44 |
4 files changed, 95 insertions, 101 deletions
diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py index ad2038d4..1378f052 100755 --- a/cmd2/cmd2.py +++ b/cmd2/cmd2.py @@ -1577,12 +1577,9 @@ class Cmd(cmd.Cmd): if begidx > 0: # Parse the command line - command, args, expanded_line = self.parseline(line) - - # use these lines instead of the one above - # statement = self.command_parser.parse_command_only(line) - # command = statement.command - # expanded_line = statement.command_and_args + statement = self.statement_parser.parse_command_only(line) + command = statement.command + expanded_line = statement.command_and_args # We overwrote line with a properly formatted but fully stripped version # Restore the end spaces since line is only supposed to be lstripped when @@ -1603,8 +1600,7 @@ class Cmd(cmd.Cmd): tokens, raw_tokens = self.tokens_for_completion(line, begidx, endidx) # Either had a parsing error or are trying to complete the command token - # The latter can happen if default_to_shell is True and parseline() allowed - # assumed something like " or ' was a command. + # The latter can happen if " or ' was entered as the command if tokens is None or len(tokens) == 1: self.completion_matches = [] return None @@ -1924,66 +1920,16 @@ class Cmd(cmd.Cmd): def parseline(self, line): """Parse the line into a command name and a string containing the arguments. - NOTE: This is an override of a parent class method. It is only used by other parent class methods. But - we do need to override it here so that the additional shortcuts present in cmd2 get properly expanded for - purposes of tab completion. + NOTE: This is an override of a parent class method. It is only used by other parent class methods. - Used for command tab completion. Returns a tuple containing (command, args, line). - 'command' and 'args' may be None if the line couldn't be parsed. + Different from the parent class method, this ignores self.identchars. :param line: str - line read by readline :return: (str, str, str) - tuple containing (command, args, line) """ - line = line.strip() - - if not line: - # Deal with empty line or all whitespace line - return None, None, line - - # Make a copy of aliases so we can edit it - tmp_aliases = list(self.aliases.keys()) - keep_expanding = len(tmp_aliases) > 0 - - # Expand aliases - while keep_expanding: - for cur_alias in tmp_aliases: - keep_expanding = False - - if line == cur_alias or line.startswith(cur_alias + ' '): - line = line.replace(cur_alias, self.aliases[cur_alias], 1) - - # Do not expand the same alias more than once - tmp_aliases.remove(cur_alias) - keep_expanding = len(tmp_aliases) > 0 - break - - # Expand command shortcut to its full command name - for (shortcut, expansion) in self.shortcuts: - if line.startswith(shortcut): - # If the next character after the shortcut isn't a space, then insert one - shortcut_len = len(shortcut) - if len(line) == shortcut_len or line[shortcut_len] != ' ': - expansion += ' ' - - # Expand the shortcut - line = line.replace(shortcut, expansion, 1) - break - - i, n = 0, len(line) - - # If we are allowing shell commands, then allow any character in the command - if self.default_to_shell: - while i < n and line[i] != ' ': - i += 1 - - # Otherwise only allow those in identchars - else: - while i < n and line[i] in self.identchars: - i += 1 - - command, arg = line[:i], line[i:].strip() - return command, arg, line + statement = self.statement_parser.parse_command_only(line) + return statement.command, statement.args, statement.command_and_args def onecmd_plus_hooks(self, line): """Top-level function called by cmdloop() to handle parsing a line and running the command and all of its hooks. diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 908e9272..f2c86ea8 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -81,7 +81,7 @@ class Statement(str): return rtn -class StatementParser(): +class StatementParser: """Parse raw text into command components. Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion. @@ -93,7 +93,7 @@ class StatementParser(): multiline_commands=None, aliases=None, shortcuts=None, - ): + ): self.allow_redirection = allow_redirection if terminators is None: self.terminators = [';'] @@ -144,18 +144,19 @@ class StatementParser(): # aliases have to be a word, so make a regular expression # that matches the first word in the line. This regex has two # parts, the first parenthesis enclosed group matches one - # or more non-whitespace characters, and the second group - # matches either a whitespace character or the end of the - # string. We use \A and \Z to ensure we always match the - # beginning and end of a string that may have multiple - # lines - self.command_pattern = re.compile(r'\A(\S+)(\s|\Z)') - + # or more non-whitespace characters (which may be preceeded + # by whitespace) and the second group matches either a whitespace + # character or the end of the string. We use \A and \Z to ensure + # we always match the beginning and end of a string that may have + # multiple lines + self.command_pattern = re.compile(r'\A\s*(\S+)(\s|\Z)+') def tokenize(self, line: str) -> List[str]: """Lex a string into a list of tokens. Comments are removed, and shortcuts and aliases are expanded. + + Raises ValueError if there are unclosed quotation marks. """ # strip C-style comments @@ -177,6 +178,8 @@ class StatementParser(): """Tokenize the input and parse it into a Statement object, stripping comments, expanding aliases and shortcuts, and extracting output redirection directives. + + Raises ValueError if there are unclosed quotation marks. """ # handle the special case/hardcoded terminator of a blank line @@ -297,16 +300,40 @@ class StatementParser(): return statement def parse_command_only(self, rawinput: str) -> Statement: - """Partially parse input into a Statement object. The command is - identified, and shortcuts and aliases are expanded. + """Partially parse input into a Statement object. + + The command is identified, and shortcuts and aliases are expanded. Terminators, multiline commands, and output redirection are not parsed. + + This method is used by tab completion code and therefore must not + generate an exception if there are unclosed quotes. + + The Statement object returned by this method can at most contained + values in the following attributes: + - raw + - command + - args + + Different from parse(), this method does not remove redundant whitespace + within statement.args. It does however, ensure args does not have leading + or trailing whitespace. """ - # lex the input into a list of tokens - tokens = self.tokenize(rawinput) + # expand shortcuts and aliases + line = self._expand(rawinput) - # parse out the command and everything else - (command, args) = self._command_and_args(tokens) + command = None + args = None + match = self.command_pattern.search(line) + if match: + # we got a match, extract the command + command = match.group(1) + # the command_pattern regex is designed to match the spaces + # between command and args with a second match group. Using + # the end of the second match group ensures that args has + # no leading whitespace. The rstrip() makes sure there is + # no trailing whitespace + args = line[match.end(2):].rstrip() # build the statement # string representation of args must be an empty string instead of @@ -315,7 +342,6 @@ class StatementParser(): statement.raw = rawinput statement.command = command statement.args = args - statement.argv = tokens return statement def _expand(self, line: str) -> str: @@ -342,7 +368,7 @@ class StatementParser(): # expand shortcuts for (shortcut, expansion) in self.shortcuts: - if line.startswith(shortcut): + if line.startswith(shortcut): # If the next character after the shortcut isn't a space, then insert one shortcut_len = len(shortcut) if len(line) == shortcut_len or line[shortcut_len] != ' ': @@ -370,7 +396,7 @@ class StatementParser(): if len(tokens) > 1: args = ' '.join(tokens[1:]) - return (command, args) + return command, args @staticmethod def _comment_replacer(match): @@ -387,7 +413,7 @@ class StatementParser(): # as word breaks when they are in unquoted strings. Each run of punctuation # characters is treated as a single token. - :param initial_tokens: the tokens as parsed by shlex + :param tokens: the tokens as parsed by shlex :return: the punctuated tokens """ punctuation = [] diff --git a/tests/test_cmd2.py b/tests/test_cmd2.py index b6416005..0da7e9d5 100644 --- a/tests/test_cmd2.py +++ b/tests/test_cmd2.py @@ -1718,3 +1718,21 @@ def test_ppaged(base_app): base_app.ppaged(msg) out = base_app.stdout.buffer assert out == msg + end + +# we override cmd.parseline() so we always get consistent +# command parsing by parent methods we don't override +# don't need to test all the parsing logic here, because +# parseline just calls StatementParser.parse_command_only() +def test_parseline_empty(base_app): + statement = '' + command, args, line = base_app.parseline(statement) + assert not command + assert not args + assert not line + +def test_parseline(base_app): + statement = " command with 'partially completed quotes " + command, args, line = base_app.parseline(statement) + assert command == 'command' + assert args == "with 'partially completed quotes" + assert line == statement.strip() diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 7940bbd8..19237f6e 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -44,6 +44,10 @@ def test_tokenize(parser, line, tokens): tokens_to_test = parser.tokenize(line) assert tokens_to_test == tokens +def test_tokenize_unclosed_quotes(parser): + with pytest.raises(ValueError): + tokens = parser.tokenize('command with "unclosed quotes') + @pytest.mark.parametrize('tokens,command,args', [ ([], None, None), (['command'], 'command', None), @@ -59,20 +63,20 @@ def test_command_and_args(parser, tokens, command, args): '"one word"', "'one word'", ]) -def test_single_word(parser, line): +def test_parse_single_word(parser, line): statement = parser.parse(line) assert statement.command == line assert not statement.args assert statement.argv == [utils.strip_quotes(line)] -def test_word_plus_terminator(parser): +def test_parse_word_plus_terminator(parser): line = 'termbare;' statement = parser.parse(line) assert statement.command == 'termbare' assert statement.terminator == ';' assert statement.argv == ['termbare'] -def test_suffix_after_terminator(parser): +def test_parse_suffix_after_terminator(parser): line = 'termbare; suffx' statement = parser.parse(line) assert statement.command == 'termbare' @@ -80,14 +84,14 @@ def test_suffix_after_terminator(parser): assert statement.suffix == 'suffx' assert statement.argv == ['termbare'] -def test_command_with_args(parser): +def test_parse_command_with_args(parser): line = 'command with args' statement = parser.parse(line) assert statement.command == 'command' assert statement.args == 'with args' assert statement.argv == ['command', 'with', 'args'] -def test_command_with_quoted_args(parser): +def test_parse_command_with_quoted_args(parser): line = 'command with "quoted args" and "some not"' statement = parser.parse(line) assert statement.command == 'command' @@ -103,20 +107,20 @@ def test_parse_command_with_args_terminator_and_suffix(parser): assert statement.suffix == 'and suffix' assert statement.argv == ['command', 'with', 'args', 'and', 'terminator'] -def test_hashcomment(parser): +def test_parse_hashcomment(parser): statement = parser.parse('hi # this is all a comment') assert statement.command == 'hi' assert not statement.args assert statement.argv == ['hi'] -def test_c_comment(parser): +def test_parse_c_comment(parser): statement = parser.parse('hi /* this is | all a comment */') assert statement.command == 'hi' assert not statement.args assert not statement.pipe_to assert statement.argv == ['hi'] -def test_c_comment_empty(parser): +def test_parse_c_comment_empty(parser): statement = parser.parse('/* this is | all a comment */') assert not statement.command assert not statement.args @@ -130,14 +134,14 @@ def test_parse_what_if_quoted_strings_seem_to_start_comments(parser): assert not statement.pipe_to assert statement.argv == ['what', 'if', 'quoted strings /* seem to ', 'start', 'comments?'] -def test_simple_piped(parser): +def test_parse_simple_piped(parser): statement = parser.parse('simple | piped') assert statement.command == 'simple' assert not statement.args assert statement.argv == ['simple'] assert statement.pipe_to == 'piped' -def test_double_pipe_is_not_a_pipe(parser): +def test_parse_double_pipe_is_not_a_pipe(parser): line = 'double-pipe || is not a pipe' statement = parser.parse(line) assert statement.command == 'double-pipe' @@ -145,7 +149,7 @@ def test_double_pipe_is_not_a_pipe(parser): assert statement.argv == ['double-pipe', '||', 'is', 'not', 'a', 'pipe'] assert not statement.pipe_to -def test_complex_pipe(parser): +def test_parse_complex_pipe(parser): line = 'command with args, terminator;sufx | piped' statement = parser.parse(line) assert statement.command == 'command' @@ -155,7 +159,7 @@ def test_complex_pipe(parser): assert statement.suffix == 'sufx' assert statement.pipe_to == 'piped' -def test_output_redirect(parser): +def test_parse_output_redirect(parser): line = 'output into > afile.txt' statement = parser.parse(line) assert statement.command == 'output' @@ -164,7 +168,7 @@ def test_output_redirect(parser): assert statement.output == '>' assert statement.output_to == 'afile.txt' -def test_output_redirect_with_dash_in_path(parser): +def test_parse_output_redirect_with_dash_in_path(parser): line = 'output into > python-cmd2/afile.txt' statement = parser.parse(line) assert statement.command == 'output' @@ -173,7 +177,7 @@ def test_output_redirect_with_dash_in_path(parser): assert statement.output == '>' assert statement.output_to == 'python-cmd2/afile.txt' -def test_output_redirect_append(parser): +def test_parse_output_redirect_append(parser): line = 'output appended to >> /tmp/afile.txt' statement = parser.parse(line) assert statement.command == 'output' @@ -182,7 +186,7 @@ def test_output_redirect_append(parser): assert statement.output == '>>' assert statement.output_to == '/tmp/afile.txt' -def test_pipe_and_redirect(parser): +def test_parse_pipe_and_redirect(parser): line = 'output into;sufx | pipethrume plz > afile.txt' statement = parser.parse(line) assert statement.command == 'output' @@ -202,7 +206,7 @@ def test_parse_output_to_paste_buffer(parser): assert statement.argv == ['output', 'to', 'paste', 'buffer'] assert statement.output == '>>' -def test_has_redirect_inside_terminator(parser): +def test_parse_redirect_inside_terminator(parser): """The terminator designates the end of the commmand/arguments portion. If a redirector occurs before a terminator, then it will be treated as part of the arguments and not as a redirector.""" line = 'has > inside;' @@ -290,6 +294,10 @@ def test_parse_redirect_to_unicode_filename(parser): assert statement.output == '>' assert statement.output_to == 'café' +def test_parse_unclosed_quotes(parser): + with pytest.raises(ValueError): + tokens = parser.tokenize("command with 'unclosed quotes") + def test_empty_statement_raises_exception(): app = cmd2.Cmd() with pytest.raises(cmd2.EmptyStatement): @@ -325,7 +333,6 @@ def test_parse_command_only_command_and_args(parser): statement = parser.parse_command_only(line) assert statement.command == 'help' assert statement.args == 'history' - assert statement.argv == ['help', 'history'] assert statement.command_and_args == line def test_parse_command_only_emptyline(parser): @@ -345,7 +352,6 @@ def test_parse_command_only_strips_line(parser): statement = parser.parse_command_only(line) assert statement.command == 'help' assert statement.args == 'history' - assert statement.argv == ['help', 'history'] assert statement.command_and_args == line.strip() def test_parse_command_only_expands_alias(parser): @@ -353,14 +359,12 @@ def test_parse_command_only_expands_alias(parser): statement = parser.parse_command_only(line) assert statement.command == 'pyscript' assert statement.args == 'foobar.py' - assert statement.argv == ['pyscript', 'foobar.py'] def test_parse_command_only_expands_shortcuts(parser): line = '!cat foobar.txt' statement = parser.parse_command_only(line) assert statement.command == 'shell' assert statement.args == 'cat foobar.txt' - assert statement.argv == ['shell', 'cat', 'foobar.txt'] assert statement.command_and_args == 'shell cat foobar.txt' def test_parse_command_only_quoted_args(parser): |