diff options
-rw-r--r-- | CHANGELOG.md | 5
-rw-r--r-- | cmd2/cmd2.py | 4
-rw-r--r-- | cmd2/constants.py | 1
-rw-r--r-- | cmd2/parsing.py | 54
-rw-r--r-- | tests/test_argparse.py | 4
-rw-r--r-- | tests/test_cmd2.py | 4
-rw-r--r-- | tests/test_parsing.py | 94
7 files changed, 39 insertions, 127 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 1139853f..6b2bb782 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ ``AutoCompleter`` which has since developed a dependency on ``cmd2`` methods. * Removed ability to call commands in ``pyscript`` as if they were functions (e.g ``app.help()``) in favor of only supporting one ``pyscript`` interface. This simplifies future maintenance. + * No longer supporting C-style comments. Hash (#) is the only valid comment marker. + * No longer supporting comments embedded in a command. Only strings where the first non-whitespace character + is a # will be treated as comments. All other # characters will be treated as literals. + * \# this is a comment + * this # is not a comment ## 0.9.10 (February 22, 2019) * Bug Fixes diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py index 24e140fd..08906055 100644 --- a/cmd2/cmd2.py +++ b/cmd2/cmd2.py @@ -160,7 +160,7 @@ def parse_quoted_string(string: str, preserve_quotes: bool) -> List[str]: lexed_arglist = string else: # Use shlex to split the command line into a list of arguments based on shell rules - lexed_arglist = shlex.split(string, posix=False) + lexed_arglist = shlex.split(string, comments=False, posix=False) if not preserve_quotes: lexed_arglist = [utils.strip_quotes(arg) for arg in lexed_arglist] @@ -761,7 +761,7 @@ class Cmd(cmd.Cmd): while True: try: # Use non-POSIX parsing to keep the quotes around the tokens - initial_tokens = shlex.split(tmp_line[:tmp_endidx], posix=False) + initial_tokens = shlex.split(tmp_line[:tmp_endidx], comments=False, posix=False) # If the cursor is at an empty token outside of a quoted string, # then that is the token being completed. Add it to the list. 
diff --git a/cmd2/constants.py b/cmd2/constants.py index 3c133b70..3e35a542 100644 --- a/cmd2/constants.py +++ b/cmd2/constants.py @@ -12,6 +12,7 @@ REDIRECTION_OUTPUT = '>' REDIRECTION_APPEND = '>>' REDIRECTION_CHARS = [REDIRECTION_PIPE, REDIRECTION_OUTPUT] REDIRECTION_TOKENS = [REDIRECTION_PIPE, REDIRECTION_OUTPUT, REDIRECTION_APPEND] +COMMENT_CHAR = '#' # Regular expression to match ANSI escape codes ANSI_ESCAPE_RE = re.compile(r'\x1b[^m]*m') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index d4f82ac9..bd3a6900 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -236,33 +236,6 @@ class StatementParser: else: self.shortcuts = shortcuts - # this regular expression matches C-style comments and quoted - # strings, i.e. stuff between single or double quote marks - # it's used with _comment_replacer() to strip out the C-style - # comments, while leaving C-style comments that are inside either - # double or single quotes. - # - # this big regular expression can be broken down into 3 regular - # expressions that are OR'ed together with a pipe character - # - # /\*.*\*/ Matches C-style comments (i.e. /* comment */) - # does not match unclosed comments. - # \'(?:\\.|[^\\\'])*\' Matches a single quoted string, allowing - # for embedded backslash escaped single quote - # marks. - # "(?:\\.|[^\\"])*" Matches a double quoted string, allowing - # for embedded backslash escaped double quote - # marks. - # - # by way of reminder the (?:...) regular expression syntax is just - # a non-capturing version of regular parenthesis. We need the non- - # capturing syntax because _comment_replacer() looks at match - # groups - self.comment_pattern = re.compile( - r'/\*.*\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', - re.DOTALL | re.MULTILINE - ) - # commands have to be a word, so make a regular expression # that matches the first word in the line. 
This regex has three # parts: @@ -315,6 +288,9 @@ class StatementParser: if not word: return False, 'cannot be an empty string' + if word.startswith(constants.COMMENT_CHAR): + return False, 'cannot start with the comment character' + for (shortcut, _) in self.shortcuts: if word.startswith(shortcut): # Build an error string with all shortcuts listed @@ -338,24 +314,23 @@ class StatementParser: def tokenize(self, line: str) -> List[str]: """Lex a string into a list of tokens. - Comments are removed, and shortcuts and aliases are expanded. + shortcuts and aliases are expanded and comments are removed Raises ValueError if there are unclosed quotation marks. """ - # strip C-style comments - # shlex will handle the python/shell style comments for us - line = re.sub(self.comment_pattern, self._comment_replacer, line) - # expand shortcuts and aliases line = self._expand(line) + # check if this line is a comment + if line.strip().startswith(constants.COMMENT_CHAR): + return [] + # split on whitespace - lexer = shlex.shlex(line, posix=False) - lexer.whitespace_split = True + tokens = shlex.split(line, comments=False, posix=False) # custom lexing - tokens = self._split_on_punctuation(list(lexer)) + tokens = self._split_on_punctuation(tokens) return tokens def parse(self, line: str) -> Statement: @@ -610,15 +585,6 @@ class StatementParser: return command, args - @staticmethod - def _comment_replacer(match): - matched_string = match.group(0) - if matched_string.startswith('/'): - # the matched string was a comment, so remove it - return '' - # the matched string was a quoted string, return the match - return matched_string - def _split_on_punctuation(self, tokens: List[str]) -> List[str]: """Further splits tokens from a command line using punctuation characters diff --git a/tests/test_argparse.py b/tests/test_argparse.py index 7db35c71..6b810b44 100644 --- a/tests/test_argparse.py +++ b/tests/test_argparse.py @@ -141,10 +141,6 @@ def 
test_argparse_with_list_and_empty_doc(argparse_app): out = run_cmd(argparse_app, 'speak -s hello world!') assert out == ['HELLO WORLD!'] -def test_argparse_comment_stripping(argparse_app): - out = run_cmd(argparse_app, 'speak it was /* not */ delicious! # Yuck!') - assert out == ['it was delicious!'] - def test_argparser_correct_args_with_quotes_and_midline_options(argparse_app): out = run_cmd(argparse_app, "speak 'This is a' -s test of the emergency broadcast system!") assert out == ['THIS IS A TEST OF THE EMERGENCY BROADCAST SYSTEM!'] diff --git a/tests/test_cmd2.py b/tests/test_cmd2.py index faef21f9..c8966f57 100644 --- a/tests/test_cmd2.py +++ b/tests/test_cmd2.py @@ -24,8 +24,7 @@ except ImportError: from unittest import mock import cmd2 -from cmd2 import clipboard -from cmd2 import utils +from cmd2 import clipboard, constants, utils from .conftest import run_cmd, normalize, BASE_HELP, BASE_HELP_VERBOSE, \ HELP_HISTORY, SHORTCUTS_TXT, SHOW_TXT, SHOW_LONG @@ -1828,6 +1827,7 @@ def test_poutput_color_never(base_app): # These are invalid names for aliases and macros invalid_command_name = [ '""', # Blank name + constants.COMMENT_CHAR, '!no_shortcut', '">"', '"no>pe"', diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 78adf880..de49d3f5 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -11,7 +11,7 @@ import pytest import cmd2 from cmd2.parsing import StatementParser -from cmd2 import utils +from cmd2 import constants, utils @pytest.fixture def parser(): @@ -70,8 +70,8 @@ def test_parse_empty_string_default(default_parser): @pytest.mark.parametrize('line,tokens', [ ('command', ['command']), - ('command /* with some comment */ arg', ['command', 'arg']), - ('command arg1 arg2 # comment at the end', ['command', 'arg1', 'arg2']), + (constants.COMMENT_CHAR + 'comment', []), + ('not ' + constants.COMMENT_CHAR + ' a comment', ['not', constants.COMMENT_CHAR, 'a', 'comment']), ('termbare ; > /tmp/output', ['termbare', ';', '>', 
'/tmp/output']), ('termbare; > /tmp/output', ['termbare', ';', '>', '/tmp/output']), ('termbare & > /tmp/output', ['termbare', '&', '>', '/tmp/output']), @@ -84,8 +84,8 @@ def test_tokenize_default(default_parser, line, tokens): @pytest.mark.parametrize('line,tokens', [ ('command', ['command']), - ('command /* with some comment */ arg', ['command', 'arg']), - ('command arg1 arg2 # comment at the end', ['command', 'arg1', 'arg2']), + ('# comment', []), + ('not ' + constants.COMMENT_CHAR + ' a comment', ['not', constants.COMMENT_CHAR, 'a', 'comment']), ('42 arg1 arg2', ['theanswer', 'arg1', 'arg2']), ('l', ['shell', 'ls', '-al']), ('termbare ; > /tmp/output', ['termbare', ';', '>', '/tmp/output']), @@ -193,59 +193,23 @@ def test_parse_command_with_args_terminator_and_suffix(parser): assert statement.terminator == ';' assert statement.suffix == 'and suffix' -def test_parse_hashcomment(parser): - statement = parser.parse('hi # this is all a comment') - assert statement.command == 'hi' - assert statement == '' - assert statement.args == statement - assert statement.argv == ['hi'] - assert not statement.arg_list - -def test_parse_c_comment(parser): - statement = parser.parse('hi /* this is | all a comment */') - assert statement.command == 'hi' - assert statement == '' - assert statement.args == statement - assert statement.argv == ['hi'] - assert not statement.arg_list - assert not statement.pipe_to - -def test_parse_c_comment_empty(parser): - statement = parser.parse('/* this is | all a comment */') +def test_parse_comment(parser): + statement = parser.parse(constants.COMMENT_CHAR + ' this is all a comment') assert statement.command == '' + assert statement == '' assert statement.args == statement - assert not statement.pipe_to assert not statement.argv assert not statement.arg_list - assert statement == '' -def test_parse_c_comment_no_closing(parser): - statement = parser.parse('cat /tmp/*.txt') - assert statement.command == 'cat' - assert statement == '/tmp/*.txt' - 
assert statement.args == statement - assert not statement.pipe_to - assert statement.argv == ['cat', '/tmp/*.txt'] - assert statement.arg_list == statement.argv[1:] - -def test_parse_c_comment_multiple_opening(parser): - statement = parser.parse('cat /tmp/*.txt /tmp/*.cfg') - assert statement.command == 'cat' - assert statement == '/tmp/*.txt /tmp/*.cfg' +def test_parse_embedded_comment_char(parser): + command_str = 'hi ' + constants.COMMENT_CHAR + ' not a comment' + statement = parser.parse(command_str) + assert statement.command == 'hi' + assert statement == constants.COMMENT_CHAR + ' not a comment' assert statement.args == statement - assert not statement.pipe_to - assert statement.argv == ['cat', '/tmp/*.txt', '/tmp/*.cfg'] + assert statement.argv == command_str.split() assert statement.arg_list == statement.argv[1:] -def test_parse_what_if_quoted_strings_seem_to_start_comments(parser): - statement = parser.parse('what if "quoted strings /* seem to " start comments?') - assert statement.command == 'what' - assert statement == 'if "quoted strings /* seem to " start comments?' - assert statement.args == statement - assert statement.argv == ['what', 'if', 'quoted strings /* seem to ', 'start', 'comments?'] - assert statement.arg_list == ['if', '"quoted strings /* seem to "', 'start', 'comments?'] - assert not statement.pipe_to - @pytest.mark.parametrize('line',[ 'simple | piped', 'simple|piped', @@ -411,30 +375,6 @@ def test_parse_multiline_command_ignores_redirectors_within_it(parser, line, ter assert statement.arg_list == statement.argv[1:] assert statement.terminator == terminator -def test_parse_multiline_with_incomplete_comment(parser): - """A terminator within a comment will be ignored and won't terminate a multiline command. 
- Un-closed comments effectively comment out everything after the start.""" - line = 'multiline command /* with unclosed comment;' - statement = parser.parse(line) - assert statement.multiline_command == 'multiline' - assert statement.command == 'multiline' - assert statement == 'command /* with unclosed comment' - assert statement.args == statement - assert statement.argv == ['multiline', 'command', '/*', 'with', 'unclosed', 'comment'] - assert statement.arg_list == statement.argv[1:] - assert statement.terminator == ';' - -def test_parse_multiline_with_complete_comment(parser): - line = 'multiline command /* with comment complete */ is done;' - statement = parser.parse(line) - assert statement.multiline_command == 'multiline' - assert statement.command == 'multiline' - assert statement == 'command is done' - assert statement.args == statement - assert statement.argv == ['multiline', 'command', 'is', 'done'] - assert statement.arg_list == statement.argv[1:] - assert statement.terminator == ';' - def test_parse_multiline_terminated_by_empty_line(parser): line = 'multiline command ends\n\n' statement = parser.parse(line) @@ -464,7 +404,7 @@ def test_parse_multiline_with_embedded_newline(parser, line, terminator): assert statement.arg_list == ['command', '"with\nembedded newline"'] assert statement.terminator == terminator -def test_parse_multiline_ignores_terminators_in_comments(parser): +def test_parse_multiline_ignores_terminators_in_quotes(parser): line = 'multiline command "with term; ends" now\n\n' statement = parser.parse(line) assert statement.multiline_command == 'multiline' @@ -762,6 +702,10 @@ def test_is_valid_command_invalid(parser): valid, errmsg = parser.is_valid_command('') assert not valid and 'cannot be an empty string' in errmsg + # Start with the comment character + valid, errmsg = parser.is_valid_command(constants.COMMENT_CHAR) + assert not valid and 'cannot start with the comment character' in errmsg + # Starts with shortcut valid, errmsg = 
parser.is_valid_command('!ls') assert not valid and 'cannot start with a shortcut' in errmsg |