From a0c0db15103a54dba20fb309956a7b3cf90bc645 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 00:20:46 -0600 Subject: Fix alias expansion when not followed by whitespace --- cmd2/parsing.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 908e9272..eff29843 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -144,13 +144,26 @@ class StatementParser(): # aliases have to be a word, so make a regular expression # that matches the first word in the line. This regex has two # parts, the first parenthesis enclosed group matches one - # or more non-whitespace characters, and the second group - # matches either a whitespace character or the end of the - # string. We use \A and \Z to ensure we always match the - # beginning and end of a string that may have multiple - # lines - self.command_pattern = re.compile(r'\A(\S+)(\s|\Z)') - + # or more non-whitespace characters with a non-greedy match + # (that's what the '+?' part does). The second group must be + # dynamically created because it needs to match either whitespace, + # something in REDIRECTION_CHARS, one of the terminators, + # or the end of the string. 
We use \A and \Z to ensure we always + # match the beginning and end of a string that may have multiple + # lines (if it's a multiline command) + second_group_items = [] + second_group_items.extend(constants.REDIRECTION_CHARS) + second_group_items.extend(terminators) + # escape each item so it will for sure get treated as a literal + second_group_items = [re.escape(x) for x in second_group_items] + # add the whitespace and end of string, not escaped because they + # are not literals + second_group_items.extend([r'\s', r'\Z']) + # join them up with a pipe + second_group = '|'.join(second_group_items) + # build the regular expression + expr = r'\A(\S+?)({})'.format(second_group) + self.command_pattern = re.compile(expr) def tokenize(self, line: str) -> List[str]: """Lex a string into a list of tokens. -- cgit v1.2.1 From 0efb62cfc2b80dabcf0e94ad3315e3ea32c02d4f Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 10:07:16 -0600 Subject: Fix bungled merge from master --- cmd2/parsing.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index a1e21175..d7feeb48 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -142,15 +142,21 @@ class StatementParser: ) # aliases have to be a word, so make a regular expression - # that matches the first word in the line. This regex has two - # parts, the first parenthesis enclosed group matches one - # or more non-whitespace characters with a non-greedy match - # (that's what the '+?' part does). The second group must be - # dynamically created because it needs to match either whitespace, - # something in REDIRECTION_CHARS, one of the terminators, - # or the end of the string. We use \A and \Z to ensure we always - # match the beginning and end of a string that may have multiple - # lines (if it's a multiline command) + # that matches the first word in the line. 
This regex has three + # parts: + # - the '\A\s*' matches the beginning of the string (even + # if contains multiple lines) and gobbles up any leading + # whitespace + # - the first parenthesis enclosed group matches one + # or more non-whitespace characters with a non-greedy match + # (that's what the '+?' part does). The non-greedy match + # ensures that this first group doesn't include anything + # matched by the second group + # - the second parenthesis group must be dynamically created + # because it needs to match either whitespace, something in + # REDIRECTION_CHARS, one of the terminators, or the end of + # the string (\Z matches the end of the string even if it + # contains multiple lines) second_group_items = [] second_group_items.extend(constants.REDIRECTION_CHARS) second_group_items.extend(terminators) @@ -162,7 +168,7 @@ class StatementParser: # join them up with a pipe second_group = '|'.join(second_group_items) # build the regular expression - expr = r'\A(\S+?)({})'.format(second_group) + expr = r'\A\s*(\S+?)({})'.format(second_group) self.command_pattern = re.compile(expr) def tokenize(self, line: str) -> List[str]: -- cgit v1.2.1 From f5f0c90aa44ec658b33da422c4f0dc1cea2e6b98 Mon Sep 17 00:00:00 2001 From: kotfu Date: Mon, 7 May 2018 21:01:56 -0600 Subject: Make alias checking and command parsing use the same regex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a new is_valid_command() method on StatementParser to determine whether a string of characters could be a valid command. That means it can’t include any redirection, quote chars, whitespace, or terminator characters. This method is used when someone tries to create an alias, to ensure when we try and parse the alias that it will actually parse. This nicely encapsulates and standardizes all the logic for parsing and expansion into the StatementParser class. 
Also fix a bug in the regex to match valid command names, and add a bunch of new unit tests to ensure the bug stays fixed. --- cmd2/parsing.py | 62 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 11 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index d7feeb48..3a9b390b 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -141,7 +141,7 @@ class StatementParser: re.DOTALL | re.MULTILINE ) - # aliases have to be a word, so make a regular expression + # commands have to be a word, so make a regular expression # that matches the first word in the line. This regex has three # parts: # - the '\A\s*' matches the beginning of the string (even @@ -157,19 +157,51 @@ class StatementParser: # REDIRECTION_CHARS, one of the terminators, or the end of # the string (\Z matches the end of the string even if it # contains multiple lines) - second_group_items = [] - second_group_items.extend(constants.REDIRECTION_CHARS) - second_group_items.extend(terminators) + # + invalid_command_chars = [] + invalid_command_chars.extend(constants.QUOTES) + invalid_command_chars.extend(constants.REDIRECTION_CHARS) + invalid_command_chars.extend(terminators) # escape each item so it will for sure get treated as a literal - second_group_items = [re.escape(x) for x in second_group_items] + second_group_items = [re.escape(x) for x in invalid_command_chars] # add the whitespace and end of string, not escaped because they # are not literals second_group_items.extend([r'\s', r'\Z']) # join them up with a pipe second_group = '|'.join(second_group_items) # build the regular expression - expr = r'\A\s*(\S+?)({})'.format(second_group) - self.command_pattern = re.compile(expr) + expr = r'\A\s*(\S*?)({})'.format(second_group) + self._command_pattern = re.compile(expr) + + def is_valid_command(self, word: str) -> Tuple[bool, str]: + """Determine whether a word is a valid alias. 
+ + Aliases can not include redirection characters, whitespace, + or termination characters. + + If word is not a valid command, return False and a comma + separated string of characters that can not appear in a command. + This string is suitable for inclusion in an error message of your + choice: + + valid, invalidchars = statement_parser.is_valid_command('>') + if not valid: + errmsg = "Aliases can not contain: {}".format(invalidchars) + """ + valid = False + + errmsg = 'whitespace, quotes, ' + errchars = [] + errchars.extend(constants.REDIRECTION_CHARS) + errchars.extend(self.terminators) + errmsg += ', '.join([shlex.quote(x) for x in errchars]) + + match = self._command_pattern.search(word) + if match: + if word == match.group(1): + valid = True + errmsg = None + return valid, errmsg def tokenize(self, line: str) -> List[str]: """Lex a string into a list of tokens. @@ -344,16 +376,24 @@ class StatementParser: command = None args = None - match = self.command_pattern.search(line) + match = self._command_pattern.search(line) if match: # we got a match, extract the command command = match.group(1) - # the command_pattern regex is designed to match the spaces + # the match could be an empty string, if so, turn it into none + if not command: + command = None + # the _command_pattern regex is designed to match the spaces # between command and args with a second match group. Using # the end of the second match group ensures that args has # no leading whitespace. The rstrip() makes sure there is # no trailing whitespace args = line[match.end(2):].rstrip() + # if the command is none that means the input was either empty + # or something weird like '>'. 
args should be None if we couldn't + # parse a command + if not command or not args: + args = None # build the statement # string representation of args must be an empty string instead of @@ -375,11 +415,11 @@ class StatementParser: for cur_alias in tmp_aliases: keep_expanding = False # apply our regex to line - match = self.command_pattern.search(line) + match = self._command_pattern.search(line) if match: # we got a match, extract the command command = match.group(1) - if command == cur_alias: + if command and command == cur_alias: # rebuild line with the expanded alias line = self.aliases[cur_alias] + match.group(2) + line[match.end(2):] tmp_aliases.remove(cur_alias) -- cgit v1.2.1