From 2350ec2a9b137ee3026efff1b1b3537d99cf19f2 Mon Sep 17 00:00:00 2001 From: kotfu Date: Fri, 20 Apr 2018 15:36:15 -0600 Subject: Move CommandParser class into its own file --- cmd2/parsing.py | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 cmd2/parsing.py (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py new file mode 100644 index 00000000..41a3ed0b --- /dev/null +++ b/cmd2/parsing.py @@ -0,0 +1,212 @@ +# +# -*- coding: utf-8 -*- +"""Command parsing classes for cmd2""" + +import re +import shlex + +import cmd2 + +class Command(): + """Store the results of a parsed command.""" + pass + +class CommandParser(): + """Parse raw text into command components.""" + def __init__( + self, + quotes=['"', "'"], + allow_redirection=True, + redirection_chars=['|', '<', '>'], + terminators=[';'], + multilineCommands = [], + ): + self.quotes = quotes + self.allow_redirection = allow_redirection + self.redirection_chars = redirection_chars + self.terminators = terminators + self.multilineCommands = multilineCommands + + def parseString(self, rawinput): + result = Command() + result.raw = rawinput + result.command = None + result.multilineCommand = None + result.args = None + result.terminator = None + result.suffix = None + result.pipeTo = None + result.output = None + result.outputTo = None + + # strip C-style and C++-style comments + # shlex will handle the python/shell style comments for us + def replacer(match): + s = match.group(0) + if s.startswith('/'): + # treat the removed comment as an empty string + return '' + else: + return s + pattern = re.compile( + r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', + re.DOTALL | re.MULTILINE + ) + rawinput = re.sub(pattern, replacer, rawinput) + + s = shlex.shlex(rawinput, posix=False) + s.whitespace_split = True + tokens = self.split_on_punctuation(list(s)) + + # of the valid terminators, find the first one to occur in the input + terminator_pos = len(tokens)+1 + terminator = None + for test_terminator in self.terminators: + try: + pos = tokens.index(test_terminator) + if pos < terminator_pos: + terminator_pos = pos + terminator = test_terminator + except ValueError: + # the terminator is not in the tokens + pass + + if terminator: + terminator_pos = tokens.index(terminator) + # everything before the first terminator is the command and the args + (result.command, result.args) = self._command_and_args(tokens[:terminator_pos]) + result.terminator = tokens[terminator_pos] + # we will set the suffix later + # remove all the tokens before and including the terminator + tokens = tokens[terminator_pos+1:] + + # check for input from file + try: + if tokens[0] == '<': + result.inputFrom = ' '.join(tokens[1:]) + tokens = [] + except IndexError: + # no input from file + pass + + # check for output redirect + try: + output_pos = tokens.index('>') + result.output = '>' + result.outputTo = ' '.join(tokens[output_pos+1:]) + # remove all the tokens after the output redirect + tokens = tokens[:output_pos] + except ValueError: + pass + + # check for paste buffer + try: + output_pos = tokens.index('>>') + result.output = '>>' + # remove all tokens after the output redirect + tokens = tokens[:output_pos] + except ValueError: + pass + + # check for pipes + try: + # find the first pipe if it exists + pipe_pos = tokens.index('|') + # set everything after the first pipe to result.pipeTo + result.pipeTo = ' '.join(tokens[pipe_pos+1:]) + # remove all the tokens after the pipe + tokens = tokens[:pipe_pos] + except ValueError: + # no pipe in the tokens + pass + + if result.terminator: + # whatever is left is the suffix + result.suffix = ' '.join(tokens) + else: + # no terminator, so whatever is left is the command and the args + (result.command, result.args) = self._command_and_args(tokens) + + if result.command in self.multilineCommands: + result.multilineCommand = result.command + + return result + + def _command_and_args(self, tokens): + """given a list of tokens, and return a tuple of the command + and the args as a string. + """ + command = None + args = None + + if tokens: + command = tokens[0] + + if len(tokens) > 1: + args = ' '.join(tokens[1:]) + + return (command, args) + + def split_on_punctuation(self, initial_tokens): + """ + # Further splits tokens from a command line using punctuation characters + # as word breaks when they are in unquoted strings. Each run of punctuation + # characters is treated as a single token. + + :param initial_tokens: the tokens as parsed by shlex + :return: the punctuated tokens + """ + punctuation = [] + punctuation.extend(self.terminators) + if self.allow_redirection: + punctuation.extend(self.redirection_chars) + + punctuated_tokens = [] + + for cur_initial_token in initial_tokens: + + # Save tokens up to 1 character in length or quoted tokens. No need to parse these. + if len(cur_initial_token) <= 1 or cur_initial_token[0] in self.quotes: + punctuated_tokens.append(cur_initial_token) + continue + + # Iterate over each character in this token + cur_index = 0 + cur_char = cur_initial_token[cur_index] + + # Keep track of the token we are building + new_token = '' + + while True: + if cur_char not in punctuation: + + # Keep appending to new_token until we hit a punctuation char + while cur_char not in punctuation: + new_token += cur_char + cur_index += 1 + if cur_index < len(cur_initial_token): + cur_char = cur_initial_token[cur_index] + else: + break + + else: + cur_punc = cur_char + + # Keep appending to new_token until we hit something other than cur_punc + while cur_char == cur_punc: + new_token += cur_char + cur_index += 1 + if cur_index < len(cur_initial_token): + cur_char = cur_initial_token[cur_index] + else: + break + + # Save the new token + punctuated_tokens.append(new_token) + new_token = '' + + # Check if we've viewed all characters + if cur_index >= len(cur_initial_token): + break + + return punctuated_tokens -- cgit v1.2.1 From 27b5ab6da0955fda521febada0070d4b53c7e255 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sat, 21 Apr 2018 16:24:45 -0600 Subject: A bit of renaming --- cmd2/parsing.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 41a3ed0b..41ce5743 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -9,7 +9,16 @@ import cmd2 class Command(): """Store the results of a parsed command.""" - pass + def __init__(self, rawinput): + self.raw = rawinput + self.command = None + self.multilineCommand = None + self.args = None + self.terminator = None + self.suffix = None + self.pipeTo = None + self.output = None + self.outputTo = None class CommandParser(): """Parse raw text into command components.""" @@ -28,16 +37,7 @@ class CommandParser(): self.multilineCommands = multilineCommands def parseString(self, rawinput): - result = Command() - result.raw = rawinput - result.command = None - result.multilineCommand = None - result.args = None - result.terminator = None - result.suffix = None - result.pipeTo = None - result.output = None - result.outputTo = None + result = Command(rawinput) # strip C-style and C++-style comments # shlex will handle the python/shell style comments for us -- cgit v1.2.1 From 829c36cb22fd7f3b71548c1b742d40e7609a3aca Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 22 Apr 2018 17:11:11 -0600 Subject: refactor Command() to Statement(str) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Command class renamed to Statement, and is now a subclass of str. - str’s are immutable, and the string needs to contain the arguments, so revise the parseString method --- cmd2/parsing.py | 67 +++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 21 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 41ce5743..164c7735 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -7,10 +7,19 @@ import shlex import cmd2 -class Command(): - """Store the results of a parsed command.""" - def __init__(self, rawinput): - self.raw = rawinput +class Statement(str): + """String subclass with additional attributes to store the results of parsing. + + The cmd module in the standard library passes commands around as a + string. To retain backwards compatibility, cmd2 does the same. However, we + need a place to capture the additional output of the command parsing, so we add + our own attributes to this subclass. + + The string portion of the class contains the arguments, but not the command, nor + the output redirection clauses. + """ + def __init__(self, object): + self.raw = str(object) self.command = None self.multilineCommand = None self.args = None @@ -37,7 +46,7 @@ class CommandParser(): self.multilineCommands = multilineCommands def parseString(self, rawinput): - result = Command(rawinput) + #result = Statement(rawinput) # strip C-style and C++-style comments # shlex will handle the python/shell style comments for us @@ -67,6 +76,7 @@ class CommandParser(): if pos < terminator_pos: terminator_pos = pos terminator = test_terminator + break except ValueError: # the terminator is not in the tokens pass @@ -74,35 +84,37 @@ class CommandParser(): if terminator: terminator_pos = tokens.index(terminator) # everything before the first terminator is the command and the args - (result.command, result.args) = self._command_and_args(tokens[:terminator_pos]) - result.terminator = tokens[terminator_pos] + (command, args) = self._command_and_args(tokens[:terminator_pos]) + #terminator = tokens[terminator_pos] # we will set the suffix later # remove all the tokens before and including the terminator tokens = tokens[terminator_pos+1:] # check for input from file + inputFrom = None try: if tokens[0] == '<': - result.inputFrom = ' '.join(tokens[1:]) + inputFrom = ' '.join(tokens[1:]) tokens = [] except IndexError: - # no input from file pass + # check for output redirect try: output_pos = tokens.index('>') - result.output = '>' - result.outputTo = ' '.join(tokens[output_pos+1:]) + output = '>' + outputTo = ' '.join(tokens[output_pos+1:]) # remove all the tokens after the output redirect tokens = tokens[:output_pos] except ValueError: - pass + output = None + outputTo = None # check for paste buffer try: output_pos = tokens.index('>>') - result.output = '>>' + output = '>>' # remove all tokens after the output redirect tokens = tokens[:output_pos] except ValueError: @@ -113,23 +125,36 @@ class CommandParser(): # find the first pipe if it exists pipe_pos = tokens.index('|') # set everything after the first pipe to result.pipeTo - result.pipeTo = ' '.join(tokens[pipe_pos+1:]) + pipeTo = ' '.join(tokens[pipe_pos+1:]) # remove all the tokens after the pipe tokens = tokens[:pipe_pos] except ValueError: # no pipe in the tokens - pass + pipeTo = None - if result.terminator: + if terminator: # whatever is left is the suffix - result.suffix = ' '.join(tokens) + suffix = ' '.join(tokens) else: # no terminator, so whatever is left is the command and the args - (result.command, result.args) = self._command_and_args(tokens) - - if result.command in self.multilineCommands: - result.multilineCommand = result.command + suffix = None + (command, args) = self._command_and_args(tokens) + if command in self.multilineCommands: + multilineCommand = command + else: + multilineCommand = None + + result = Statement(args) + result.command = command + result.args = args + result.terminator = terminator + result.inputFrom = inputFrom + result.output = output + result.outputTo = outputTo + result.pipeTo = pipeTo + result.suffix = suffix + result.multilineCommand = multilineCommand return result def _command_and_args(self, tokens): -- cgit v1.2.1 From f83154e2749d90bf4ec24c7c84b45dc0860e8b13 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 22 Apr 2018 19:11:29 -0600 Subject: =?UTF-8?q?args=20has=20to=20be=20=E2=80=98=E2=80=99=20not=20None?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd2/parsing.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 164c7735..79d57a32 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -22,7 +22,8 @@ class Statement(str): self.raw = str(object) self.command = None self.multilineCommand = None - self.args = None + # has to be an empty string for compatibility with standard library cmd + self.args = '' self.terminator = None self.suffix = None self.pipeTo = None @@ -58,7 +59,8 @@ class CommandParser(): else: return s pattern = re.compile( - r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', + #r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', + r'/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE ) rawinput = re.sub(pattern, replacer, rawinput) @@ -146,6 +148,7 @@ class CommandParser(): multilineCommand = None result = Statement(args) + result.raw = rawinput result.command = command result.args = args result.terminator = terminator @@ -162,7 +165,7 @@ class CommandParser(): and the args as a string. """ command = None - args = None + args = '' if tokens: command = tokens[0] -- cgit v1.2.1 From 65bf06a6e9712c87802bf8c319442a8b4cb00e6f Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 22 Apr 2018 21:55:33 -0600 Subject: Updates to comments and todo list --- cmd2/parsing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 79d57a32..5bb8d654 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -47,9 +47,7 @@ class CommandParser(): self.multilineCommands = multilineCommands def parseString(self, rawinput): - #result = Statement(rawinput) - - # strip C-style and C++-style comments + # strip C-style comments # shlex will handle the python/shell style comments for us def replacer(match): s = match.group(0) -- cgit v1.2.1 From 7ac59187ffbc14e2eb14a00866231c8b18e1a087 Mon Sep 17 00:00:00 2001 From: kotfu Date: Mon, 23 Apr 2018 18:34:08 -0600 Subject: Shortcut and alias processing added to CommandParser() --- cmd2/parsing.py | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 5bb8d654..dece2b5e 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -31,7 +31,10 @@ class Statement(str): self.outputTo = None class CommandParser(): - """Parse raw text into command components.""" + """Parse raw text into command components. + + Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion. + """ def __init__( self, quotes=['"', "'"], @@ -39,14 +42,18 @@ class CommandParser(): redirection_chars=['|', '<', '>'], terminators=[';'], multilineCommands = [], + aliases = {}, + shortcuts = [], ): self.quotes = quotes self.allow_redirection = allow_redirection self.redirection_chars = redirection_chars self.terminators = terminators self.multilineCommands = multilineCommands + self.aliases = aliases + self.shortcuts = shortcuts - def parseString(self, rawinput): + def parseString(self, line): # strip C-style comments # shlex will handle the python/shell style comments for us def replacer(match): @@ -61,7 +68,22 @@ class CommandParser(): r'/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE ) - rawinput = re.sub(pattern, replacer, rawinput) + line = re.sub(pattern, replacer, line) + rawinput = line + + # expand shortcuts, have to do this first because + # a shortcut can expand into multiple tokens, ie '!ls' becomes + # 'shell ls' + for (shortcut, expansion) in self.shortcuts: + if rawinput.startswith(shortcut): + # If the next character after the shortcut isn't a space, then insert one + shortcut_len = len(shortcut) + if len(rawinput) == shortcut_len or rawinput[shortcut_len] != ' ': + expansion += ' ' + + # Expand the shortcut + rawinput = rawinput.replace(shortcut, expansion, 1) + break s = shlex.shlex(rawinput, posix=False) s.whitespace_split = True @@ -140,11 +162,27 @@ class CommandParser(): suffix = None (command, args) = self._command_and_args(tokens) + # expand aliases + # make a copy of aliases so we can edit it + tmp_aliases = list(self.aliases.keys()) + keep_expanding = len(tmp_aliases) > 0 + + while keep_expanding: + for cur_alias in tmp_aliases: + keep_expanding = False + if command == cur_alias: + command = self.aliases[cur_alias] + tmp_aliases.remove(cur_alias) + keep_expanding = len(tmp_aliases) > 0 + break + + # set multiline if command in self.multilineCommands: multilineCommand = command else: multilineCommand = None + # build Statement object result = Statement(args) result.raw = rawinput result.command = command -- cgit v1.2.1 From f47568f8dfdf0a9c909c266b8de3233d1ae8a4fa Mon Sep 17 00:00:00 2001 From: kotfu Date: Mon, 23 Apr 2018 18:40:14 -0600 Subject: Shortcuts and aliases fully implemented. --- cmd2/parsing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index dece2b5e..c8110667 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -53,7 +53,7 @@ class CommandParser(): self.aliases = aliases self.shortcuts = shortcuts - def parseString(self, line): + def parseString(self, rawinput): # strip C-style comments # shlex will handle the python/shell style comments for us def replacer(match): @@ -68,24 +68,24 @@ class CommandParser(): r'/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE ) - line = re.sub(pattern, replacer, line) - rawinput = line + rawinput = re.sub(pattern, replacer, rawinput) + line = rawinput # expand shortcuts, have to do this first because # a shortcut can expand into multiple tokens, ie '!ls' becomes # 'shell ls' for (shortcut, expansion) in self.shortcuts: - if rawinput.startswith(shortcut): + if line.startswith(shortcut): # If the next character after the shortcut isn't a space, then insert one shortcut_len = len(shortcut) - if len(rawinput) == shortcut_len or rawinput[shortcut_len] != ' ': + if len(line) == shortcut_len or line[shortcut_len] != ' ': expansion += ' ' # Expand the shortcut - rawinput = rawinput.replace(shortcut, expansion, 1) + line = line.replace(shortcut, expansion, 1) break - s = shlex.shlex(rawinput, posix=False) + s = shlex.shlex(line, posix=False) s.whitespace_split = True tokens = self.split_on_punctuation(list(s)) -- cgit v1.2.1 From b3d71457e951d9d382787cb82fdf77f32951337c Mon Sep 17 00:00:00 2001 From: kotfu Date: Mon, 23 Apr 2018 20:16:55 -0600 Subject: Fix parsing of input redirection and appending output --- cmd2/parsing.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index c8110667..ec8e2e84 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -115,14 +115,16 @@ class CommandParser(): # check for input from file inputFrom = None try: - if tokens[0] == '<': - inputFrom = ' '.join(tokens[1:]) - tokens = [] - except IndexError: + input_pos = tokens.index('<') + inputFrom = ' '.join(tokens[input_pos+1:]) + tokens = tokens[:input_pos] + except ValueError: pass # check for output redirect + output = None + outputTo = None try: output_pos = tokens.index('>') output = '>' @@ -130,13 +132,12 @@ class CommandParser(): # remove all the tokens after the output redirect tokens = tokens[:output_pos] except ValueError: - output = None - outputTo = None + pass - # check for paste buffer try: output_pos = tokens.index('>>') output = '>>' + outputTo = ' '.join(tokens[output_pos+1:]) # remove all tokens after the output redirect tokens = tokens[:output_pos] except ValueError: -- cgit v1.2.1 From 7f7adaf2fa211e877987aef075affe2a7082dbc5 Mon Sep 17 00:00:00 2001 From: kotfu Date: Mon, 23 Apr 2018 20:41:04 -0600 Subject: More work on multiline --- cmd2/parsing.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index ec8e2e84..f4f9a6a3 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -180,6 +180,10 @@ class CommandParser(): # set multiline if command in self.multilineCommands: multilineCommand = command + # return no arguments if this is a "partial" command, + # i.e. we have a multiline command but no terminator yet + if not terminator: + args = '' else: multilineCommand = None -- cgit v1.2.1 From 4411d8d68c57e8cfca323b80369a8d3c5f11c9d4 Mon Sep 17 00:00:00 2001 From: kotfu Date: Mon, 23 Apr 2018 21:13:33 -0600 Subject: Multiline support mostly done --- cmd2/parsing.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index f4f9a6a3..2c01fb70 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -7,6 +7,8 @@ import shlex import cmd2 +BLANK_LINE = '\n\n' + class Statement(str): """String subclass with additional attributes to store the results of parsing. @@ -85,13 +87,19 @@ class CommandParser(): line = line.replace(shortcut, expansion, 1) break + # handle the special case/hardcoded terminator of a blank line + # we have to do this before we shlex on whitespace because it + # destroys all unquoted whitespace in the input + terminator = None + if line[-2:] == BLANK_LINE: + terminator = BLANK_LINE + s = shlex.shlex(line, posix=False) s.whitespace_split = True tokens = self.split_on_punctuation(list(s)) # of the valid terminators, find the first one to occur in the input terminator_pos = len(tokens)+1 - terminator = None for test_terminator in self.terminators: try: pos = tokens.index(test_terminator) @@ -104,7 +112,10 @@ class CommandParser(): pass if terminator: - terminator_pos = tokens.index(terminator) + if terminator == BLANK_LINE: + terminator_pos = len(tokens)+1 + else: + terminator_pos = tokens.index(terminator) # everything before the first terminator is the command and the args (command, args) = self._command_and_args(tokens[:terminator_pos]) #terminator = tokens[terminator_pos] -- cgit v1.2.1 From 3441d1e5abcd95ff9100905a9184400d590c600d Mon Sep 17 00:00:00 2001 From: kotfu Date: Mon, 23 Apr 2018 22:55:24 -0600 Subject: Multiline now working --- cmd2/parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 2c01fb70..ffeb8bbe 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -7,7 +7,7 @@ import shlex import cmd2 -BLANK_LINE = '\n\n' +BLANK_LINE = '\n' class Statement(str): """String subclass with additional attributes to store the results of parsing. @@ -91,7 +91,7 @@ class CommandParser(): # we have to do this before we shlex on whitespace because it # destroys all unquoted whitespace in the input terminator = None - if line[-2:] == BLANK_LINE: + if line[-1:] == BLANK_LINE: terminator = BLANK_LINE s = shlex.shlex(line, posix=False) -- cgit v1.2.1 From 8297d4d1c0a4f56c6c952059fb7fc2b43b1050ed Mon Sep 17 00:00:00 2001 From: kotfu Date: Tue, 24 Apr 2018 21:15:54 -0600 Subject: Refactoring and code cleanup - rename CommandParser to StatementParser - move tests from test_shlexparsing.py to test_parsing.py - standardize the output of the parse() method into a variable called statement. --- cmd2/parsing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index ffeb8bbe..9204305b 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -1,6 +1,6 @@ # # -*- coding: utf-8 -*- -"""Command parsing classes for cmd2""" +"""Statement parsing classes for cmd2""" import re import shlex @@ -32,7 +32,7 @@ class Statement(str): self.output = None self.outputTo = None -class CommandParser(): +class StatementParser(): """Parse raw text into command components. Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion. @@ -55,7 +55,7 @@ class CommandParser(): self.aliases = aliases self.shortcuts = shortcuts - def parseString(self, rawinput): + def parse(self, rawinput): # strip C-style comments # shlex will handle the python/shell style comments for us def replacer(match): -- cgit v1.2.1 From 1862ac84672755e1ae0c7adf4bc2933c29021af8 Mon Sep 17 00:00:00 2001 From: kotfu Date: Tue, 24 Apr 2018 22:40:41 -0600 Subject: Add type hinting --- cmd2/parsing.py | 71 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 28 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 9204305b..9030a5f8 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -4,6 +4,7 @@ import re import shlex +from typing import List, Tuple import cmd2 @@ -55,7 +56,7 @@ class StatementParser(): self.aliases = aliases self.shortcuts = shortcuts - def parse(self, rawinput): + def parse(self, rawinput: str) -> Statement: # strip C-style comments # shlex will handle the python/shell style comments for us def replacer(match): @@ -67,12 +68,15 @@ class StatementParser(): return s pattern = re.compile( #r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', - r'/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', + r'/\*.*?(\*/|$)|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE ) rawinput = re.sub(pattern, replacer, rawinput) line = rawinput + command = None + args = '' + # expand shortcuts, have to do this first because # a shortcut can expand into multiple tokens, ie '!ls' becomes # 'shell ls' @@ -97,7 +101,11 @@ class StatementParser(): s = shlex.shlex(line, posix=False) s.whitespace_split = True tokens = self.split_on_punctuation(list(s)) - + + if tokens: + command_to_expand = tokens[0] + tokens[0] = self.expand_aliases(command_to_expand) + # of the valid terminators, find the first one to occur in the input terminator_pos = len(tokens)+1 for test_terminator in self.terminators: @@ -118,10 +126,18 @@ class StatementParser(): terminator_pos = tokens.index(terminator) # everything before the first terminator is the command and the args (command, args) = self._command_and_args(tokens[:terminator_pos]) - #terminator = tokens[terminator_pos] # we will set the suffix later # remove all the tokens before and including the terminator tokens = tokens[terminator_pos+1:] + else: + (testcommand, testargs) = self._command_and_args(tokens) + if testcommand in self.multilineCommands: + # no terminator on this line but we have a multiline command + # everything else on the line is part of the args + # because redirectors can only be after a terminator + command = testcommand + args = testargs + tokens = [] # check for input from file inputFrom = None @@ -132,7 +148,6 @@ class StatementParser(): except ValueError: pass - # check for output redirect output = None outputTo = None @@ -172,29 +187,13 @@ class StatementParser(): else: # no terminator, so whatever is left is the command and the args suffix = None - (command, args) = self._command_and_args(tokens) - - # expand aliases - # make a copy of aliases so we can edit it - tmp_aliases = list(self.aliases.keys()) - keep_expanding = len(tmp_aliases) > 0 - - while keep_expanding: - for cur_alias in tmp_aliases: - keep_expanding = False - if command == cur_alias: - command = self.aliases[cur_alias] - tmp_aliases.remove(cur_alias) - keep_expanding = len(tmp_aliases) > 0 - break + if not command: + # command could already have been set, if so, don't set it again + (command, args) = self._command_and_args(tokens) # set multiline if command in self.multilineCommands: multilineCommand = command - # return no arguments if this is a "partial" command, - # i.e. we have a multiline command but no terminator yet - if not terminator: - args = '' else: multilineCommand = None @@ -211,8 +210,24 @@ class StatementParser(): result.suffix = suffix result.multilineCommand = multilineCommand return result - - def _command_and_args(self, tokens): + + def expand_aliases(self, command: str) -> str: + """Given a command, expand any aliases for the command""" + # make a copy of aliases so we can edit it + tmp_aliases = list(self.aliases.keys()) + keep_expanding = len(tmp_aliases) > 0 + + while keep_expanding: + for cur_alias in tmp_aliases: + keep_expanding = False + if command == cur_alias: + command = self.aliases[cur_alias] + tmp_aliases.remove(cur_alias) + keep_expanding = len(tmp_aliases) > 0 + break + return command + + def _command_and_args(self, tokens: List[str]) -> Tuple[str, str]: """given a list of tokens, and return a tuple of the command and the args as a string. """ @@ -227,7 +242,7 @@ class StatementParser(): return (command, args) - def split_on_punctuation(self, initial_tokens): + def split_on_punctuation(self, tokens: List[str]) -> List[str]: """ # Further splits tokens from a command line using punctuation characters # as word breaks when they are in unquoted strings. Each run of punctuation @@ -243,7 +258,7 @@ class StatementParser(): punctuated_tokens = [] - for cur_initial_token in initial_tokens: + for cur_initial_token in tokens: # Save tokens up to 1 character in length or quoted tokens. No need to parse these. if len(cur_initial_token) <= 1 or cur_initial_token[0] in self.quotes: -- cgit v1.2.1 From 739d3f42715e59b61432cd7fbedacae4a4f80a16 Mon Sep 17 00:00:00 2001 From: kotfu Date: Thu, 26 Apr 2018 20:21:52 -0600 Subject: First stage of refactoring cmd2.parseline() for tab completion --- cmd2/parsing.py | 104 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 24 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 9030a5f8..45715b32 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -10,6 +10,14 @@ import cmd2 BLANK_LINE = '\n' +def _comment_replacer(match): + s = match.group(0) + if s.startswith('/'): + # treat the removed comment as an empty string + return '' + else: + return s + class Statement(str): """String subclass with additional attributes to store the results of parsing. @@ -32,6 +40,11 @@ class Statement(str): self.pipeTo = None self.output = None self.outputTo = None + + @property + def command_and_args(self): + """Combine command and args with a space separating them""" + return '{} {}'.format('' if self.command is None else self.command, self.args).strip() class StatementParser(): """Parse raw text into command components. @@ -56,40 +69,28 @@ class StatementParser(): self.aliases = aliases self.shortcuts = shortcuts - def parse(self, rawinput: str) -> Statement: - # strip C-style comments - # shlex will handle the python/shell style comments for us - def replacer(match): - s = match.group(0) - if s.startswith('/'): - # treat the removed comment as an empty string - return '' - else: - return s - pattern = re.compile( - #r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', + self.comment_pattern = re.compile( r'/\*.*?(\*/|$)|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE ) - rawinput = re.sub(pattern, replacer, rawinput) - line = rawinput + def parse(self, rawinput: str) -> Statement: + """Parse input into a Statement object, stripping comments, expanding + aliases and shortcuts, and extracting output redirection directives. + """ + # strip C-style comments + # shlex will handle the python/shell style comments for us + # save rawinput for later + rawinput = re.sub(self.comment_pattern, _comment_replacer, rawinput) + # we are going to modify line, so create a copy of the raw input + line = rawinput command = None args = '' # expand shortcuts, have to do this first because # a shortcut can expand into multiple tokens, ie '!ls' becomes # 'shell ls' - for (shortcut, expansion) in self.shortcuts: - if line.startswith(shortcut): - # If the next character after the shortcut isn't a space, then insert one - shortcut_len = len(shortcut) - if len(line) == shortcut_len or line[shortcut_len] != ' ': - expansion += ' ' - - # Expand the shortcut - line = line.replace(shortcut, expansion, 1) - break + line = self.expand_shortcuts(line) # handle the special case/hardcoded terminator of a blank line # we have to do this before we shlex on whitespace because it @@ -98,10 +99,12 @@ class StatementParser(): if line[-1:] == BLANK_LINE: terminator = BLANK_LINE + # split the input on whitespace s = shlex.shlex(line, posix=False) s.whitespace_split = True tokens = self.split_on_punctuation(list(s)) + # expand aliases if tokens: command_to_expand = tokens[0] tokens[0] = self.expand_aliases(command_to_expand) @@ -211,6 +214,59 @@ class StatementParser(): result.multilineCommand = multilineCommand return result + def parse_command_only(self, rawinput: str) -> Statement: + """Partially parse input into a Statement object. The command is + identified, and shortcuts and aliases are expanded. + Terminators, multiline commands, and output redirection are not + parsed. + """ + # strip C-style comments + # shlex will handle the python/shell style comments for us + # save rawinput for later + rawinput = re.sub(self.comment_pattern, _comment_replacer, rawinput) + # we are going to modify line, so create a copy of the raw input + line = rawinput + command = None + args = '' + + # expand shortcuts, have to do this first because + # a shortcut can expand into multiple tokens, ie '!ls' becomes + # 'shell ls' + line = self.expand_shortcuts(line) + + # split the input on whitespace + s = shlex.shlex(line, posix=False) + s.whitespace_split = True + tokens = self.split_on_punctuation(list(s)) + + # expand aliases + if tokens: + command_to_expand = tokens[0] + tokens[0] = self.expand_aliases(command_to_expand) + + (command, args) = self._command_and_args(tokens) + + # build Statement object + result = Statement(args) + result.raw = rawinput + result.command = command + result.args = args + return result + + def expand_shortcuts(self, line: str) -> str: + """Expand shortcuts at the beginning of input.""" + for (shortcut, expansion) in self.shortcuts: + if line.startswith(shortcut): + # If the next character after the shortcut isn't a space, then insert one + shortcut_len = len(shortcut) + if len(line) == shortcut_len or line[shortcut_len] != ' ': + expansion += ' ' + + # Expand the shortcut + line = line.replace(shortcut, expansion, 1) + break + return line + def expand_aliases(self, command: str) -> str: """Given a command, expand any aliases for the command""" # make a copy of aliases so we can edit it -- cgit v1.2.1 From 16bf37bf0141446f46dfce2d1ba2b9ed0de7ec44 Mon Sep 17 00:00:00 2001 From: kotfu Date: Fri, 27 Apr 2018 07:52:14 -0600 Subject: Add description of comment-matching regex --- cmd2/parsing.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 45715b32..b6c58db7 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -69,6 +69,30 @@ class StatementParser(): self.aliases = aliases self.shortcuts = shortcuts + # this regular expression matches C-style comments and quoted + # strings, i.e. stuff between single or double quote marks + # it's used with _comment_replacer() to strip out the C-style + # comments, while leaving C-style comments that are inside either + # double or single quotes. + # + # this big regular expression can be broken down into 3 regular + # expressions that are OR'ed together. + # + # /\*.*?(\*/|$) matches C-style comments, with an optional + # closing '*/'. The optional closing '*/' is + # there to retain backward compatibility with + # the pyparsing implementation of cmd2 < 0.9.0 + # \'(?:\\.|[^\\\'])*\' matches a single quoted string, allowing + # for embedded backslash escaped single quote + # marks + # "(?:\\.|[^\\"])*" matches a double quoted string, allowing + # for embedded backslash escaped double quote + # marks + # + # by way of reminder the (?:...) regular expression syntax is just + # a non-capturing version of regular parenthesis. We need the non- + # capturing syntax because _comment_replacer() looks at match + # groups self.comment_pattern = re.compile( r'/\*.*?(\*/|$)|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE -- cgit v1.2.1 From 5c14b3845f6a872e3e5b236f8caab6b4f3472f8f Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 09:38:15 -0600 Subject: Cleanup requested changes in pull request --- cmd2/parsing.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index b6c58db7..22b558b3 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -6,8 +6,6 @@ import re import shlex from typing import List, Tuple -import cmd2 - BLANK_LINE = '\n' def _comment_replacer(match): @@ -20,7 +18,7 @@ def _comment_replacer(match): class Statement(str): """String subclass with additional attributes to store the results of parsing. - + The cmd module in the standard library passes commands around as a string. To retain backwards compatibility, cmd2 does the same. However, we need a place to capture the additional output of the command parsing, so we add @@ -29,8 +27,9 @@ class Statement(str): The string portion of the class contains the arguments, but not the command, nor the output redirection clauses. """ - def __init__(self, object): - self.raw = str(object) + def __init__(self, obj): + super().__init__() + self.raw = str(obj) self.command = None self.multilineCommand = None # has to be an empty string for compatibility with standard library cmd @@ -40,7 +39,7 @@ class Statement(str): self.pipeTo = None self.output = None self.outputTo = None - + @property def command_and_args(self): """Combine command and args with a space separating them""" @@ -48,7 +47,7 @@ class Statement(str): class StatementParser(): """Parse raw text into command components. - + Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion. """ def __init__( @@ -207,7 +206,7 @@ class StatementParser(): except ValueError: # no pipe in the tokens pipeTo = None - + if terminator: # whatever is left is the suffix suffix = ' '.join(tokens) -- cgit v1.2.1 From fd3512a80600a3262030d55ed03f4930757395f6 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 09:52:10 -0600 Subject: Move quotes and redirection_chars from arguments to constants Since the tab completion code relies on these same constants, if we allow them to be passed to the statement parser, we could have a situation where the statement parser and tab completion return different results. --- cmd2/parsing.py | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 22b558b3..b4f0e9c1 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -6,15 +6,17 @@ import re import shlex from typing import List, Tuple +from . import constants + BLANK_LINE = '\n' def _comment_replacer(match): - s = match.group(0) - if s.startswith('/'): - # treat the removed comment as an empty string + matched_string = match.group(0) + if matched_string.startswith('/'): + # the matched string was a comment, so remove it return '' - else: - return s + # the matched string was a quoted string, return the match + return matched_string class Statement(str): """String subclass with additional attributes to store the results of parsing. @@ -52,21 +54,29 @@ class StatementParser(): """ def __init__( self, - quotes=['"', "'"], allow_redirection=True, - redirection_chars=['|', '<', '>'], - terminators=[';'], - multilineCommands = [], - aliases = {}, + terminators=None, + multilineCommands = None, + aliases = None, shortcuts = [], ): - self.quotes = quotes self.allow_redirection = allow_redirection - self.redirection_chars = redirection_chars - self.terminators = terminators - self.multilineCommands = multilineCommands - self.aliases = aliases - self.shortcuts = shortcuts + if terminators is None: + self.terminators = [';'] + else: + self.terminators = terminators + if multilineCommands is None: + self.multilineCommands = [] + else: + self.multilineCommands = multilineCommands + if aliases is None: + self.aliases = {} + else: + self.aliases = aliases + if shortcuts is None: + self.shortcuts = [] + else: + self.shortcuts = shortcuts # this regular expression matches C-style comments and quoted # strings, i.e. stuff between single or double quote marks @@ -333,14 +343,14 @@ class StatementParser(): punctuation = [] punctuation.extend(self.terminators) if self.allow_redirection: - punctuation.extend(self.redirection_chars) + punctuation.extend(constants.REDIRECTION_CHARS) punctuated_tokens = [] for cur_initial_token in tokens: # Save tokens up to 1 character in length or quoted tokens. No need to parse these. - if len(cur_initial_token) <= 1 or cur_initial_token[0] in self.quotes: + if len(cur_initial_token) <= 1 or cur_initial_token[0] in constants.QUOTES: punctuated_tokens.append(cur_initial_token) continue -- cgit v1.2.1 From 85c63d41286e62460b3b80c465349a5a0476266c Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 09:59:13 -0600 Subject: multilineCommand -> multiline_command --- cmd2/parsing.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index b4f0e9c1..03937856 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -33,7 +33,7 @@ class Statement(str): super().__init__() self.raw = str(obj) self.command = None - self.multilineCommand = None + self.multiline_command = None # has to be an empty string for compatibility with standard library cmd self.args = '' self.terminator = None @@ -56,7 +56,7 @@ class StatementParser(): self, allow_redirection=True, terminators=None, - multilineCommands = None, + multiline_commands = None, aliases = None, shortcuts = [], ): @@ -65,10 +65,10 @@ class StatementParser(): self.terminators = [';'] else: self.terminators = terminators - if multilineCommands is None: + if multiline_commands is None: self.multilineCommands = [] else: - self.multilineCommands = multilineCommands + self.multiline_commands = multiline_commands if aliases is None: self.aliases = {} else: @@ -167,7 +167,7 @@ class StatementParser(): tokens = tokens[terminator_pos+1:] else: (testcommand, testargs) = self._command_and_args(tokens) - if testcommand in self.multilineCommands: + if testcommand in self.multiline_commands: # no terminator on this line but we have a multiline command # everything else on the line is part of the args # because redirectors can only be after a terminator @@ -228,10 +228,10 @@ class StatementParser(): (command, args) = self._command_and_args(tokens) # set multiline - if command in self.multilineCommands: - multilineCommand = command + if command in self.multiline_commands: + multiline_command = command else: - multilineCommand = None + multiline_command = None # build Statement object result = Statement(args) @@ -244,7 +244,7 @@ class StatementParser(): result.outputTo = outputTo result.pipeTo = pipeTo result.suffix = suffix - result.multilineCommand = multilineCommand + result.multiline_command = multiline_command return result def parse_command_only(self, rawinput: str) -> Statement: -- cgit v1.2.1 From 975818feb4f24c25e84b3586cfe68230f1ac84f5 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 10:00:54 -0600 Subject: _command_and_args switched to static method --- cmd2/parsing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 03937856..63766a8c 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -316,7 +316,8 @@ class StatementParser(): break return command - def _command_and_args(self, tokens: List[str]) -> Tuple[str, str]: + @staticmethod + def _command_and_args(tokens: List[str]) -> Tuple[str, str]: """given a list of tokens, and return a tuple of the command and the args as a string. """ -- cgit v1.2.1 From fbc6d0b39fa1e84ea3de3b38b700c45189146429 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 12:43:11 -0600 Subject: pipeTo -> pipe_to --- cmd2/parsing.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 63766a8c..9b548716 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -38,7 +38,7 @@ class Statement(str): self.args = '' self.terminator = None self.suffix = None - self.pipeTo = None + self.pipe_to = None self.output = None self.outputTo = None @@ -56,9 +56,9 @@ class StatementParser(): self, allow_redirection=True, terminators=None, - multiline_commands = None, - aliases = None, - shortcuts = [], + multiline_commands=None, + aliases=None, + shortcuts=[], ): self.allow_redirection = allow_redirection if terminators is None: @@ -209,13 +209,13 @@ class StatementParser(): try: # find the first pipe if it exists pipe_pos = tokens.index('|') - # set everything after the first pipe to result.pipeTo - pipeTo = ' '.join(tokens[pipe_pos+1:]) + # set everything after the first pipe to result.pipe_to + pipe_to = ' '.join(tokens[pipe_pos+1:]) # remove all the tokens after the pipe tokens = tokens[:pipe_pos] except ValueError: # no pipe in the tokens - pipeTo = None + pipe_to = None if terminator: # whatever is left is the suffix @@ -242,7 +242,7 @@ class StatementParser(): result.inputFrom = inputFrom result.output = output result.outputTo = outputTo - result.pipeTo = pipeTo + result.pipe_to = pipe_to result.suffix = suffix result.multiline_command = multiline_command return result -- cgit v1.2.1 From 83fd707dcf9916c9f483e4417a2c3b2b083c8da2 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 12:44:38 -0600 Subject: outputTo -> output_to --- cmd2/parsing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 9b548716..0ba0736d 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -40,7 +40,7 @@ class Statement(str): self.suffix = None self.pipe_to = None self.output = None - self.outputTo = None + self.output_to = None @property def command_and_args(self): @@ -186,11 +186,11 @@ class StatementParser(): # check for output redirect output = None - outputTo = None + output_to = None try: output_pos = tokens.index('>') output = '>' - outputTo = ' '.join(tokens[output_pos+1:]) + output_to = ' '.join(tokens[output_pos+1:]) # remove all the tokens after the output redirect tokens = tokens[:output_pos] except ValueError: @@ -199,7 +199,7 @@ class StatementParser(): try: output_pos = tokens.index('>>') output = '>>' - outputTo = ' '.join(tokens[output_pos+1:]) + output_to = ' '.join(tokens[output_pos+1:]) # remove all tokens after the output redirect tokens = tokens[:output_pos] except ValueError: @@ -241,7 +241,7 @@ class StatementParser(): result.terminator = terminator result.inputFrom = inputFrom result.output = output - result.outputTo = outputTo + result.output_to = output_to result.pipe_to = pipe_to result.suffix = suffix result.multiline_command = multiline_command -- cgit v1.2.1 From 7b2d8a23b978f408cc1fe949e23c0aae97ed54a3 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 15:29:54 -0600 Subject: multilineCommands -> multiline_commands --- cmd2/parsing.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 0ba0736d..75bbd1c4 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -66,7 +66,7 @@ class StatementParser(): else: self.terminators = terminators if multiline_commands is None: - self.multilineCommands = [] + self.multiline_commands = [] else: self.multiline_commands = multiline_commands if aliases is None: @@ -133,9 +133,9 @@ class StatementParser(): terminator = BLANK_LINE # split the input on whitespace - s = shlex.shlex(line, posix=False) - s.whitespace_split = True - tokens = self.split_on_punctuation(list(s)) + lexer = shlex.shlex(line, posix=False) + lexer.whitespace_split = True + tokens = self.split_on_punctuation(list(lexer)) # expand aliases if tokens: @@ -268,9 +268,9 @@ class StatementParser(): line = self.expand_shortcuts(line) # split the input on whitespace - s = shlex.shlex(line, posix=False) - s.whitespace_split = True - tokens = self.split_on_punctuation(list(s)) + lexer = shlex.shlex(line, posix=False) + lexer.whitespace_split = True + tokens = self.split_on_punctuation(list(lexer)) # expand aliases if tokens: @@ -304,7 +304,7 @@ class StatementParser(): """Given a command, expand any aliases for the command""" # make a copy of aliases so we can edit it tmp_aliases = list(self.aliases.keys()) - keep_expanding = len(tmp_aliases) > 0 + keep_expanding = bool(tmp_aliases) while keep_expanding: for cur_alias in tmp_aliases: @@ -312,7 +312,7 @@ class StatementParser(): if command == cur_alias: command = self.aliases[cur_alias] tmp_aliases.remove(cur_alias) - keep_expanding = len(tmp_aliases) > 0 + keep_expanding = bool(tmp_aliases) break return command -- cgit v1.2.1 From 1297be25d68b0c8e511b819c906f0d5e0a942606 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 16:09:54 -0600 Subject: Really get rid of the inputFrom stuff, including documentation --- cmd2/parsing.py | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 75bbd1c4..a6e67096 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -175,15 +175,6 @@ class StatementParser(): args = testargs tokens = [] - # check for input from file - inputFrom = None - try: - input_pos = tokens.index('<') - inputFrom = ' '.join(tokens[input_pos+1:]) - tokens = tokens[:input_pos] - except ValueError: - pass - # check for output redirect output = None output_to = None @@ -239,7 +230,6 @@ class StatementParser(): result.command = command result.args = args result.terminator = terminator - result.inputFrom = inputFrom result.output = output result.output_to = output_to result.pipe_to = pipe_to -- cgit v1.2.1 From eecdc5cd3e155e21b65c7e87c4b82832d5232430 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 19:07:05 -0600 Subject: Missed a mutable argument --- cmd2/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index a6e67096..c1795f33 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -58,7 +58,7 @@ class StatementParser(): terminators=None, multiline_commands=None, aliases=None, - shortcuts=[], + shortcuts=None, ): self.allow_redirection = allow_redirection if terminators is None: -- cgit v1.2.1 From 4b903e0cc868a5410691f7c655efbad9d427124f Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 19:09:20 -0600 Subject: Add some documentation --- cmd2/parsing.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index c1795f33..3c56bcc6 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -28,6 +28,31 @@ class Statement(str): The string portion of the class contains the arguments, but not the command, nor the output redirection clauses. + + :var raw: string containing exactly what we input by the user + :type raw: str + :var command: the command, i.e. the first whitespace delimited word + :type command: str or None + :var multiline_command: if the command is a multiline command, the name of the + command, otherwise None + :type command: str or None + :var args: the arguments to the command, not including any output + redirection or terminators. quoted arguments remain + quoted. + :type args: str + :var terminator: the charater which terminated the multiline command, if + there was one + :type terminator: str or None + :var suffix: characters appearing after the terminator but before output + redirection, if any + :type suffix: str or None + :var pipe_to: if output was piped to a shell command, the shell command + :type pipe_to: str or None + :var output: if output was redirected, the redirection token, i.e. '>>' + :type output: str or None + :var output_to: if output was redirected, the destination, usually a filename + :type output_to: str or None + """ def __init__(self, obj): super().__init__() @@ -44,7 +69,10 @@ class Statement(str): @property def command_and_args(self): - """Combine command and args with a space separating them""" + """Combine command and args with a space separating them. + + Quoted arguments remain quoted. + """ return '{} {}'.format('' if self.command is None else self.command, self.args).strip() class StatementParser(): -- cgit v1.2.1 From 0140cf9425d2512935e2c53913efc83d383ba79d Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 29 Apr 2018 20:05:16 -0600 Subject: Internal refactoring of parsing and tokenizing code --- cmd2/parsing.py | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 3c56bcc6..4da8d814 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -10,13 +10,6 @@ from . import constants BLANK_LINE = '\n' -def _comment_replacer(match): - matched_string = match.group(0) - if matched_string.startswith('/'): - # the matched string was a comment, so remove it - return '' - # the matched string was a quoted string, return the match - return matched_string class Statement(str): """String subclass with additional attributes to store the results of parsing. @@ -75,6 +68,7 @@ class Statement(str): """ return '{} {}'.format('' if self.command is None else self.command, self.args).strip() + class StatementParser(): """Parse raw text into command components. @@ -135,16 +129,25 @@ class StatementParser(): re.DOTALL | re.MULTILINE ) + def tokenize(self, line: str) -> List[str]: + """Tokenize a string into a list""" + lexer = shlex.shlex(line, posix=False) + lexer.whitespace_split = True + tokens = self._split_on_punctuation(list(lexer)) + return tokens + def parse(self, rawinput: str) -> Statement: - """Parse input into a Statement object, stripping comments, expanding - aliases and shortcuts, and extracting output redirection directives. + """Tokenize the input and parse it into a Statement object, stripping + comments, expanding aliases and shortcuts, and extracting output + redirection directives. """ # strip C-style comments # shlex will handle the python/shell style comments for us # save rawinput for later - rawinput = re.sub(self.comment_pattern, _comment_replacer, rawinput) + rawinput = re.sub(self.comment_pattern, self._comment_replacer, rawinput) # we are going to modify line, so create a copy of the raw input line = rawinput + command = None args = '' @@ -160,10 +163,7 @@ class StatementParser(): if line[-1:] == BLANK_LINE: terminator = BLANK_LINE - # split the input on whitespace - lexer = shlex.shlex(line, posix=False) - lexer.whitespace_split = True - tokens = self.split_on_punctuation(list(lexer)) + tokens = self.tokenize(line) # expand aliases if tokens: @@ -274,7 +274,7 @@ class StatementParser(): # strip C-style comments # shlex will handle the python/shell style comments for us # save rawinput for later - rawinput = re.sub(self.comment_pattern, _comment_replacer, rawinput) + rawinput = re.sub(self.comment_pattern, self._comment_replacer, rawinput) # we are going to modify line, so create a copy of the raw input line = rawinput command = None @@ -288,7 +288,7 @@ class StatementParser(): # split the input on whitespace lexer = shlex.shlex(line, posix=False) lexer.whitespace_split = True - tokens = self.split_on_punctuation(list(lexer)) + tokens = self._split_on_punctuation(list(lexer)) # expand aliases if tokens: @@ -319,7 +319,10 @@ class StatementParser(): return line def expand_aliases(self, command: str) -> str: - """Given a command, expand any aliases for the command""" + """Given a command, expand any aliases for the command. + + If an alias contains shortcuts, the shortcuts will be expanded too. + """ # make a copy of aliases so we can edit it tmp_aliases = list(self.aliases.keys()) keep_expanding = bool(tmp_aliases) @@ -332,7 +335,7 @@ class StatementParser(): tmp_aliases.remove(cur_alias) keep_expanding = bool(tmp_aliases) break - return command + return self.expand_shortcuts(command) @staticmethod def _command_and_args(tokens: List[str]) -> Tuple[str, str]: @@ -350,7 +353,16 @@ class StatementParser(): return (command, args) - def split_on_punctuation(self, tokens: List[str]) -> List[str]: + @staticmethod + def _comment_replacer(match): + matched_string = match.group(0) + if matched_string.startswith('/'): + # the matched string was a comment, so remove it + return '' + # the matched string was a quoted string, return the match + return matched_string + + def _split_on_punctuation(self, tokens: List[str]) -> List[str]: """ # Further splits tokens from a command line using punctuation characters # as word breaks when they are in unquoted strings. Each run of punctuation -- cgit v1.2.1 From 119a6e49f1c37ea43059c74f7a2441a275731554 Mon Sep 17 00:00:00 2001 From: kotfu Date: Mon, 30 Apr 2018 00:01:47 -0600 Subject: Fix nested alias and shortcut expansion --- cmd2/parsing.py | 168 +++++++++++++++++++++++++++----------------------------- 1 file changed, 80 insertions(+), 88 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 4da8d814..7046b674 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -8,7 +8,7 @@ from typing import List, Tuple from . import constants -BLANK_LINE = '\n' +LINE_FEED = '\n' class Statement(str): @@ -129,10 +129,35 @@ class StatementParser(): re.DOTALL | re.MULTILINE ) + # aliases have to be a word, so make a regular expression + # that matches the first word in the line. This regex has two + # parts, the first parenthesis enclosed group matches one + # or more non-whitespace characters, and the second group + # matches either a whitespace character or the end of the + # string. We use \A and \Z to ensure we always match the + # beginning and end of a string that may have multiple + # lines + self.command_pattern = re.compile(r'\A(\S+)(\s|\Z)') + + def tokenize(self, line: str) -> List[str]: - """Tokenize a string into a list""" + """Lex a string into a list of tokens. + + Comments are removed, and shortcuts and aliases are expanded. + """ + + # strip C-style comments + # shlex will handle the python/shell style comments for us + line = re.sub(self.comment_pattern, self._comment_replacer, line) + + # expand shortcuts and aliases + line = self._expand(line) + + # split on whitespace lexer = shlex.shlex(line, posix=False) lexer.whitespace_split = True + + # custom lexing tokens = self._split_on_punctuation(list(lexer)) return tokens @@ -141,34 +166,19 @@ class StatementParser(): comments, expanding aliases and shortcuts, and extracting output redirection directives. """ - # strip C-style comments - # shlex will handle the python/shell style comments for us - # save rawinput for later - rawinput = re.sub(self.comment_pattern, self._comment_replacer, rawinput) - # we are going to modify line, so create a copy of the raw input - line = rawinput - - command = None - args = '' - - # expand shortcuts, have to do this first because - # a shortcut can expand into multiple tokens, ie '!ls' becomes - # 'shell ls' - line = self.expand_shortcuts(line) # handle the special case/hardcoded terminator of a blank line - # we have to do this before we shlex on whitespace because it + # we have to do this before we tokenize because tokenizing # destroys all unquoted whitespace in the input terminator = None - if line[-1:] == BLANK_LINE: - terminator = BLANK_LINE + if rawinput[-1:] == LINE_FEED: + terminator = LINE_FEED - tokens = self.tokenize(line) + command = None + args = '' - # expand aliases - if tokens: - command_to_expand = tokens[0] - tokens[0] = self.expand_aliases(command_to_expand) + # lex the input into a list of tokens + tokens = self.tokenize(rawinput) # of the valid terminators, find the first one to occur in the input terminator_pos = len(tokens)+1 @@ -184,7 +194,7 @@ class StatementParser(): pass if terminator: - if terminator == BLANK_LINE: + if terminator == LINE_FEED: terminator_pos = len(tokens)+1 else: terminator_pos = tokens.index(terminator) @@ -228,7 +238,7 @@ class StatementParser(): try: # find the first pipe if it exists pipe_pos = tokens.index('|') - # set everything after the first pipe to result.pipe_to + # save everything after the first pipe pipe_to = ' '.join(tokens[pipe_pos+1:]) # remove all the tokens after the pipe tokens = tokens[:pipe_pos] @@ -252,18 +262,18 @@ class StatementParser(): else: multiline_command = None - # build Statement object - result = Statement(args) - result.raw = rawinput - result.command = command - result.args = args - result.terminator = terminator - result.output = output - result.output_to = output_to - result.pipe_to = pipe_to - result.suffix = suffix - result.multiline_command = multiline_command - return result + # build the statement + statement = Statement(args) + statement.raw = rawinput + statement.command = command + statement.args = args + statement.terminator = terminator + statement.output = output + statement.output_to = output_to + statement.pipe_to = pipe_to + statement.suffix = suffix + statement.multiline_command = multiline_command + return statement def parse_command_only(self, rawinput: str) -> Statement: """Partially parse input into a Statement object. The command is @@ -271,41 +281,42 @@ class StatementParser(): Terminators, multiline commands, and output redirection are not parsed. """ - # strip C-style comments - # shlex will handle the python/shell style comments for us - # save rawinput for later - rawinput = re.sub(self.comment_pattern, self._comment_replacer, rawinput) - # we are going to modify line, so create a copy of the raw input - line = rawinput - command = None - args = '' - - # expand shortcuts, have to do this first because - # a shortcut can expand into multiple tokens, ie '!ls' becomes - # 'shell ls' - line = self.expand_shortcuts(line) - - # split the input on whitespace - lexer = shlex.shlex(line, posix=False) - lexer.whitespace_split = True - tokens = self._split_on_punctuation(list(lexer)) - - # expand aliases - if tokens: - command_to_expand = tokens[0] - tokens[0] = self.expand_aliases(command_to_expand) + # lex the input into a list of tokens + tokens = self.tokenize(rawinput) + # parse out the command and everything else (command, args) = self._command_and_args(tokens) - # build Statement object - result = Statement(args) - result.raw = rawinput - result.command = command - result.args = args - return result + # build the statement + statement = Statement(args) + statement.raw = rawinput + statement.command = command + statement.args = args + return statement + + def _expand(self, line: str) -> str: + """Expand shortcuts and aliases""" - def expand_shortcuts(self, line: str) -> str: - """Expand shortcuts at the beginning of input.""" + # expand aliases + # make a copy of aliases so we can edit it + tmp_aliases = list(self.aliases.keys()) + keep_expanding = bool(tmp_aliases) + while keep_expanding: + for cur_alias in tmp_aliases: + keep_expanding = False + # apply our regex to line + match = self.command_pattern.search(line) + if match: + # we got a match, extract the command + command = match.group(1) + if command == cur_alias: + # rebuild line with the expanded alias + line = self.aliases[cur_alias] + match.group(2) + line[match.end(2):] + tmp_aliases.remove(cur_alias) + keep_expanding = bool(tmp_aliases) + break + + # expand shortcuts for (shortcut, expansion) in self.shortcuts: if line.startswith(shortcut): # If the next character after the shortcut isn't a space, then insert one @@ -318,25 +329,6 @@ class StatementParser(): break return line - def expand_aliases(self, command: str) -> str: - """Given a command, expand any aliases for the command. - - If an alias contains shortcuts, the shortcuts will be expanded too. - """ - # make a copy of aliases so we can edit it - tmp_aliases = list(self.aliases.keys()) - keep_expanding = bool(tmp_aliases) - - while keep_expanding: - for cur_alias in tmp_aliases: - keep_expanding = False - if command == cur_alias: - command = self.aliases[cur_alias] - tmp_aliases.remove(cur_alias) - keep_expanding = bool(tmp_aliases) - break - return self.expand_shortcuts(command) - @staticmethod def _command_and_args(tokens: List[str]) -> Tuple[str, str]: """given a list of tokens, and return a tuple of the command -- cgit v1.2.1 From 802000bc56ba41955cdd6ffa9043cdc715e023d6 Mon Sep 17 00:00:00 2001 From: kotfu Date: Wed, 2 May 2018 09:14:49 -0600 Subject: =?UTF-8?q?Ensure=20args=20is=20=E2=80=98=E2=80=99=20for=20backwar?= =?UTF-8?q?ds=20compatibility=20with=20cmd?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd2/parsing.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 7046b674..2af8ff01 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -32,7 +32,7 @@ class Statement(str): :var args: the arguments to the command, not including any output redirection or terminators. quoted arguments remain quoted. - :type args: str + :type args: str or None :var terminator: the charater which terminated the multiline command, if there was one :type terminator: str or None @@ -52,8 +52,7 @@ class Statement(str): self.raw = str(obj) self.command = None self.multiline_command = None - # has to be an empty string for compatibility with standard library cmd - self.args = '' + self.args = None self.terminator = None self.suffix = None self.pipe_to = None @@ -175,7 +174,7 @@ class StatementParser(): terminator = LINE_FEED command = None - args = '' + args = None # lex the input into a list of tokens tokens = self.tokenize(rawinput) @@ -263,9 +262,13 @@ class StatementParser(): multiline_command = None # build the statement - statement = Statement(args) + # string representation of args must be an empty string instead of + # None for compatibility with standard library cmd + statement = Statement('' if args is None else args) statement.raw = rawinput statement.command = command + # if there are no args we will use None since we don't have to worry + # about compatibility wiht standard library cmd statement.args = args statement.terminator = terminator statement.output = output @@ -331,8 +334,11 @@ class StatementParser(): @staticmethod def _command_and_args(tokens: List[str]) -> Tuple[str, str]: - """given a list of tokens, and return a tuple of the command + """Given a list of tokens, return a tuple of the command and the args as a string. + + The args string will be '' instead of None to retain backwards compatibility + with cmd in the standard library. """ command = None args = '' -- cgit v1.2.1 From ad634b2e7f68392727246f796647b92d67172011 Mon Sep 17 00:00:00 2001 From: kotfu Date: Wed, 2 May 2018 19:09:30 -0700 Subject: Add argv to Statement object --- cmd2/parsing.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'cmd2/parsing.py') diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 2af8ff01..908e9272 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -7,6 +7,7 @@ import shlex from typing import List, Tuple from . import constants +from . import utils LINE_FEED = '\n' @@ -33,6 +34,10 @@ class Statement(str): redirection or terminators. quoted arguments remain quoted. :type args: str or None + :var: argv: a list of arguments a la sys.argv. Quotes, if any, are removed + from the elements of the list, and aliases and shortcuts + are expanded + :type argv: list :var terminator: the charater which terminated the multiline command, if there was one :type terminator: str or None @@ -53,6 +58,7 @@ class Statement(str): self.command = None self.multiline_command = None self.args = None + self.argv = None self.terminator = None self.suffix = None self.pipe_to = None @@ -65,7 +71,14 @@ class Statement(str): Quoted arguments remain quoted. """ - return '{} {}'.format('' if self.command is None else self.command, self.args).strip() + if self.command and self.args: + rtn = '{} {}'.format(self.command, self.args) + elif self.command: + # we are trusting that if we get here that self.args is None + rtn = self.command + else: + rtn = None + return rtn class StatementParser(): @@ -175,6 +188,7 @@ class StatementParser(): command = None args = None + argv = None # lex the input into a list of tokens tokens = self.tokenize(rawinput) @@ -198,7 +212,8 @@ class StatementParser(): else: terminator_pos = tokens.index(terminator) # everything before the first terminator is the command and the args - (command, args) = self._command_and_args(tokens[:terminator_pos]) + argv = tokens[:terminator_pos] + (command, args) = self._command_and_args(argv) # we will set the suffix later # remove all the tokens before and including the terminator tokens = tokens[terminator_pos+1:] @@ -210,6 +225,7 @@ class StatementParser(): # because redirectors can only be after a terminator command = testcommand args = testargs + argv = tokens tokens = [] # check for output redirect @@ -253,7 +269,8 @@ class StatementParser(): suffix = None if not command: # command could already have been set, if so, don't set it again - (command, args) = self._command_and_args(tokens) + argv = tokens + (command, args) = self._command_and_args(argv) # set multiline if command in self.multiline_commands: @@ -268,8 +285,9 @@ class StatementParser(): statement.raw = rawinput statement.command = command # if there are no args we will use None since we don't have to worry - # about compatibility wiht standard library cmd + # about compatibility with standard library cmd statement.args = args + statement.argv = list(map(lambda x: utils.strip_quotes(x), argv)) statement.terminator = terminator statement.output = output statement.output_to = output_to @@ -291,10 +309,13 @@ class StatementParser(): (command, args) = self._command_and_args(tokens) # build the statement - statement = Statement(args) + # string representation of args must be an empty string instead of + # None for compatibility with standard library cmd + statement = Statement('' if args is None else args) statement.raw = rawinput statement.command = command statement.args = args + statement.argv = tokens return statement def _expand(self, line: str) -> str: @@ -341,7 +362,7 @@ class StatementParser(): with cmd in the standard library. """ command = None - args = '' + args = None if tokens: command = tokens[0] -- cgit v1.2.1