author     kotfu <kotfu@kotfu.net>  2018-04-20 15:36:15 -0600
committer  kotfu <kotfu@kotfu.net>  2018-04-20 15:36:15 -0600
commit     2350ec2a9b137ee3026efff1b1b3537d99cf19f2 (patch)
tree       b0073d576b75f0fd94eb79922e471cda280e527a
parent     477666d0b3e097fb831729644b8861a983805981 (diff)
download   cmd2-git-2350ec2a9b137ee3026efff1b1b3537d99cf19f2.tar.gz
Move CommandParser class into its own file
-rw-r--r--  cmd2/parsing.py             212
-rw-r--r--  tests/test_shlexparsing.py  217
2 files changed, 227 insertions, 202 deletions
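Before the diff itself, a minimal usage sketch of the relocated parser, assembled only from the constructor arguments and the parseString() result attributes visible in the new cmd2/parsing.py below. The sample command line and the expected values are illustrative and are not part of the commit.

    # illustrative sketch, not part of this commit
    from cmd2.parsing import CommandParser

    parser = CommandParser(
        quotes=['"', "'"],
        allow_redirection=True,
        redirection_chars=['|', '<', '>'],
        terminators=[';'],
        multilineCommands=['multiline'],
    )

    result = parser.parseString('greet Chin up; have a nice day > /tmp/out.txt')
    # everything before the terminator becomes command and args,
    # the redirection target after '>' lands in outputTo,
    # and what follows the terminator (minus the redirection) is the suffix
    assert result.command == 'greet'
    assert result.args == 'Chin up'
    assert result.terminator == ';'
    assert result.suffix == 'have a nice day'
    assert result.output == '>'
    assert result.outputTo == '/tmp/out.txt'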
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
new file mode 100644
index 00000000..41a3ed0b
--- /dev/null
+++ b/cmd2/parsing.py
@@ -0,0 +1,212 @@
+#
+# -*- coding: utf-8 -*-
+"""Command parsing classes for cmd2"""
+
+import re
+import shlex
+
+import cmd2
+
+class Command():
+ """Store the results of a parsed command."""
+ pass
+
+class CommandParser():
+ """Parse raw text into command components."""
+ def __init__(
+ self,
+ quotes=['"', "'"],
+ allow_redirection=True,
+ redirection_chars=['|', '<', '>'],
+ terminators=[';'],
+ multilineCommands = [],
+ ):
+ self.quotes = quotes
+ self.allow_redirection = allow_redirection
+ self.redirection_chars = redirection_chars
+ self.terminators = terminators
+ self.multilineCommands = multilineCommands
+
+    def parseString(self, rawinput):
+        result = Command()
+        result.raw = rawinput
+        result.command = None
+        result.multilineCommand = None
+        result.args = None
+        result.terminator = None
+        result.suffix = None
+        result.pipeTo = None
+        result.output = None
+        result.outputTo = None
+
+        # strip C-style and C++-style comments
+        # shlex will handle the python/shell style comments for us
+        def replacer(match):
+            s = match.group(0)
+            if s.startswith('/'):
+                # treat the removed comment as an empty string
+                return ''
+            else:
+                return s
+        pattern = re.compile(
+            r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
+            re.DOTALL | re.MULTILINE
+        )
+        rawinput = re.sub(pattern, replacer, rawinput)
+
+        s = shlex.shlex(rawinput, posix=False)
+        s.whitespace_split = True
+        tokens = self.split_on_punctuation(list(s))
+
+        # of the valid terminators, find the first one to occur in the input
+        terminator_pos = len(tokens)+1
+        terminator = None
+        for test_terminator in self.terminators:
+            try:
+                pos = tokens.index(test_terminator)
+                if pos < terminator_pos:
+                    terminator_pos = pos
+                    terminator = test_terminator
+            except ValueError:
+                # the terminator is not in the tokens
+                pass
+
+        if terminator:
+            terminator_pos = tokens.index(terminator)
+            # everything before the first terminator is the command and the args
+            (result.command, result.args) = self._command_and_args(tokens[:terminator_pos])
+            result.terminator = tokens[terminator_pos]
+            # we will set the suffix later
+            # remove all the tokens before and including the terminator
+            tokens = tokens[terminator_pos+1:]
+
+        # check for input from file
+        try:
+            if tokens[0] == '<':
+                result.inputFrom = ' '.join(tokens[1:])
+                tokens = []
+        except IndexError:
+            # no input from file
+            pass
+
+        # check for output redirect
+        try:
+            output_pos = tokens.index('>')
+            result.output = '>'
+            result.outputTo = ' '.join(tokens[output_pos+1:])
+            # remove all the tokens after the output redirect
+            tokens = tokens[:output_pos]
+        except ValueError:
+            pass
+
+        # check for paste buffer
+        try:
+            output_pos = tokens.index('>>')
+            result.output = '>>'
+            # remove all tokens after the output redirect
+            tokens = tokens[:output_pos]
+        except ValueError:
+            pass
+
+        # check for pipes
+        try:
+            # find the first pipe if it exists
+            pipe_pos = tokens.index('|')
+            # set everything after the first pipe to result.pipeTo
+            result.pipeTo = ' '.join(tokens[pipe_pos+1:])
+            # remove all the tokens after the pipe
+            tokens = tokens[:pipe_pos]
+        except ValueError:
+            # no pipe in the tokens
+            pass
+
+        if result.terminator:
+            # whatever is left is the suffix
+            result.suffix = ' '.join(tokens)
+        else:
+            # no terminator, so whatever is left is the command and the args
+            (result.command, result.args) = self._command_and_args(tokens)
+
+        if result.command in self.multilineCommands:
+            result.multilineCommand = result.command
+
+        return result
+
+    def _command_and_args(self, tokens):
+        """Given a list of tokens, return a tuple of the command
+        and the args as a string.
+        """
+        command = None
+        args = None
+
+        if tokens:
+            command = tokens[0]
+
+        if len(tokens) > 1:
+            args = ' '.join(tokens[1:])
+
+        return (command, args)
+
+    def split_on_punctuation(self, initial_tokens):
+        """
+        Further splits tokens from a command line using punctuation characters
+        as word breaks when they are in unquoted strings. Each run of punctuation
+        characters is treated as a single token.
+
+        :param initial_tokens: the tokens as parsed by shlex
+        :return: the punctuated tokens
+        """
+        punctuation = []
+        punctuation.extend(self.terminators)
+        if self.allow_redirection:
+            punctuation.extend(self.redirection_chars)
+
+        punctuated_tokens = []
+
+        for cur_initial_token in initial_tokens:
+
+            # Save tokens up to 1 character in length or quoted tokens. No need to parse these.
+            if len(cur_initial_token) <= 1 or cur_initial_token[0] in self.quotes:
+                punctuated_tokens.append(cur_initial_token)
+                continue
+
+            # Iterate over each character in this token
+            cur_index = 0
+            cur_char = cur_initial_token[cur_index]
+
+            # Keep track of the token we are building
+            new_token = ''
+
+            while True:
+                if cur_char not in punctuation:
+
+                    # Keep appending to new_token until we hit a punctuation char
+                    while cur_char not in punctuation:
+                        new_token += cur_char
+                        cur_index += 1
+                        if cur_index < len(cur_initial_token):
+                            cur_char = cur_initial_token[cur_index]
+                        else:
+                            break
+
+                else:
+                    cur_punc = cur_char
+
+                    # Keep appending to new_token until we hit something other than cur_punc
+                    while cur_char == cur_punc:
+                        new_token += cur_char
+                        cur_index += 1
+                        if cur_index < len(cur_initial_token):
+                            cur_char = cur_initial_token[cur_index]
+                        else:
+                            break
+
+                # Save the new token
+                punctuated_tokens.append(new_token)
+                new_token = ''
+
+                # Check if we've viewed all characters
+                if cur_index >= len(cur_initial_token):
+                    break
+
+        return punctuated_tokens
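A standalone sketch of the comment-stripping step used by parseString() above: the pattern is the same one compiled in the diff, while the strip_comments() wrapper and the sample strings are invented here for illustration. Matching quoted strings as their own alternatives is what keeps comment markers inside quotes untouched.

    # illustrative sketch of the comment-stripping regex above; not part of this commit
    import re

    pattern = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.DOTALL | re.MULTILINE
    )

    def strip_comments(text):
        # comments start with '/', so drop those matches; quoted strings are returned unchanged
        def replacer(match):
            s = match.group(0)
            return '' if s.startswith('/') else s
        return pattern.sub(replacer, text)

    print(strip_comments('hello /* there */ world'))      # hello  world
    print(strip_comments('say "no /* comment */ here"'))  # say "no /* comment */ here"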
diff --git a/tests/test_shlexparsing.py b/tests/test_shlexparsing.py
index 305d80d9..5237fd80 100644
--- a/tests/test_shlexparsing.py
+++ b/tests/test_shlexparsing.py
@@ -17,207 +17,20 @@ Notes:
"""
-import re
-import shlex
+import cmd2
+from cmd2.parsing import CommandParser
import pytest
-import cmd2
-
-class Cmd2Command():
-    pass
-
-class Cmd2Parser():
-    # settings or variables from cmd2.py
-    terminator = ';'
-    allow_redirection = True
-    REDIRECTION_CHARS = ['|', '<', '>']
-    QUOTES = ['"', "'"]
-    multilineCommands = ['multiline']
-
-    def parseString(self, rawinput):
-        result = Cmd2Command()
-        result.raw = rawinput
-        result.command = None
-        result.multilineCommand = None
-        result.args = None
-        result.terminator = None
-        result.suffix = None
-        result.pipeTo = None
-        result.output = None
-        result.outputTo = None
-
-        # strip C-style and C++-style comments
-        # shlex will handle the python/shell style comments for us
-        def replacer(match):
-            s = match.group(0)
-            if s.startswith('/'):
-                # treat the removed comment as a space token, not an empty string
-                # return ' '
-                # jk, always return nothing
-                return ''
-            else:
-                return s
-        pattern = re.compile(
-            r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
-            re.DOTALL | re.MULTILINE
-        )
-        rawinput = re.sub(pattern, replacer, rawinput)
-
-        s = shlex.shlex(rawinput, posix=False)
-        s.whitespace_split = True
-        tokens = self.split_on_punctuation(list(s))
-
-        # look for the semicolon terminator
-        try:
-            terminator_pos = tokens.index(self.terminator)
-            # everything before the first terminator is the command and the args
-            (result.command, result.args) = self._command_and_args(tokens[:terminator_pos])
-            result.terminator = tokens[terminator_pos]
-            # we will set the suffix later
-            # remove all the tokens before and including the terminator
-            tokens = tokens[terminator_pos+1:]
-        except ValueError:
-            # no terminator in the tokens
-            pass
-
-        # check for input from file
-        try:
-            if tokens[0] == '<':
-                result.inputFrom = ' '.join(tokens[1:])
-                tokens = []
-        except IndexError:
-            # no input from file
-            pass
-
-        # check for output redirect
-        try:
-            output_pos = tokens.index('>')
-            result.output = '>'
-            result.outputTo = ' '.join(tokens[output_pos+1:])
-            # remove all the tokens after the output redirect
-            tokens = tokens[:output_pos]
-        except ValueError:
-            pass
-
-        # check for paste buffer
-        try:
-            output_pos = tokens.index('>>')
-            result.output = '>>'
-            # remove all tokens after the output redirect
-            tokens = tokens[:output_pos]
-        except ValueError:
-            pass
-
-        # check for pipes
-        try:
-            # find the first pipe if it exists
-            pipe_pos = tokens.index('|')
-            # set everything after the first pipe to result.pipeTo
-            result.pipeTo = ' '.join(tokens[pipe_pos+1:])
-            # remove all the tokens after the pipe
-            tokens = tokens[:pipe_pos]
-        except ValueError:
-            # no pipe in the tokens
-            pass
-
-        if result.terminator:
-            # whatever is left is the suffix
-            result.suffix = ' '.join(tokens)
-            if result.command in self.multilineCommands:
-                result.multilineCommand = result.command
-        else:
-            # no terminator, so whatever is left is the command and the args
-            (result.command, result.args) = self._command_and_args(tokens)
-
-        return result
-
-    def _command_and_args(self, tokens):
-        """given a list of tokens, and return a tuple of the command
-        and the args as a string.
-        """
-        command = None
-        args = None
-
-        if tokens:
-            command = tokens[0]
-
-        if len(tokens) > 1:
-            args = ' '.join(tokens[1:])
-
-        return (command, args)
-
-    def split_on_punctuation(self, initial_tokens):
-        """
-        # Further splits tokens from a command line using punctuation characters
-        # as word breaks when they are in unquoted strings. Each run of punctuation
-        # characters is treated as a single token.
-
-        :param initial_tokens: the tokens as parsed by shlex
-        :return: the punctuated tokens
-        """
-        punctuation = [self.terminator]  # should be self.terminator from cmd2.py
-        if self.allow_redirection:  # should be self.allow_redirection from cmd2.py
-            punctuation += self.REDIRECTION_CHARS  # should be REDIRECTION_CHARS from cmd2.py
-
-        punctuated_tokens = []
-
-        for cur_initial_token in initial_tokens:
-
-            # Save tokens up to 1 character in length or quoted tokens. No need to parse these.
-            if len(cur_initial_token) <= 1 or cur_initial_token[0] in self.QUOTES:  # should be QUOTES in cmd2.py
-                punctuated_tokens.append(cur_initial_token)
-                continue
-
-            # Iterate over each character in this token
-            cur_index = 0
-            cur_char = cur_initial_token[cur_index]
-
-            # Keep track of the token we are building
-            new_token = ''
-
-            while True:
-                if cur_char not in punctuation:
-
-                    # Keep appending to new_token until we hit a punctuation char
-                    while cur_char not in punctuation:
-                        new_token += cur_char
-                        cur_index += 1
-                        if cur_index < len(cur_initial_token):
-                            cur_char = cur_initial_token[cur_index]
-                        else:
-                            break
-
-                else:
-                    cur_punc = cur_char
-
-                    # Keep appending to new_token until we hit something other than cur_punc
-                    while cur_char == cur_punc:
-                        new_token += cur_char
-                        cur_index += 1
-                        if cur_index < len(cur_initial_token):
-                            cur_char = cur_initial_token[cur_index]
-                        else:
-                            break
-
-                # Save the new token
-                punctuated_tokens.append(new_token)
-                new_token = ''
-
-                # Check if we've viewed all characters
-                if cur_index >= len(cur_initial_token):
-                    break
-
-        return punctuated_tokens
-
-######
-#
-# unit tests
-#
-######
@pytest.fixture
def parser():
-    parser = Cmd2Parser()
+    parser = CommandParser(
+        quotes=['"', "'"],
+        allow_redirection=True,
+        redirection_chars=['|', '<', '>'],
+        terminators=[';'],
+        multilineCommands=['multiline']
+    )
+    return parser
def test_parse_empty_string(parser):
@@ -397,12 +210,12 @@ def test_parse_multiline_command_ignores_redirectors_within_it(parser):
# assert results.multilineCommand == 'multiline'
# assert not 'args' in results
-# def test_parse_multiline_with_complete_comment(parser):
-# line = 'multiline command /* with comment complete */ is done;'
-# results = parser.parseString(line)
-# assert results.multilineCommand == 'multiline'
-# assert results.args == 'command /* with comment complete */ is done'
-# assert results.terminator == ';'
+def test_parse_multiline_with_complete_comment(parser):
+    line = 'multiline command /* with comment complete */ is done;'
+    results = parser.parseString(line)
+    assert results.multilineCommand == 'multiline'
+    assert results.args == 'command is done'
+    assert results.terminator == ';'
# def test_parse_multiline_termninated_by_empty_line(parser):
# line = 'multiline command ends\n\n'