1 files changed, 156 insertions, 122 deletions
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
index b67cef10..8edfacb9 100644
--- a/cmd2/parsing.py
+++ b/cmd2/parsing.py
@@ -7,10 +7,13 @@ import re
 import shlex
 from typing import List, Tuple, Dict
 
+import attr
+
 from . import constants
 from . import utils
 
 
+@attr.s(frozen=True)
 class Statement(str):
     """String subclass with additional attributes to store the results of parsing.
 
@@ -26,98 +29,137 @@ class Statement(str):
     The string portion of the class contains the arguments, but not the command, nor
     the output redirection clauses.
 
-    :var raw:               string containing exactly what we input by the user
-    :type raw:              str
-    :var command:           the command, i.e. the first whitespace delimited word
-    :type command:          str or None
-    :var multiline_command: if the command is a multiline command, the name of the
-                            command, otherwise None
-    :type command:          str or None
-    :var args:              the arguments to the command, not including any output
-                            redirection or terminators. quoted arguments remain
-                            quoted.
-    :type args:             str or None
-    :var: argv:             a list of arguments a la sys.argv. Quotes, if any, are removed
-                            from the elements of the list, and aliases and shortcuts
-                            are expanded
-    :type argv:             list
-    :var terminator:        the character which terminated the multiline command, if
-                            there was one
-    :type terminator:       str or None
-    :var suffix:            characters appearing after the terminator but before output
-                            redirection, if any
-    :type suffix:           str or None
-    :var pipe_to:           if output was piped to a shell command, the shell command
-                            as a list of tokens
-    :type pipe_to:          list
-    :var output:            if output was redirected, the redirection token, i.e. '>>'
-    :type output:           str or None
-    :var output_to:         if output was redirected, the destination file
-    :type output_to:        str or None
-
+    Here's some suggestions and best practices for how to use the attributes of this
+    object:
+
+    command - the name of the command, shortcuts and aliases have already been
+              expanded
+
+    args - the arguments to the command, excluding output redirection and command
+           terminators. If the user used quotes in their input, they remain here,
+           and you will have to handle them on your own.
+
+    arg_list - the arguments to the command, excluding output redirection and
+               command terminators. Each argument is represented as an element
+               in the list. Quoted arguments remain quoted. If you want to
+               remove the quotes, use `cmd2.utils.strip_quotes()` or use
+               `argv[1:]`
+
+    command_and_args - join the args and the command together with a space. Output
+                       redirection is excluded.
+
+    argv - this is a list of arguments in the style of `sys.argv`. The first element
+           of the list is the command. Subsequent elements of the list contain any
+           additional arguments, with quotes removed, just like bash would. This
+           is very useful if you are going to use `argparse.parse_args()`:
+           ```
+           def do_mycommand(stmt):
+               mycommand_argparser.parse_args(stmt.argv)
+               ...
+            ```
+
+    raw - if you want full access to exactly what the user typed at the input prompt
+          you can get it, but you'll have to parse it on your own, including:
+             - shortcuts and aliases
+             - quoted commands and arguments
+             - output redirection
+             - multi-line command terminator handling
+          if you use multiline commands, all the input will be passed to you in
+          this string, but there will be embedded newlines where
+          the user hit return to continue the command on the next line.
+
+    Tips:
+
+    1. `argparse` is your friend for anything complex. `cmd2` has two decorators
+       (`with_argparser`, and `with_argparser_and_unknown_args`) which you can use
+       to make your command method receive a namespace of parsed arguments, whether
+       positional or denoted with switches.
+
+    2. For commands with simple positional arguments, use `args` or `arg_list`
+
+    3. If you don't want to have to worry about quoted arguments, use
+       argv[1:], which strips them all off for you.
     """
-    def __new__(cls,
-                obj: object,
-                *,
-                raw: str = None,
-                command: str = None,
-                args: str = None,
-                argv: List[str] = None,
-                multiline_command: str = None,
-                terminator: str = None,
-                suffix: str = None,
-                pipe_to: str = None,
-                output: str = None,
-                output_to: str = None
-                ):
-        """Create a new instance of Statement
+    # the arguments, but not the command, nor the output redirection clauses.
+    args = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+    # string containing exactly what we input by the user
+    raw = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+    # the command, i.e. the first whitespace delimited word
+    command = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+    # list of arguments to the command, not including any output redirection or terminators; quoted args remain quoted
+    arg_list = attr.ib(factory=list, validator=attr.validators.instance_of(list), type=List[str])
+
+    # if the command is a multiline command, the name of the command, otherwise empty
+    multiline_command = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+    # the character which terminated the multiline command, if there was one
+    terminator = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+    # characters appearing after the terminator but before output redirection, if any
+    suffix = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+    # if output was piped to a shell command, the shell command as a list of tokens
+    pipe_to = attr.ib(factory=list, validator=attr.validators.instance_of(list), type=List[str])
+
+    # if output was redirected, the redirection token, i.e. '>>'
+    output = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+    # if output was redirected, the destination file
+    output_to = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+    def __new__(cls, value: object, *pos_args, **kw_args):
+        """Create a new instance of Statement.
 
         We must override __new__ because we are subclassing `str` which is
-        immutable.
+        immutable and takes a different number of arguments as Statement.
+
+        NOTE:  attrs takes care of initializing other members in the __init__ it
+        generates.
         """
-        stmt = str.__new__(cls, obj)
-        object.__setattr__(stmt, "raw", raw)
-        object.__setattr__(stmt, "command", command)
-        object.__setattr__(stmt, "args", args)
-        if argv is None:
-            argv = []
-        object.__setattr__(stmt, "argv", argv)
-        object.__setattr__(stmt, "multiline_command", multiline_command)
-        object.__setattr__(stmt, "terminator", terminator)
-        object.__setattr__(stmt, "suffix", suffix)
-        object.__setattr__(stmt, "pipe_to", pipe_to)
-        object.__setattr__(stmt, "output", output)
-        object.__setattr__(stmt, "output_to", output_to)
+        stmt = super().__new__(cls, value)
         return stmt
 
     @property
-    def command_and_args(self):
+    def command_and_args(self) -> str:
         """Combine command and args with a space separating them.
 
-        Quoted arguments remain quoted.
+        Quoted arguments remain quoted. Output redirection and piping are
+        excluded, as are any multiline command terminators.
         """
         if self.command and self.args:
             rtn = '{} {}'.format(self.command, self.args)
         elif self.command:
-            # we are trusting that if we get here that self.args is None
+            # there were no arguments to the command
             rtn = self.command
         else:
-            rtn = None
+            rtn = ''
         return rtn
 
-    def __setattr__(self, name, value):
-        """Statement instances should feel immutable; raise ValueError"""
-        raise ValueError
+    @property
+    def argv(self) -> List[str]:
+        """a list of arguments a la sys.argv.
 
-    def __delattr__(self, name):
-        """Statement instances should feel immutable; raise ValueError"""
-        raise ValueError
+        Quotes, if any, are removed from the elements of the list, and aliases
+        and shortcuts are expanded
+        """
+        if self.command:
+            rtn = [utils.strip_quotes(self.command)]
+            for cur_token in self.arg_list:
+                rtn.append(utils.strip_quotes(cur_token))
+        else:
+            rtn = []
+
+        return rtn
 
 
 class StatementParser:
     """Parse raw text into command components.
 
-    Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion.
+    Shortcuts is a list of tuples with each tuple containing the shortcut and
+    the expansion.
     """
     def __init__(
             self,
@@ -231,7 +273,7 @@ class StatementParser:
         if match:
             if word == match.group(1):
                 valid = True
-                errmsg = None
+                errmsg = ''
         return valid, errmsg
 
     def tokenize(self, line: str) -> List[str]:
@@ -268,13 +310,13 @@ class StatementParser:
         # handle the special case/hardcoded terminator of a blank line
         # we have to do this before we tokenize because tokenizing
         # destroys all unquoted whitespace in the input
-        terminator = None
+        terminator = ''
         if line[-1:] == constants.LINE_FEED:
             terminator = constants.LINE_FEED
 
-        command = None
-        args = None
-        argv = None
+        command = ''
+        args = ''
+        arg_list = []
 
         # lex the input into a list of tokens
         tokens = self.tokenize(line)
@@ -302,8 +344,8 @@ class StatementParser:
                 terminator_pos = len(tokens)+1
 
             # everything before the first terminator is the command and the args
-            argv = tokens[:terminator_pos]
-            (command, args) = self._command_and_args(argv)
+            (command, args) = self._command_and_args(tokens[:terminator_pos])
+            arg_list = tokens[1:terminator_pos]
             # we will set the suffix later
             # remove all the tokens before and including the terminator
             tokens = tokens[terminator_pos+1:]
@@ -315,7 +357,7 @@ class StatementParser:
                 # because redirectors can only be after a terminator
                 command = testcommand
                 args = testargs
-                argv = tokens
+                arg_list = tokens[1:]
                 tokens = []
 
         # check for a pipe to a shell process
@@ -336,11 +378,11 @@ class StatementParser:
             tokens = tokens[:pipe_pos]
         except ValueError:
             # no pipe in the tokens
-            pipe_to = None
+            pipe_to = []
 
         # check for output redirect
-        output = None
-        output_to = None
+        output = ''
+        output_to = ''
         try:
             output_pos = tokens.index(constants.REDIRECTION_OUTPUT)
             output = constants.REDIRECTION_OUTPUT
@@ -374,26 +416,23 @@ class StatementParser:
             suffix = ' '.join(tokens)
         else:
             # no terminator, so whatever is left is the command and the args
-            suffix = None
+            suffix = ''
             if not command:
                 # command could already have been set, if so, don't set it again
-                argv = tokens
-                (command, args) = self._command_and_args(argv)
+                (command, args) = self._command_and_args(tokens)
+                arg_list = tokens[1:]
 
         # set multiline
         if command in self.multiline_commands:
             multiline_command = command
         else:
-            multiline_command = None
+            multiline_command = ''
 
         # build the statement
-        # string representation of args must be an empty string instead of
-        # None for compatibility with standard library cmd
-        statement = Statement('' if args is None else args,
+        statement = Statement(args,
                               raw=line,
                               command=command,
-                              args=args,
-                              argv=list(map(lambda x: utils.strip_quotes(x), argv)),
+                              arg_list=arg_list,
                               multiline_command=multiline_command,
                               terminator=terminator,
                               suffix=suffix,
@@ -413,53 +452,50 @@ class StatementParser:
         This method is used by tab completion code and therefore must not
         generate an exception if there are unclosed quotes.
 
-        The Statement object returned by this method can at most contained
-        values in the following attributes:
+        The `Statement` object returned by this method can at most contain values
+        in the following attributes:
+          - args
           - raw
           - command
-          - args
+          - multiline_command
+
+        `Statement.args` includes all output redirection clauses and command
+        terminators.
 
         Different from parse(), this method does not remove redundant whitespace
-        within statement.args. It does however, ensure args does not have
-        leading or trailing whitespace.
+        within args. However, it does ensure args has no leading or trailing
+        whitespace.
         """
         # expand shortcuts and aliases
         line = self._expand(rawinput)
 
-        command = None
-        args = None
+        command = ''
+        args = ''
         match = self._command_pattern.search(line)
         if match:
             # we got a match, extract the command
             command = match.group(1)
-            # the match could be an empty string, if so, turn it into none
-            if not command:
-                command = None
-            # the _command_pattern regex is designed to match the spaces
-            # between command and args with a second match group. Using
-            # the end of the second match group ensures that args has
-            # no leading whitespace. The rstrip() makes sure there is
-            # no trailing whitespace
-            args = line[match.end(2):].rstrip()
-            # if the command is none that means the input was either empty
-            # or something wierd like '>'. args should be None if we couldn't
+
+            # take everything from the end of the first match group to
+            # the end of the line as the arguments (stripping leading
+            # and trailing spaces)
+            args = line[match.end(1):].strip()
+            # if the command is empty that means the input was either empty
+            # or something weird like '>'. args should be empty if we couldn't
             # parse a command
             if not command or not args:
-                args = None
+                args = ''
 
         # set multiline
         if command in self.multiline_commands:
             multiline_command = command
         else:
-            multiline_command = None
+            multiline_command = ''
 
         # build the statement
-        # string representation of args must be an empty string instead of
-        # None for compatibility with standard library cmd
-        statement = Statement('' if args is None else args,
+        statement = Statement(args,
                               raw=rawinput,
                               command=command,
-                              args=args,
                               multiline_command=multiline_command,
                               )
         return statement
@@ -503,12 +539,9 @@ class StatementParser:
     def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
         """Given a list of tokens, return a tuple of the command
         and the args as a string.
-
-        The args string will be '' instead of None to retain backwards compatibility
-        with cmd in the standard library.
         """
-        command = None
-        args = None
+        command = ''
+        args = ''
 
         if tokens:
             command = tokens[0]
@@ -528,10 +561,11 @@ class StatementParser:
         return matched_string
 
     def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
-        """
-        # Further splits tokens from a command line using punctuation characters
-        # as word breaks when they are in unquoted strings. Each run of punctuation
-        # characters is treated as a single token.
+        """Further splits tokens from a command line using punctuation characters
+
+        Punctuation characters are treated as word breaks when they are in
+        unquoted strings. Each run of punctuation characters is treated as a
+        single token.
 
         :param tokens: the tokens as parsed by shlex
         :return: the punctuated tokens