summary | refs | log | tree | commit | diff
path: root/cmd2/parsing.py
diff options
context:
space:
mode:
Diffstat (limited to 'cmd2/parsing.py')
-rw-r--r-- cmd2/parsing.py 278
1 files changed, 156 insertions, 122 deletions
diff --git a/cmd2/parsing.py b/cmd2/parsing.py
index b67cef10..8edfacb9 100644
--- a/cmd2/parsing.py
+++ b/cmd2/parsing.py
@@ -7,10 +7,13 @@ import re
import shlex
from typing import List, Tuple, Dict
+import attr
+
from . import constants
from . import utils
+@attr.s(frozen=True)
class Statement(str):
"""String subclass with additional attributes to store the results of parsing.
@@ -26,98 +29,137 @@ class Statement(str):
The string portion of the class contains the arguments, but not the command, nor
the output redirection clauses.
- :var raw: string containing exactly what we input by the user
- :type raw: str
- :var command: the command, i.e. the first whitespace delimited word
- :type command: str or None
- :var multiline_command: if the command is a multiline command, the name of the
- command, otherwise None
- :type command: str or None
- :var args: the arguments to the command, not including any output
- redirection or terminators. quoted arguments remain
- quoted.
- :type args: str or None
- :var: argv: a list of arguments a la sys.argv. Quotes, if any, are removed
- from the elements of the list, and aliases and shortcuts
- are expanded
- :type argv: list
- :var terminator: the character which terminated the multiline command, if
- there was one
- :type terminator: str or None
- :var suffix: characters appearing after the terminator but before output
- redirection, if any
- :type suffix: str or None
- :var pipe_to: if output was piped to a shell command, the shell command
- as a list of tokens
- :type pipe_to: list
- :var output: if output was redirected, the redirection token, i.e. '>>'
- :type output: str or None
- :var output_to: if output was redirected, the destination file
- :type output_to: str or None
-
+ Here are some suggestions and best practices for how to use the attributes of this
+ object:
+
+ command - the name of the command, shortcuts and aliases have already been
+ expanded
+
+ args - the arguments to the command, excluding output redirection and command
+ terminators. If the user used quotes in their input, they remain here,
+ and you will have to handle them on your own.
+
+ arg_list - the arguments to the command, excluding output redirection and
+ command terminators. Each argument is represented as an element
+ in the list. Quoted arguments remain quoted. If you want to
+ remove the quotes, use `cmd2.utils.strip_quotes()` or use
+ `argv[1:]`
+
+ command_and_args - join the args and the command together with a space. Output
+ redirection is excluded.
+
+ argv - this is a list of arguments in the style of `sys.argv`. The first element
+ of the list is the command. Subsequent elements of the list contain any
+ additional arguments, with quotes removed, just like bash would. This
+ is very useful if you are going to use `argparse.parse_args()`:
+ ```
+ def do_mycommand(stmt):
+ mycommand_argparser.parse_args(stmt.argv)
+ ...
+ ```
+
+ raw - if you want full access to exactly what the user typed at the input prompt
+ you can get it, but you'll have to parse it on your own, including:
+ - shortcuts and aliases
+ - quoted commands and arguments
+ - output redirection
+ - multi-line command terminator handling
+ if you use multiline commands, all the input will be passed to you in
+ this string, but there will be embedded newlines where
+ the user hit return to continue the command on the next line.
+
+ Tips:
+
+ 1. `argparse` is your friend for anything complex. `cmd2` has two decorators
+ (`with_argparser`, and `with_argparser_and_unknown_args`) which you can use
+ to make your command method receive a namespace of parsed arguments, whether
+ positional or denoted with switches.
+
+ 2. For commands with simple positional arguments, use `args` or `arg_list`
+
+ 3. If you don't want to have to worry about quoted arguments, use
+ argv[1:], which strips them all off for you.
"""
- def __new__(cls,
- obj: object,
- *,
- raw: str = None,
- command: str = None,
- args: str = None,
- argv: List[str] = None,
- multiline_command: str = None,
- terminator: str = None,
- suffix: str = None,
- pipe_to: str = None,
- output: str = None,
- output_to: str = None
- ):
- """Create a new instance of Statement
+ # the arguments, but not the command, nor the output redirection clauses.
+ args = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+ # string containing exactly what was input by the user
+ raw = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+ # the command, i.e. the first whitespace delimited word
+ command = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+ # list of arguments to the command, not including any output redirection or terminators; quoted args remain quoted
+ arg_list = attr.ib(factory=list, validator=attr.validators.instance_of(list), type=List[str])
+
+ # if the command is a multiline command, the name of the command, otherwise empty
+ multiline_command = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+ # the character which terminated the multiline command, if there was one
+ terminator = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+ # characters appearing after the terminator but before output redirection, if any
+ suffix = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+ # if output was piped to a shell command, the shell command as a list of tokens
+ pipe_to = attr.ib(factory=list, validator=attr.validators.instance_of(list), type=List[str])
+
+ # if output was redirected, the redirection token, i.e. '>>'
+ output = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+ # if output was redirected, the destination file
+ output_to = attr.ib(default='', validator=attr.validators.instance_of(str), type=str)
+
+ def __new__(cls, value: object, *pos_args, **kw_args):
+ """Create a new instance of Statement.
We must override __new__ because we are subclassing `str` which is
- immutable.
+ immutable and takes a different number of arguments than Statement.
+
+ NOTE: attrs takes care of initializing other members in the __init__ it
+ generates.
"""
- stmt = str.__new__(cls, obj)
- object.__setattr__(stmt, "raw", raw)
- object.__setattr__(stmt, "command", command)
- object.__setattr__(stmt, "args", args)
- if argv is None:
- argv = []
- object.__setattr__(stmt, "argv", argv)
- object.__setattr__(stmt, "multiline_command", multiline_command)
- object.__setattr__(stmt, "terminator", terminator)
- object.__setattr__(stmt, "suffix", suffix)
- object.__setattr__(stmt, "pipe_to", pipe_to)
- object.__setattr__(stmt, "output", output)
- object.__setattr__(stmt, "output_to", output_to)
+ stmt = super().__new__(cls, value)
return stmt
@property
- def command_and_args(self):
+ def command_and_args(self) -> str:
"""Combine command and args with a space separating them.
- Quoted arguments remain quoted.
+ Quoted arguments remain quoted. Output redirection and piping are
+ excluded, as are any multiline command terminators.
"""
if self.command and self.args:
rtn = '{} {}'.format(self.command, self.args)
elif self.command:
- # we are trusting that if we get here that self.args is None
+ # there were no arguments to the command
rtn = self.command
else:
- rtn = None
+ rtn = ''
return rtn
- def __setattr__(self, name, value):
- """Statement instances should feel immutable; raise ValueError"""
- raise ValueError
+ @property
+ def argv(self) -> List[str]:
+ """a list of arguments a la sys.argv.
- def __delattr__(self, name):
- """Statement instances should feel immutable; raise ValueError"""
- raise ValueError
+ Quotes, if any, are removed from the elements of the list, and aliases
+ and shortcuts are expanded
+ """
+ if self.command:
+ rtn = [utils.strip_quotes(self.command)]
+ for cur_token in self.arg_list:
+ rtn.append(utils.strip_quotes(cur_token))
+ else:
+ rtn = []
+
+ return rtn
class StatementParser:
"""Parse raw text into command components.
- Shortcuts is a list of tuples with each tuple containing the shortcut and the expansion.
+ Shortcuts is a list of tuples with each tuple containing the shortcut and
+ the expansion.
"""
def __init__(
self,
@@ -231,7 +273,7 @@ class StatementParser:
if match:
if word == match.group(1):
valid = True
- errmsg = None
+ errmsg = ''
return valid, errmsg
def tokenize(self, line: str) -> List[str]:
@@ -268,13 +310,13 @@ class StatementParser:
# handle the special case/hardcoded terminator of a blank line
# we have to do this before we tokenize because tokenizing
# destroys all unquoted whitespace in the input
- terminator = None
+ terminator = ''
if line[-1:] == constants.LINE_FEED:
terminator = constants.LINE_FEED
- command = None
- args = None
- argv = None
+ command = ''
+ args = ''
+ arg_list = []
# lex the input into a list of tokens
tokens = self.tokenize(line)
@@ -302,8 +344,8 @@ class StatementParser:
terminator_pos = len(tokens)+1
# everything before the first terminator is the command and the args
- argv = tokens[:terminator_pos]
- (command, args) = self._command_and_args(argv)
+ (command, args) = self._command_and_args(tokens[:terminator_pos])
+ arg_list = tokens[1:terminator_pos]
# we will set the suffix later
# remove all the tokens before and including the terminator
tokens = tokens[terminator_pos+1:]
@@ -315,7 +357,7 @@ class StatementParser:
# because redirectors can only be after a terminator
command = testcommand
args = testargs
- argv = tokens
+ arg_list = tokens[1:]
tokens = []
# check for a pipe to a shell process
@@ -336,11 +378,11 @@ class StatementParser:
tokens = tokens[:pipe_pos]
except ValueError:
# no pipe in the tokens
- pipe_to = None
+ pipe_to = []
# check for output redirect
- output = None
- output_to = None
+ output = ''
+ output_to = ''
try:
output_pos = tokens.index(constants.REDIRECTION_OUTPUT)
output = constants.REDIRECTION_OUTPUT
@@ -374,26 +416,23 @@ class StatementParser:
suffix = ' '.join(tokens)
else:
# no terminator, so whatever is left is the command and the args
- suffix = None
+ suffix = ''
if not command:
# command could already have been set, if so, don't set it again
- argv = tokens
- (command, args) = self._command_and_args(argv)
+ (command, args) = self._command_and_args(tokens)
+ arg_list = tokens[1:]
# set multiline
if command in self.multiline_commands:
multiline_command = command
else:
- multiline_command = None
+ multiline_command = ''
# build the statement
- # string representation of args must be an empty string instead of
- # None for compatibility with standard library cmd
- statement = Statement('' if args is None else args,
+ statement = Statement(args,
raw=line,
command=command,
- args=args,
- argv=list(map(lambda x: utils.strip_quotes(x), argv)),
+ arg_list=arg_list,
multiline_command=multiline_command,
terminator=terminator,
suffix=suffix,
@@ -413,53 +452,50 @@ class StatementParser:
This method is used by tab completion code and therefore must not
generate an exception if there are unclosed quotes.
- The Statement object returned by this method can at most contained
- values in the following attributes:
+ The `Statement` object returned by this method can at most contain values
+ in the following attributes:
+ - args
- raw
- command
- - args
+ - multiline_command
+
+ `Statement.args` includes all output redirection clauses and command
+ terminators.
Different from parse(), this method does not remove redundant whitespace
- within statement.args. It does however, ensure args does not have
- leading or trailing whitespace.
+ within args. However, it does ensure args has no leading or trailing
+ whitespace.
"""
# expand shortcuts and aliases
line = self._expand(rawinput)
- command = None
- args = None
+ command = ''
+ args = ''
match = self._command_pattern.search(line)
if match:
# we got a match, extract the command
command = match.group(1)
- # the match could be an empty string, if so, turn it into none
- if not command:
- command = None
- # the _command_pattern regex is designed to match the spaces
- # between command and args with a second match group. Using
- # the end of the second match group ensures that args has
- # no leading whitespace. The rstrip() makes sure there is
- # no trailing whitespace
- args = line[match.end(2):].rstrip()
- # if the command is none that means the input was either empty
- # or something wierd like '>'. args should be None if we couldn't
+
+ # take everything from the end of the first match group to
+ # the end of the line as the arguments (stripping leading
+ # and trailing spaces)
+ args = line[match.end(1):].strip()
+ # if the command is empty that means the input was either empty
+ # or something weird like '>'. args should be empty if we couldn't
# parse a command
if not command or not args:
- args = None
+ args = ''
# set multiline
if command in self.multiline_commands:
multiline_command = command
else:
- multiline_command = None
+ multiline_command = ''
# build the statement
- # string representation of args must be an empty string instead of
- # None for compatibility with standard library cmd
- statement = Statement('' if args is None else args,
+ statement = Statement(args,
raw=rawinput,
command=command,
- args=args,
multiline_command=multiline_command,
)
return statement
@@ -503,12 +539,9 @@ class StatementParser:
def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
"""Given a list of tokens, return a tuple of the command
and the args as a string.
-
- The args string will be '' instead of None to retain backwards compatibility
- with cmd in the standard library.
"""
- command = None
- args = None
+ command = ''
+ args = ''
if tokens:
command = tokens[0]
@@ -528,10 +561,11 @@ class StatementParser:
return matched_string
def _split_on_punctuation(self, tokens: List[str]) -> List[str]:
- """
- # Further splits tokens from a command line using punctuation characters
- # as word breaks when they are in unquoted strings. Each run of punctuation
- # characters is treated as a single token.
+ """Further splits tokens from a command line using punctuation characters
+
+ Punctuation characters are treated as word breaks when they are in
+ unquoted strings. Each run of punctuation characters is treated as a
+ single token.
:param tokens: the tokens as parsed by shlex
:return: the punctuated tokens