diff options
author | CyberSaxosTiGER <cybersaxostiger@gmail.com> | 2020-05-18 21:41:35 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-18 19:41:35 +0100 |
commit | d71a6492dbd5434dfa6a0ad95e3ad98aa690887a (patch) | |
tree | 5d356a79c8e84a44ec2944b940b584625e9a6310 | |
parent | dc31800f86fbcd40ee616984820b885d8adaa6a7 (diff) | |
download | cpython-git-d71a6492dbd5434dfa6a0ad95e3ad98aa690887a.tar.gz |
bpo-38870: correctly escape unprintable characters on ast.unparse (GH-20166)
Unprintable characters such as `\x00` weren't correctly round-tripped
because the default string repr was not used when generating docstrings. This
patch correctly escapes all unprintable characters (except `\n` and `\t`, which
are commonly used for formatting and are usually found unescaped in the source).
Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
-rw-r--r-- | Lib/ast.py | 14 | ||||
-rw-r--r-- | Lib/test/test_unparse.py | 6 |
2 files changed, 16 insertions, 4 deletions
```diff
diff --git a/Lib/ast.py b/Lib/ast.py
index 0d3b19d922..2edb7171e9 100644
--- a/Lib/ast.py
+++ b/Lib/ast.py
@@ -1090,6 +1090,15 @@ class _Unparser(NodeVisitor):
         self.write(node.id)

     def _write_docstring(self, node):
+        def esc_char(c):
+            if c in ("\n", "\t"):
+                # In the AST form, we don't know the author's intentation
+                # about how this should be displayed. We'll only escape
+                # \n and \t, because they are more likely to be unescaped
+                # in the source
+                return c
+            return c.encode('unicode_escape').decode('ascii')
+
         self.fill()
         if node.kind == "u":
             self.write("u")
@@ -1097,11 +1106,10 @@ class _Unparser(NodeVisitor):
         value = node.value
         if value:
             # Preserve quotes in the docstring by escaping them
-            value = value.replace("\\", "\\\\")
-            value = value.replace('"""', '""\"')
-            value = value.replace("\r", "\\r")
+            value = "".join(map(esc_char, value))
             if value[-1] == '"':
                 value = value.replace('"', '\\"', -1)
+            value = value.replace('"""', '""\\"')

         self.write(f'"""{value}"""')

diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py
index 67dcb1dae7..6d828721b7 100644
--- a/Lib/test/test_unparse.py
+++ b/Lib/test/test_unparse.py
@@ -324,7 +324,11 @@ class UnparseTestCase(ASTTestCase):
             '\\t',
             '\n',
             '\\n',
-            '\r\\r\t\\t\n\\n'
+            '\r\\r\t\\t\n\\n',
+            '""">>> content = \"\"\"blabla\"\"\" <<<"""',
+            r'foo\n\x00',
+            '🐍⛎𩸽üéş^\X\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
+
         )
         for docstring in docstrings:
             # check as Module docstrings for easy testing
```