diff options
Diffstat (limited to 'Cython/Compiler/StringEncoding.py')
-rw-r--r-- | Cython/Compiler/StringEncoding.py | 33 |
1 files changed, 31 insertions, 2 deletions
diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py
index c37e8aab7..192fc3de3 100644
--- a/Cython/Compiler/StringEncoding.py
+++ b/Cython/Compiler/StringEncoding.py
@@ -138,6 +138,24 @@ class EncodedString(_unicode):
     def as_utf8_string(self):
         return bytes_literal(self.utf8encode(), 'utf8')
 
+    def as_c_string_literal(self):
+        # first encodes the string then produces a c string literal
+        if self.encoding is None:
+            s = self.as_utf8_string()
+        else:
+            s = bytes_literal(self.byteencode(), self.encoding)
+        return s.as_c_string_literal()
+
+    if not hasattr(_unicode, "isascii"):
+        def isascii(self):
+            # not defined for Python3.7+ since the class already has it
+            try:
+                self.encode("ascii")
+            except UnicodeEncodeError:
+                return False
+            else:
+                return True
+
 
 def string_contains_surrogates(ustring):
     """
@@ -211,6 +229,11 @@ class BytesLiteral(_bytes):
         value = split_string_literal(escape_byte_string(self))
         return '"%s"' % value
 
+    if not hasattr(_bytes, "isascii"):
+        def isascii(self):
+            # already defined for Python3.7+
+            return True
+
 
 def bytes_literal(s, encoding):
     assert isinstance(s, bytes)
@@ -226,6 +249,12 @@ def encoded_string(s, encoding):
         s.encoding = encoding
     return s
 
+def encoded_string_or_bytes_literal(s, encoding):
+    if isinstance(s, bytes):
+        return bytes_literal(s, encoding)
+    else:
+        return encoded_string(s, encoding)
+
 
 char_from_escape_sequence = {
     r'\a' : u'\a',
@@ -291,7 +320,7 @@ def escape_byte_string(s):
     """
     s = _replace_specials(s)
     try:
-        return s.decode("ASCII") # trial decoding: plain ASCII => done
+        return s.decode("ASCII")  # trial decoding: plain ASCII => done
     except UnicodeDecodeError:
         pass
     if IS_PYTHON3:
@@ -324,7 +353,7 @@ def split_string_literal(s, limit=2000):
         while start < len(s):
             end = start + limit
             if len(s) > end-4 and '\\' in s[end-4:end]:
-                end -= 4 - s[end-4:end].find('\\') # just before the backslash
+                end -= 4 - s[end-4:end].find('\\')  # just before the backslash
                 while s[end-1] == '\\':
                     end -= 1
                     if end == start: