summary | refs | log | tree | commit | diff
path: root/Cython/Compiler/StringEncoding.py
diff options
context:
space:
mode:
Diffstat (limited to 'Cython/Compiler/StringEncoding.py')
-rw-r--r-- Cython/Compiler/StringEncoding.py 33
1 files changed, 31 insertions, 2 deletions
diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py
index c37e8aab7..192fc3de3 100644
--- a/Cython/Compiler/StringEncoding.py
+++ b/Cython/Compiler/StringEncoding.py
@@ -138,6 +138,24 @@ class EncodedString(_unicode):
def as_utf8_string(self):
return bytes_literal(self.utf8encode(), 'utf8')
+ def as_c_string_literal(self):
+ # Encode the text to a bytes literal first (UTF-8 when no encoding is set), then return that literal's C string form.
+ if self.encoding is None:
+ s = self.as_utf8_string()
+ else:
+ s = bytes_literal(self.byteencode(), self.encoding)  # explicit encoding: keep it on the resulting bytes literal
+ return s.as_c_string_literal()
+
+ if not hasattr(_unicode, "isascii"):
+ def isascii(self):
+ # Fallback defined only when the base string type has no isascii() of its own (original note: Python 3.7+ already has it).
+ try:
+ self.encode("ascii")  # trial encoding: raises if any character is outside ASCII
+ except UnicodeEncodeError:
+ return False
+ else:
+ return True
+
def string_contains_surrogates(ustring):
"""
@@ -211,6 +229,11 @@ class BytesLiteral(_bytes):
value = split_string_literal(escape_byte_string(self))
return '"%s"' % value
+ if not hasattr(_bytes, "isascii"):
+ def isascii(self):
+ # Fallback defined only when the base bytes type has no isascii() of its own (original note: Python 3.7+ already defines it).
+ return True  # NOTE(review): unconditional True, the byte values are never inspected; confirm non-ASCII literals cannot reach this fallback
+
def bytes_literal(s, encoding):
assert isinstance(s, bytes)
@@ -226,6 +249,12 @@ def encoded_string(s, encoding):
s.encoding = encoding
return s
+def encoded_string_or_bytes_literal(s, encoding):  # dispatch on the Python type of s
+ if isinstance(s, bytes):
+ return bytes_literal(s, encoding)  # bytes input goes through bytes_literal()
+ else:
+ return encoded_string(s, encoding)  # any other input goes through encoded_string()
+
char_from_escape_sequence = {
r'\a' : u'\a',
@@ -291,7 +320,7 @@ def escape_byte_string(s):
"""
s = _replace_specials(s)
try:
- return s.decode("ASCII") # trial decoding: plain ASCII => done
+ return s.decode("ASCII") # trial decoding: plain ASCII => done
except UnicodeDecodeError:
pass
if IS_PYTHON3:
@@ -324,7 +353,7 @@ def split_string_literal(s, limit=2000):
while start < len(s):
end = start + limit
if len(s) > end-4 and '\\' in s[end-4:end]:
- end -= 4 - s[end-4:end].find('\\') # just before the backslash
+ end -= 4 - s[end-4:end].find('\\') # just before the backslash
while s[end-1] == '\\':
end -= 1
if end == start: