summary refs log tree commit diff
diff options
context:
space:
mode:
author Hai Shi <shihai1992@gmail.com> 2019-07-20 02:56:23 -0500
committer Xiang Zhang <angwerzx@126.com> 2019-07-20 15:56:23 +0800
commit 5623ac87bbe5de481957eca5eeae06347612fbeb (patch)
tree 396e4cbd51b5e0b59eb0cab1dbbbfb0d1dcd06eb
parent eed5e9a9562d4dcd137e9f0fc7157bc3373c98cc (diff)
download cpython-git-5623ac87bbe5de481957eca5eeae06347612fbeb.tar.gz
bpo-37476: Adding tests for asutf8 and asutf8andsize (GH-14531)
-rw-r--r-- Lib/test/test_unicode.py 28
-rw-r--r-- Modules/_testcapimodule.c 44
2 files changed, 72 insertions, 0 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 177d80d27e..8be16c8da9 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2819,6 +2819,34 @@ class CAPITest(unittest.TestCase):
self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
+    # Test PyUnicode_AsUTF8()
+    @support.cpython_only
+    def test_asutf8(self):
+        from _testcapi import unicode_asutf8 as asutf8
+
+        # One code point per multi-byte UTF-8 length (2, 3 and 4 bytes),
+        # each paired with the encoding the C helper must return.
+        cases = (
+            ('\u0100', b'\xc4\x80'),
+            ('\uffff', b'\xef\xbf\xbf'),
+            (chr(0x10ffff), b'\xf4\x8f\xbf\xbf'),
+        )
+        for text, encoded in cases:
+            self.assertEqual(asutf8(text), encoded)
+
+        # Lone surrogates have no UTF-8 encoding, so the call must fail.
+        with self.assertRaises(UnicodeEncodeError):
+            asutf8('a\ud800b\udfffc')
+
+    # Test PyUnicode_AsUTF8AndSize()
+    @support.cpython_only
+    def test_asutf8andsize(self):
+        from _testcapi import unicode_asutf8andsize as asutf8andsize
+
+        # One code point per multi-byte UTF-8 length (2, 3 and 4 bytes),
+        # each paired with the expected (encoded bytes, byte count) result.
+        cases = (
+            ('\u0100', (b'\xc4\x80', 2)),
+            ('\uffff', (b'\xef\xbf\xbf', 3)),
+            (chr(0x10ffff), (b'\xf4\x8f\xbf\xbf', 4)),
+        )
+        for text, expected in cases:
+            self.assertEqual(asutf8andsize(text), expected)
+
+        # Lone surrogates have no UTF-8 encoding, so the call must fail.
+        with self.assertRaises(UnicodeEncodeError):
+            asutf8andsize('a\ud800b\udfffc')
+
# Test PyUnicode_FindChar()
@support.cpython_only
def test_findchar(self):
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 8f34e93535..8a6e741d28 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1922,6 +1922,48 @@ unicode_asucs4(PyObject *self, PyObject *args)
}
static PyObject *
+unicode_asutf8(PyObject *self, PyObject *args)
+{
+    /* Test helper for PyUnicode_AsUTF8(): takes one str argument and
+       returns its UTF-8 encoding as a bytes object.  The bytes are built
+       from a C string, so the copy ends at the first NUL byte. */
+    PyObject *str;
+    const char *utf8;
+
+    if (!PyArg_ParseTuple(args, "U", &str)) {
+        return NULL;
+    }
+
+    utf8 = PyUnicode_AsUTF8(str);
+    if (utf8 == NULL) {
+        /* Encoding failed; propagate the error to the caller. */
+        return NULL;
+    }
+    return PyBytes_FromString(utf8);
+}
+
+/* Test helper for PyUnicode_AsUTF8AndSize(): takes one str argument and
+   returns a (bytes, size) tuple holding its UTF-8 encoding and the byte
+   length reported by the API. */
+static PyObject *
+unicode_asutf8andsize(PyObject *self, PyObject *args)
+{
+    PyObject *unicode, *result;
+    const char *buffer;
+    Py_ssize_t utf8_len;
+
+    if (!PyArg_ParseTuple(args, "U", &unicode)) {
+        return NULL;
+    }
+
+    buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len);
+    if (buffer == NULL) {
+        return NULL;
+    }
+
+    /* Copy exactly utf8_len bytes.  PyBytes_FromString() would stop at the
+       first NUL byte, so a str containing '\0' would produce bytes shorter
+       than the size returned alongside them. */
+    result = PyBytes_FromStringAndSize(buffer, utf8_len);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    /* "N" hands our reference to result over to the new tuple. */
+    return Py_BuildValue("(Nn)", result, utf8_len);
+}
+
+static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
PyObject *str;
@@ -5174,6 +5216,8 @@ static PyMethodDef TestMethods[] = {
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
+ {"unicode_asutf8", unicode_asutf8, METH_VARARGS},
+ {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},