summary | refs | log | tree | commit | diff
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2015-10-01 21:54:51 +0200
committer: Victor Stinner <victor.stinner@gmail.com> 2015-10-01 21:54:51 +0200
commit: 01ada3996bded57d1baf9c54b050fc55907d9b13 (patch)
tree: 5f795a91278beb5ae831a75eef3f4b20c5c3a6f8 /Objects/unicodeobject.c
parent: 29a1445136c7353543b516a085c38b8be9ce5109 (diff)
download: cpython-git-01ada3996bded57d1baf9c54b050fc55907d9b13.tar.gz
Issue #25267: The UTF-8 encoder is now up to 75 times as fast for error
handlers: ``ignore``, ``replace``, ``surrogateescape``, ``surrogatepass``. Patch co-written with Serhiy Storchaka.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 7
1 file changed, 5 insertions, 2 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 6657cd4310..df3a1b5069 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -297,6 +297,7 @@ typedef enum {
_Py_ERROR_UNKNOWN=0,
_Py_ERROR_STRICT,
_Py_ERROR_SURROGATEESCAPE,
+ _Py_ERROR_SURROGATEPASS,
_Py_ERROR_REPLACE,
_Py_ERROR_IGNORE,
_Py_ERROR_XMLCHARREFREPLACE,
@@ -312,6 +313,8 @@ get_error_handler(const char *errors)
return _Py_ERROR_STRICT;
if (strcmp(errors, "surrogateescape") == 0)
return _Py_ERROR_SURROGATEESCAPE;
+ if (strcmp(errors, "surrogatepass") == 0)
+ return _Py_ERROR_SURROGATEPASS;
if (strcmp(errors, "ignore") == 0)
return _Py_ERROR_IGNORE;
if (strcmp(errors, "replace") == 0)
@@ -6479,8 +6482,8 @@ unicode_encode_ucs1(PyObject *unicode,
goto onError;
case _Py_ERROR_REPLACE:
- while (collstart++ < collend)
- *str++ = '?';
+ memset(str, '?', collend - collstart);
+ str += (collend - collstart);
/* fall through ignore error handler */
case _Py_ERROR_IGNORE:
pos = collend;