diff options
| author | Victor Stinner <victor.stinner@gmail.com> | 2013-04-14 18:45:39 +0200 | 
|---|---|---|
| committer | Victor Stinner <victor.stinner@gmail.com> | 2013-04-14 18:45:39 +0200 | 
| commit | 55c08781e8bcbba18e016060e47aabc754fddc79 (patch) | |
| tree | 1d80075bfaeb527ecf60a07c9a1cf8e73f9f6a07 /Objects/unicodeobject.c | |
| parent | af03757d20a13b4090d06e0a198122be194aa6b0 (diff) | |
| download | cpython-git-55c08781e8bcbba18e016060e47aabc754fddc79.tar.gz | |
Optimize repr(str): use _PyUnicode_FastCopyCharacters() when no character is escaped
Diffstat (limited to 'Objects/unicodeobject.c')
| -rw-r--r-- | Objects/unicodeobject.c | 147 | 
1 file changed, 78 insertions, 69 deletions
| diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0996afbbb8..907fa5ad6a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11968,7 +11968,7 @@ unicode_repr(PyObject *unicode)      Py_ssize_t isize;      Py_ssize_t osize, squote, dquote, i, o;      Py_UCS4 max, quote; -    int ikind, okind; +    int ikind, okind, unchanged;      void *idata, *odata;      if (PyUnicode_READY(unicode) == -1) @@ -11979,7 +11979,7 @@ unicode_repr(PyObject *unicode)      /* Compute length of output, quote characters, and         maximum character */ -    osize = 2; /* quotes */ +    osize = 0;      max = 127;      squote = dquote = 0;      ikind = PyUnicode_KIND(unicode); @@ -12010,7 +12010,9 @@ unicode_repr(PyObject *unicode)      }      quote = '\''; +    unchanged = (osize == isize);      if (squote) { +        unchanged = 0;          if (dquote)              /* Both squote and dquote present. Use squote,                 and escape them */ @@ -12018,6 +12020,7 @@ unicode_repr(PyObject *unicode)          else              quote = '"';      } +    osize += 2;   /* quotes */      repr = PyUnicode_New(osize, max);      if (repr == NULL) @@ -12027,82 +12030,88 @@ unicode_repr(PyObject *unicode)      PyUnicode_WRITE(okind, odata, 0, quote);      PyUnicode_WRITE(okind, odata, osize-1, quote); +    if (unchanged) { +        _PyUnicode_FastCopyCharacters(repr, 1, +                                      unicode, 0, +                                      isize); +    } +    else { +        for (i = 0, o = 1; i < isize; i++) { +            Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); -    for (i = 0, o = 1; i < isize; i++) { -        Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); - -        /* Escape quotes and backslashes */ -        if ((ch == quote) || (ch == '\\')) { -            PyUnicode_WRITE(okind, odata, o++, '\\'); -            PyUnicode_WRITE(okind, odata, o++, ch); -            continue; -        } +            /* Escape quotes and backslashes 
*/ +            if ((ch == quote) || (ch == '\\')) { +                PyUnicode_WRITE(okind, odata, o++, '\\'); +                PyUnicode_WRITE(okind, odata, o++, ch); +                continue; +            } -        /* Map special whitespace to '\t', \n', '\r' */ -        if (ch == '\t') { -            PyUnicode_WRITE(okind, odata, o++, '\\'); -            PyUnicode_WRITE(okind, odata, o++, 't'); -        } -        else if (ch == '\n') { -            PyUnicode_WRITE(okind, odata, o++, '\\'); -            PyUnicode_WRITE(okind, odata, o++, 'n'); -        } -        else if (ch == '\r') { -            PyUnicode_WRITE(okind, odata, o++, '\\'); -            PyUnicode_WRITE(okind, odata, o++, 'r'); -        } +            /* Map special whitespace to '\t', \n', '\r' */ +            if (ch == '\t') { +                PyUnicode_WRITE(okind, odata, o++, '\\'); +                PyUnicode_WRITE(okind, odata, o++, 't'); +            } +            else if (ch == '\n') { +                PyUnicode_WRITE(okind, odata, o++, '\\'); +                PyUnicode_WRITE(okind, odata, o++, 'n'); +            } +            else if (ch == '\r') { +                PyUnicode_WRITE(okind, odata, o++, '\\'); +                PyUnicode_WRITE(okind, odata, o++, 'r'); +            } -        /* Map non-printable US ASCII to '\xhh' */ -        else if (ch < ' ' || ch == 0x7F) { -            PyUnicode_WRITE(okind, odata, o++, '\\'); -            PyUnicode_WRITE(okind, odata, o++, 'x'); -            PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); -            PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); -        } +            /* Map non-printable US ASCII to '\xhh' */ +            else if (ch < ' ' || ch == 0x7F) { +                PyUnicode_WRITE(okind, odata, o++, '\\'); +                PyUnicode_WRITE(okind, odata, o++, 'x'); +                PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); +                
PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); +            } -        /* Copy ASCII characters as-is */ -        else if (ch < 0x7F) { -            PyUnicode_WRITE(okind, odata, o++, ch); -        } +            /* Copy ASCII characters as-is */ +            else if (ch < 0x7F) { +                PyUnicode_WRITE(okind, odata, o++, ch); +            } -        /* Non-ASCII characters */ -        else { -            /* Map Unicode whitespace and control characters -               (categories Z* and C* except ASCII space) -            */ -            if (!Py_UNICODE_ISPRINTABLE(ch)) { -                PyUnicode_WRITE(okind, odata, o++, '\\'); -                /* Map 8-bit characters to '\xhh' */ -                if (ch <= 0xff) { -                    PyUnicode_WRITE(okind, odata, o++, 'x'); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); -                } -                /* Map 16-bit characters to '\uxxxx' */ -                else if (ch <= 0xffff) { -                    PyUnicode_WRITE(okind, odata, o++, 'u'); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); +            /* Non-ASCII characters */ +            else { +                /* Map Unicode whitespace and control characters +                   (categories Z* and C* except ASCII space) +                */ +                if (!Py_UNICODE_ISPRINTABLE(ch)) { +                    PyUnicode_WRITE(okind, odata, o++, '\\'); +                    /* Map 8-bit characters to '\xhh' */ +                    if (ch <= 0xff) { +                        PyUnicode_WRITE(okind, odata, o++, 
'x'); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); +                    } +                    /* Map 16-bit characters to '\uxxxx' */ +                    else if (ch <= 0xffff) { +                        PyUnicode_WRITE(okind, odata, o++, 'u'); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); +                    } +                    /* Map 21-bit characters to '\U00xxxxxx' */ +                    else { +                        PyUnicode_WRITE(okind, odata, o++, 'U'); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); +                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); +                    }                  } -                /* Map 21-bit characters to '\U00xxxxxx' */ +                /* Copy characters as-is */                  else { -                    PyUnicode_WRITE(okind, odata, o++, 'U'); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]); -             
       PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); -                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); +                    PyUnicode_WRITE(okind, odata, o++, ch);                  }              } -            /* Copy characters as-is */ -            else { -                PyUnicode_WRITE(okind, odata, o++, ch); -            }          }      }      /* Closing quote already added at the beginning */ | 
