diff options
Diffstat (limited to 'Objects/unicodeobject.c')
| -rw-r--r-- | Objects/unicodeobject.c | 1166 | 
1 files changed, 613 insertions, 553 deletions
| diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a5ae454b49..0b79c5bd1a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -49,9 +49,23 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  #endif  /*[clinic input] -class str "PyUnicodeObject *" "&PyUnicode_Type" +class str "PyObject *" "&PyUnicode_Type"  [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=604e916854800fa8]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4884c934de622cf6]*/ + +/*[python input] +class Py_UCS4_converter(CConverter): +    type = 'Py_UCS4' +    converter = 'convert_uc' + +    def converter_init(self): +        if self.default is not unspecified: +            self.c_default = ascii(self.default) +            if len(self.c_default) > 4 or self.c_default[0] != "'": +                self.c_default = hex(ord(self.default)) + +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=88f5dd06cd8e7a61]*/  /* --- Globals ------------------------------------------------------------ @@ -299,6 +313,8 @@ static const unsigned char ascii_linebreak[] = {      0, 0, 0, 0, 0, 0, 0, 0  }; +static int convert_uc(PyObject *obj, void *addr); +  #include "clinic/unicodeobject.c.h"  typedef enum { @@ -1029,8 +1045,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length)      if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) {          PyObject *copy; -        if (PyUnicode_READY(unicode) == -1) -            return NULL; +        assert(PyUnicode_IS_READY(unicode));          copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode));          if (copy == NULL) @@ -1974,14 +1989,11 @@ unicode_char(Py_UCS4 ch)      unicode = PyUnicode_New(1, ch);      if (unicode == NULL)          return NULL; -    switch (PyUnicode_KIND(unicode)) { -    case PyUnicode_1BYTE_KIND: -        PyUnicode_1BYTE_DATA(unicode)[0] = (Py_UCS1)ch; -        break; -    case 
PyUnicode_2BYTE_KIND: + +    assert(PyUnicode_KIND(unicode) != PyUnicode_1BYTE_KIND); +    if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {          PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch; -        break; -    default: +    } else {          assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);          PyUnicode_4BYTE_DATA(unicode)[0] = ch;      } @@ -1992,12 +2004,32 @@ unicode_char(Py_UCS4 ch)  PyObject *  PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)  { +    if (u == NULL) +        return (PyObject*)_PyUnicode_New(size); + +    if (size < 0) { +        PyErr_BadInternalCall(); +        return NULL; +    } + +    return PyUnicode_FromWideChar(u, size); +} + +PyObject * +PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size) +{      PyObject *unicode;      Py_UCS4 maxchar = 0;      Py_ssize_t num_surrogates; -    if (u == NULL) -        return (PyObject*)_PyUnicode_New(size); +    if (u == NULL && size != 0) { +        PyErr_BadInternalCall(); +        return NULL; +    } + +    if (size == -1) { +        size = wcslen(u); +    }      /* If the Unicode data is known at construction time, we can apply         some optimizations which share commonly used objects. */ @@ -2482,27 +2514,6 @@ PyUnicode_AsUCS4Copy(PyObject *string)      return as_ucs4(string, NULL, 0, 1);  } -#ifdef HAVE_WCHAR_H - -PyObject * -PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size) -{ -    if (w == NULL) { -        if (size == 0) -            _Py_RETURN_UNICODE_EMPTY(); -        PyErr_BadInternalCall(); -        return NULL; -    } - -    if (size == -1) { -        size = wcslen(w); -    } - -    return PyUnicode_FromUnicode(w, size); -} - -#endif /* HAVE_WCHAR_H */ -  /* maximum number of characters required for output of %lld or %p.     We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,     plus 1 for the sign.  53/22 is an upper bound for log10(256). 
*/ @@ -3300,7 +3311,7 @@ PyUnicode_Encode(const Py_UNICODE *s,  {      PyObject *v, *unicode; -    unicode = PyUnicode_FromUnicode(s, size); +    unicode = PyUnicode_FromWideChar(s, size);      if (unicode == NULL)          return NULL;      v = PyUnicode_AsEncodedString(unicode, encoding, errors); @@ -3412,11 +3423,9 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)  {      Py_ssize_t wlen, wlen2;      wchar_t *wstr; -    PyObject *bytes = NULL;      char *errmsg; -    PyObject *reason = NULL; -    PyObject *exc; -    size_t error_pos; +    PyObject *bytes, *reason, *exc; +    size_t error_pos, errlen;      int surrogateescape;      if (locale_error_handler(errors, &surrogateescape) < 0) @@ -3471,6 +3480,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)          len2 = wcstombs(PyBytes_AS_STRING(bytes), wstr, len+1);          if (len2 == (size_t)-1 || len2 > len) { +            Py_DECREF(bytes);              error_pos = (size_t)-1;              goto encode_error;          } @@ -3486,17 +3496,15 @@ encode_error:          error_pos = wcstombs_errorpos(wstr);      PyMem_Free(wstr); -    Py_XDECREF(bytes); - -    if (errmsg != NULL) { -        size_t errlen; -        wstr = Py_DecodeLocale(errmsg, &errlen); -        if (wstr != NULL) { -            reason = PyUnicode_FromWideChar(wstr, errlen); -            PyMem_RawFree(wstr); -        } else -            errmsg = NULL; + +    wstr = Py_DecodeLocale(errmsg, &errlen); +    if (wstr != NULL) { +        reason = PyUnicode_FromWideChar(wstr, errlen); +        PyMem_RawFree(wstr); +    } else { +        errmsg = NULL;      } +      if (errmsg == NULL)          reason = PyUnicode_FromString(              "wcstombs() encountered an unencodable " @@ -3512,7 +3520,7 @@ encode_error:      Py_DECREF(reason);      if (exc != NULL) {          PyCodec_StrictErrors(exc); -        Py_XDECREF(exc); +        Py_DECREF(exc);      }      return NULL;  } @@ -3719,10 +3727,9 @@ 
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,      size_t wlen, wlen2;      PyObject *unicode;      int surrogateescape; -    size_t error_pos; +    size_t error_pos, errlen;      char *errmsg; -    PyObject *reason = NULL;   /* initialize to prevent gcc warning */ -    PyObject *exc; +    PyObject *exc, *reason = NULL;   /* initialize to prevent gcc warning */      if (locale_error_handler(errors, &surrogateescape) < 0)          return NULL; @@ -3780,19 +3787,16 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,      return unicode;  decode_error: -    reason = NULL;      errmsg = strerror(errno);      assert(errmsg != NULL);      error_pos = mbstowcs_errorpos(str, len); -    if (errmsg != NULL) { -        size_t errlen; -        wstr = Py_DecodeLocale(errmsg, &errlen); -        if (wstr != NULL) { -            reason = PyUnicode_FromWideChar(wstr, errlen); -            PyMem_RawFree(wstr); -        } +    wstr = Py_DecodeLocale(errmsg, &errlen); +    if (wstr != NULL) { +        reason = PyUnicode_FromWideChar(wstr, errlen); +        PyMem_RawFree(wstr);      } +      if (reason == NULL)          reason = PyUnicode_FromString(              "mbstowcs() encountered an invalid multibyte sequence"); @@ -3807,7 +3811,7 @@ decode_error:      Py_DECREF(reason);      if (exc != NULL) {          PyCodec_StrictErrors(exc); -        Py_XDECREF(exc); +        Py_DECREF(exc);      }      return NULL;  } @@ -3968,7 +3972,7 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)  } -char* +const char *  PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)  {      PyObject *bytes; @@ -4003,7 +4007,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)      return PyUnicode_UTF8(unicode);  } -char* +const char *  PyUnicode_AsUTF8(PyObject *unicode)  {      return PyUnicode_AsUTF8AndSize(unicode, NULL); @@ -4140,7 +4144,11 @@ PyUnicode_GetSize(PyObject *unicode)          PyErr_BadArgument();          goto onError;      } -    return 
PyUnicode_GET_SIZE(unicode); +    if (_PyUnicode_WSTR(unicode) == NULL) { +        if (PyUnicode_AsUnicode(unicode) == NULL) +            goto onError; +    } +    return PyUnicode_WSTR_LENGTH(unicode);    onError:      return -1; @@ -4248,7 +4256,7 @@ unicode_decode_call_errorhandler_wchar(      Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,      PyObject **output, Py_ssize_t *outpos)  { -    static const char *argparse = "O!n;decoding error handler must return (str, int) tuple"; +    static const char *argparse = "Un;decoding error handler must return (str, int) tuple";      PyObject *restuple = NULL;      PyObject *repunicode = NULL; @@ -4281,10 +4289,10 @@ unicode_decode_call_errorhandler_wchar(      if (restuple == NULL)          goto onError;      if (!PyTuple_Check(restuple)) { -        PyErr_SetString(PyExc_TypeError, &argparse[4]); +        PyErr_SetString(PyExc_TypeError, &argparse[3]);          goto onError;      } -    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) +    if (!PyArg_ParseTuple(restuple, argparse, &repunicode, &newpos))          goto onError;      /* Copy back the bytes variables, which might have been modified by the @@ -4292,9 +4300,6 @@ unicode_decode_call_errorhandler_wchar(      inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);      if (!inputobj)          goto onError; -    if (!PyBytes_Check(inputobj)) { -        PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes"); -    }      *input = PyBytes_AS_STRING(inputobj);      insize = PyBytes_GET_SIZE(inputobj);      *inend = *input + insize; @@ -4335,7 +4340,7 @@ unicode_decode_call_errorhandler_wchar(      *inptr = *input + newpos;      /* we made it! 
*/ -    Py_XDECREF(restuple); +    Py_DECREF(restuple);      return 0;    overflow: @@ -4356,7 +4361,7 @@ unicode_decode_call_errorhandler_writer(      Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,      _PyUnicodeWriter *writer /* PyObject **output, Py_ssize_t *outpos */)  { -    static const char *argparse = "O!n;decoding error handler must return (str, int) tuple"; +    static const char *argparse = "Un;decoding error handler must return (str, int) tuple";      PyObject *restuple = NULL;      PyObject *repunicode = NULL; @@ -4383,10 +4388,10 @@ unicode_decode_call_errorhandler_writer(      if (restuple == NULL)          goto onError;      if (!PyTuple_Check(restuple)) { -        PyErr_SetString(PyExc_TypeError, &argparse[4]); +        PyErr_SetString(PyExc_TypeError, &argparse[3]);          goto onError;      } -    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) +    if (!PyArg_ParseTuple(restuple, argparse, &repunicode, &newpos))          goto onError;      /* Copy back the bytes variables, which might have been modified by the @@ -4394,9 +4399,6 @@ unicode_decode_call_errorhandler_writer(      inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);      if (!inputobj)          goto onError; -    if (!PyBytes_Check(inputobj)) { -        PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes"); -    }      *input = PyBytes_AS_STRING(inputobj);      insize = PyBytes_GET_SIZE(inputobj);      *inend = *input + insize; @@ -4411,8 +4413,6 @@ unicode_decode_call_errorhandler_writer(          goto onError;      } -    if (PyUnicode_READY(repunicode) < 0) -        goto onError;      replen = PyUnicode_GET_LENGTH(repunicode);      if (replen > 1) {          writer->min_length += replen - 1; @@ -4428,7 +4428,7 @@ unicode_decode_call_errorhandler_writer(      *inptr = *input + newpos;      /* we made it! 
*/ -    Py_XDECREF(restuple); +    Py_DECREF(restuple);      return 0;    onError: @@ -4834,7 +4834,7 @@ PyUnicode_EncodeUTF7(const Py_UNICODE *s,                       const char *errors)  {      PyObject *result; -    PyObject *tmp = PyUnicode_FromUnicode(s, size); +    PyObject *tmp = PyUnicode_FromWideChar(s, size);      if (tmp == NULL)          return NULL;      result = _PyUnicode_EncodeUTF7(tmp, base64SetO, @@ -5190,7 +5190,7 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,  {      PyObject *v, *unicode; -    unicode = PyUnicode_FromUnicode(s, size); +    unicode = PyUnicode_FromWideChar(s, size);      if (unicode == NULL)          return NULL;      v = _PyUnicode_AsUTF8String(unicode, errors); @@ -5515,7 +5515,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,                        int byteorder)  {      PyObject *result; -    PyObject *tmp = PyUnicode_FromUnicode(s, size); +    PyObject *tmp = PyUnicode_FromWideChar(s, size);      if (tmp == NULL)          return NULL;      result = _PyUnicode_EncodeUTF32(tmp, errors, byteorder); @@ -5868,7 +5868,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,                        int byteorder)  {      PyObject *result; -    PyObject *tmp = PyUnicode_FromUnicode(s, size); +    PyObject *tmp = PyUnicode_FromWideChar(s, size);      if (tmp == NULL)          return NULL;      result = _PyUnicode_EncodeUTF16(tmp, errors, byteorder); @@ -6259,7 +6259,7 @@ PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,                                Py_ssize_t size)  {      PyObject *result; -    PyObject *tmp = PyUnicode_FromUnicode(s, size); +    PyObject *tmp = PyUnicode_FromWideChar(s, size);      if (tmp == NULL) {          return NULL;      } @@ -6476,7 +6476,7 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,                                   Py_ssize_t size)  {      PyObject *result; -    PyObject *tmp = PyUnicode_FromUnicode(s, size); +    PyObject *tmp = PyUnicode_FromWideChar(s, size);      if (tmp == NULL)          return 
NULL;      result = PyUnicode_AsRawUnicodeEscapeString(tmp); @@ -6814,7 +6814,7 @@ unicode_encode_ucs1(PyObject *unicode,                      goto onError;                  /* subtract preallocated bytes */ -                writer.min_size -= 1; +                writer.min_size -= newpos - collstart;                  if (PyBytes_Check(rep)) {                      /* Directly copy bytes result to output. */ @@ -6830,33 +6830,19 @@ unicode_encode_ucs1(PyObject *unicode,                      if (PyUnicode_READY(rep) < 0)                          goto onError; -                    if (PyUnicode_IS_ASCII(rep)) { -                        /* Fast path: all characters are smaller than limit */ -                        assert(limit >= 128); -                        assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND); -                        str = _PyBytesWriter_WriteBytes(&writer, str, -                                                        PyUnicode_DATA(rep), -                                                        PyUnicode_GET_LENGTH(rep)); -                    } -                    else { -                        Py_ssize_t repsize = PyUnicode_GET_LENGTH(rep); - -                        str = _PyBytesWriter_Prepare(&writer, str, repsize); -                        if (str == NULL) -                            goto onError; - -                        /* check if there is anything unencodable in the -                           replacement and copy it to the output */ -                        for (i = 0; repsize-->0; ++i, ++str) { -                            ch = PyUnicode_READ_CHAR(rep, i); -                            if (ch >= limit) { -                                raise_encode_exception(&exc, encoding, unicode, -                                                       pos, pos+1, reason); -                                goto onError; -                            } -                            *str = (char)ch; -                        } +                    if 
(limit == 256 ? +                        PyUnicode_KIND(rep) != PyUnicode_1BYTE_KIND : +                        !PyUnicode_IS_ASCII(rep)) +                    { +                        /* Not all characters are smaller than limit */ +                        raise_encode_exception(&exc, encoding, unicode, +                                               collstart, collend, reason); +                        goto onError;                      } +                    assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND); +                    str = _PyBytesWriter_WriteBytes(&writer, str, +                                                    PyUnicode_DATA(rep), +                                                    PyUnicode_GET_LENGTH(rep));                  }                  pos = newpos;                  Py_CLEAR(rep); @@ -6887,7 +6873,7 @@ PyUnicode_EncodeLatin1(const Py_UNICODE *p,                         const char *errors)  {      PyObject *result; -    PyObject *unicode = PyUnicode_FromUnicode(p, size); +    PyObject *unicode = PyUnicode_FromWideChar(p, size);      if (unicode == NULL)          return NULL;      result = unicode_encode_ucs1(unicode, errors, 256); @@ -7028,7 +7014,7 @@ PyUnicode_EncodeASCII(const Py_UNICODE *p,                        const char *errors)  {      PyObject *result; -    PyObject *unicode = PyUnicode_FromUnicode(p, size); +    PyObject *unicode = PyUnicode_FromWideChar(p, size);      if (unicode == NULL)          return NULL;      result = unicode_encode_ucs1(unicode, errors, 128); @@ -7754,7 +7740,7 @@ PyUnicode_EncodeMBCS(const Py_UNICODE *p,                       const char *errors)  {      PyObject *unicode, *res; -    unicode = PyUnicode_FromUnicode(p, size); +    unicode = PyUnicode_FromWideChar(p, size);      if (unicode == NULL)          return NULL;      res = encode_code_page(CP_ACP, unicode, errors); @@ -8268,9 +8254,7 @@ charmapencode_lookup(Py_UCS4 c, PyObject *mapping)          if (PyErr_ExceptionMatches(PyExc_LookupError)) 
{              /* No mapping found means: mapping is undefined. */              PyErr_Clear(); -            x = Py_None; -            Py_INCREF(x); -            return x; +            Py_RETURN_NONE;          } else              return NULL;      } @@ -8602,7 +8586,7 @@ PyUnicode_EncodeCharmap(const Py_UNICODE *p,                          const char *errors)  {      PyObject *result; -    PyObject *unicode = PyUnicode_FromUnicode(p, size); +    PyObject *unicode = PyUnicode_FromWideChar(p, size);      if (unicode == NULL)          return NULL;      result = _PyUnicode_EncodeCharmap(unicode, mapping, errors); @@ -8657,7 +8641,7 @@ unicode_translate_call_errorhandler(const char *errors,                                      Py_ssize_t startpos, Py_ssize_t endpos,                                      Py_ssize_t *newpos)  { -    static const char *argparse = "O!n;translating error handler must return (str, int) tuple"; +    static const char *argparse = "Un;translating error handler must return (str, int) tuple";      Py_ssize_t i_newpos;      PyObject *restuple; @@ -8679,11 +8663,11 @@ unicode_translate_call_errorhandler(const char *errors,      if (restuple == NULL)          return NULL;      if (!PyTuple_Check(restuple)) { -        PyErr_SetString(PyExc_TypeError, &argparse[4]); +        PyErr_SetString(PyExc_TypeError, &argparse[3]);          Py_DECREF(restuple);          return NULL;      } -    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, +    if (!PyArg_ParseTuple(restuple, argparse,                            &resunicode, &i_newpos)) {          Py_DECREF(restuple);          return NULL; @@ -9042,7 +9026,7 @@ PyUnicode_TranslateCharmap(const Py_UNICODE *p,                             const char *errors)  {      PyObject *result; -    PyObject *unicode = PyUnicode_FromUnicode(p, size); +    PyObject *unicode = PyUnicode_FromWideChar(p, size);      if (!unicode)          return NULL;      result = _PyUnicode_TranslateCharmap(unicode, mapping, errors); 
@@ -9170,14 +9154,10 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,          return -1;      } -    unicode = PyUnicode_FromUnicode(s, length); +    unicode = PyUnicode_FromWideChar(s, length);      if (unicode == NULL)          return -1; -    if (PyUnicode_READY(unicode) == -1) { -        Py_DECREF(unicode); -        return -1; -    }      kind = PyUnicode_KIND(unicode);      data = PyUnicode_DATA(unicode); @@ -9495,16 +9475,12 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,                     int direction)  {      int kind; -    Py_ssize_t result; +    Py_ssize_t len, result;      if (PyUnicode_READY(str) == -1)          return -2; -    if (start < 0 || end < 0) { -        PyErr_SetString(PyExc_IndexError, "string index out of range"); -        return -2; -    } -    if (end > PyUnicode_GET_LENGTH(str)) -        end = PyUnicode_GET_LENGTH(str); -    if (start >= end) +    len = PyUnicode_GET_LENGTH(str); +    ADJUST_INDICES(start, end, len); +    if (end - start < 1)          return -1;      kind = PyUnicode_KIND(str);      result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start, @@ -10748,28 +10724,36 @@ replace(PyObject *self, PyObject *str1,  /* --- Unicode Object Methods --------------------------------------------- */ -PyDoc_STRVAR(title__doc__, -             "S.title() -> str\n\ -\n\ -Return a titlecased version of S, i.e. words start with title case\n\ -characters, all remaining cased characters have lower case."); +/*[clinic input] +str.title as unicode_title -static PyObject* -unicode_title(PyObject *self) +Return a version of the string where each word is titlecased. + +More specifically, words start with uppercased characters and all remaining +cased characters have lower case. 
+[clinic start generated code]*/ + +static PyObject * +unicode_title_impl(PyObject *self) +/*[clinic end generated code: output=c75ae03809574902 input=fa945d669b26e683]*/  {      if (PyUnicode_READY(self) == -1)          return NULL;      return case_operation(self, do_title);  } -PyDoc_STRVAR(capitalize__doc__, -             "S.capitalize() -> str\n\ -\n\ -Return a capitalized version of S, i.e. make the first character\n\ -have upper case and the rest lower case."); +/*[clinic input] +str.capitalize as unicode_capitalize -static PyObject* -unicode_capitalize(PyObject *self) +Return a capitalized version of the string. + +More specifically, make the first character have upper case and the rest lower +case. +[clinic start generated code]*/ + +static PyObject * +unicode_capitalize_impl(PyObject *self) +/*[clinic end generated code: output=e49a4c333cdb7667 input=f4cbf1016938da6d]*/  {      if (PyUnicode_READY(self) == -1)          return NULL; @@ -10778,13 +10762,15 @@ unicode_capitalize(PyObject *self)      return case_operation(self, do_capitalize);  } -PyDoc_STRVAR(casefold__doc__, -             "S.casefold() -> str\n\ -\n\ -Return a version of S suitable for caseless comparisons."); +/*[clinic input] +str.casefold as unicode_casefold + +Return a version of the string suitable for caseless comparisons. +[clinic start generated code]*/  static PyObject * -unicode_casefold(PyObject *self) +unicode_casefold_impl(PyObject *self) +/*[clinic end generated code: output=0120daf657ca40af input=384d66cc2ae30daf]*/  {      if (PyUnicode_READY(self) == -1)          return NULL; @@ -10818,21 +10804,23 @@ convert_uc(PyObject *obj, void *addr)      return 1;  } -PyDoc_STRVAR(center__doc__, -             "S.center(width[, fillchar]) -> str\n\ -\n\ -Return S centered in a string of length width. 
Padding is\n\ -done using the specified fill character (default is a space)"); +/*[clinic input] +str.center as unicode_center + +    width: Py_ssize_t +    fillchar: Py_UCS4 = ' ' +    / + +Return a centered string of length width. + +Padding is done using the specified fill character (default is a space). +[clinic start generated code]*/  static PyObject * -unicode_center(PyObject *self, PyObject *args) +unicode_center_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar) +/*[clinic end generated code: output=420c8859effc7c0c input=b42b247eb26e6519]*/  {      Py_ssize_t marg, left; -    Py_ssize_t width; -    Py_UCS4 fillchar = ' '; - -    if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar)) -        return NULL;      if (PyUnicode_READY(self) == -1)          return NULL; @@ -11503,51 +11491,49 @@ unicode_count(PyObject *self, PyObject *args)      return result;  } -PyDoc_STRVAR(encode__doc__, -             "S.encode(encoding='utf-8', errors='strict') -> bytes\n\ -\n\ -Encode S using the codec registered for encoding. Default encoding\n\ -is 'utf-8'. errors may be given to set a different error\n\ -handling scheme. Default is 'strict' meaning that encoding errors raise\n\ -a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\ -'xmlcharrefreplace' as well as any other name registered with\n\ -codecs.register_error that can handle UnicodeEncodeErrors."); +/*[clinic input] +str.encode as unicode_encode + +    encoding: str(c_default="NULL") = 'utf-8' +        The encoding in which to encode the string. +    errors: str(c_default="NULL") = 'strict' +        The error handling scheme to use for encoding errors. +        The default is 'strict' meaning that encoding errors raise a +        UnicodeEncodeError.  Other possible values are 'ignore', 'replace' and +        'xmlcharrefreplace' as well as any other name registered with +        codecs.register_error that can handle UnicodeEncodeErrors. 
+ +Encode the string using the codec registered for encoding. +[clinic start generated code]*/  static PyObject * -unicode_encode(PyObject *self, PyObject *args, PyObject *kwargs) +unicode_encode_impl(PyObject *self, const char *encoding, const char *errors) +/*[clinic end generated code: output=bf78b6e2a9470e3c input=f0a9eb293d08fe02]*/  { -    static char *kwlist[] = {"encoding", "errors", 0}; -    char *encoding = NULL; -    char *errors = NULL; - -    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", -                                     kwlist, &encoding, &errors)) -        return NULL;      return PyUnicode_AsEncodedString(self, encoding, errors);  } -PyDoc_STRVAR(expandtabs__doc__, -             "S.expandtabs(tabsize=8) -> str\n\ -\n\ -Return a copy of S where all tab characters are expanded using spaces.\n\ -If tabsize is not given, a tab size of 8 characters is assumed."); +/*[clinic input] +str.expandtabs as unicode_expandtabs -static PyObject* -unicode_expandtabs(PyObject *self, PyObject *args, PyObject *kwds) +    tabsize: int = 8 + +Return a copy where all tab characters are expanded using spaces. + +If tabsize is not given, a tab size of 8 characters is assumed. 
+[clinic start generated code]*/ + +static PyObject * +unicode_expandtabs_impl(PyObject *self, int tabsize) +/*[clinic end generated code: output=3457c5dcee26928f input=8a01914034af4c85]*/  {      Py_ssize_t i, j, line_pos, src_len, incr;      Py_UCS4 ch;      PyObject *u;      void *src_data, *dest_data; -    static char *kwlist[] = {"tabsize", 0}; -    int tabsize = 8;      int kind;      int found; -    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs", -                                     kwlist, &tabsize)) -        return NULL; -      if (PyUnicode_READY(self) == -1)          return NULL; @@ -11731,14 +11717,18 @@ unicode_index(PyObject *self, PyObject *args)      return PyLong_FromSsize_t(result);  } -PyDoc_STRVAR(islower__doc__, -             "S.islower() -> bool\n\ -\n\ -Return True if all cased characters in S are lowercase and there is\n\ -at least one cased character in S, False otherwise."); +/*[clinic input] +str.islower as unicode_islower -static PyObject* -unicode_islower(PyObject *self) +Return True if the string is a lowercase string, False otherwise. + +A string is lowercase if all cased characters in the string are lowercase and +there is at least one cased character in the string. +[clinic start generated code]*/ + +static PyObject * +unicode_islower_impl(PyObject *self) +/*[clinic end generated code: output=dbd41995bd005b81 input=acec65ac6821ae47]*/  {      Py_ssize_t i, length;      int kind; @@ -11772,14 +11762,18 @@ unicode_islower(PyObject *self)      return PyBool_FromLong(cased);  } -PyDoc_STRVAR(isupper__doc__, -             "S.isupper() -> bool\n\ -\n\ -Return True if all cased characters in S are uppercase and there is\n\ -at least one cased character in S, False otherwise."); +/*[clinic input] +str.isupper as unicode_isupper -static PyObject* -unicode_isupper(PyObject *self) +Return True if the string is an uppercase string, False otherwise. 
+ +A string is uppercase if all cased characters in the string are uppercase and +there is at least one cased character in the string. +[clinic start generated code]*/ + +static PyObject * +unicode_isupper_impl(PyObject *self) +/*[clinic end generated code: output=049209c8e7f15f59 input=e9b1feda5d17f2d3]*/  {      Py_ssize_t i, length;      int kind; @@ -11813,16 +11807,18 @@ unicode_isupper(PyObject *self)      return PyBool_FromLong(cased);  } -PyDoc_STRVAR(istitle__doc__, -             "S.istitle() -> bool\n\ -\n\ -Return True if S is a titlecased string and there is at least one\n\ -character in S, i.e. upper- and titlecase characters may only\n\ -follow uncased characters and lowercase characters only cased ones.\n\ -Return False otherwise."); +/*[clinic input] +str.istitle as unicode_istitle -static PyObject* -unicode_istitle(PyObject *self) +Return True if the string is a title-cased string, False otherwise. + +In a title-cased string, upper- and title-case characters may only +follow uncased characters and lowercase characters only cased ones. +[clinic start generated code]*/ + +static PyObject * +unicode_istitle_impl(PyObject *self) +/*[clinic end generated code: output=e9bf6eb91f5d3f0e input=98d32bd2e1f06f8c]*/  {      Py_ssize_t i, length;      int kind; @@ -11869,14 +11865,18 @@ unicode_istitle(PyObject *self)      return PyBool_FromLong(cased);  } -PyDoc_STRVAR(isspace__doc__, -             "S.isspace() -> bool\n\ -\n\ -Return True if all characters in S are whitespace\n\ -and there is at least one character in S, False otherwise."); +/*[clinic input] +str.isspace as unicode_isspace -static PyObject* -unicode_isspace(PyObject *self) +Return True if the string is a whitespace string, False otherwise. + +A string is whitespace if all characters in the string are whitespace and there +is at least one character in the string. 
+[clinic start generated code]*/ + +static PyObject * +unicode_isspace_impl(PyObject *self) +/*[clinic end generated code: output=163a63bfa08ac2b9 input=fe462cb74f8437d8]*/  {      Py_ssize_t i, length;      int kind; @@ -11905,14 +11905,18 @@ unicode_isspace(PyObject *self)      return PyBool_FromLong(1);  } -PyDoc_STRVAR(isalpha__doc__, -             "S.isalpha() -> bool\n\ -\n\ -Return True if all characters in S are alphabetic\n\ -and there is at least one character in S, False otherwise."); +/*[clinic input] +str.isalpha as unicode_isalpha -static PyObject* -unicode_isalpha(PyObject *self) +Return True if the string is an alphabetic string, False otherwise. + +A string is alphabetic if all characters in the string are alphabetic and there +is at least one character in the string. +[clinic start generated code]*/ + +static PyObject * +unicode_isalpha_impl(PyObject *self) +/*[clinic end generated code: output=cc81b9ac3883ec4f input=d0fd18a96cbca5eb]*/  {      Py_ssize_t i, length;      int kind; @@ -11940,14 +11944,18 @@ unicode_isalpha(PyObject *self)      return PyBool_FromLong(1);  } -PyDoc_STRVAR(isalnum__doc__, -             "S.isalnum() -> bool\n\ -\n\ -Return True if all characters in S are alphanumeric\n\ -and there is at least one character in S, False otherwise."); +/*[clinic input] +str.isalnum as unicode_isalnum -static PyObject* -unicode_isalnum(PyObject *self) +Return True if the string is an alpha-numeric string, False otherwise. + +A string is alpha-numeric if all characters in the string are alpha-numeric and +there is at least one character in the string. 
+[clinic start generated code]*/ + +static PyObject * +unicode_isalnum_impl(PyObject *self) +/*[clinic end generated code: output=a5a23490ffc3660c input=5c6579bf2e04758c]*/  {      int kind;      void *data; @@ -11978,14 +11986,18 @@ unicode_isalnum(PyObject *self)      return PyBool_FromLong(1);  } -PyDoc_STRVAR(isdecimal__doc__, -             "S.isdecimal() -> bool\n\ -\n\ -Return True if there are only decimal characters in S,\n\ -False otherwise."); +/*[clinic input] +str.isdecimal as unicode_isdecimal -static PyObject* -unicode_isdecimal(PyObject *self) +Return True if the string is a decimal string, False otherwise. + +A string is a decimal string if all characters in the string are decimal and +there is at least one character in the string. +[clinic start generated code]*/ + +static PyObject * +unicode_isdecimal_impl(PyObject *self) +/*[clinic end generated code: output=fb2dcdb62d3fc548 input=336bc97ab4c8268f]*/  {      Py_ssize_t i, length;      int kind; @@ -12013,14 +12025,18 @@ unicode_isdecimal(PyObject *self)      return PyBool_FromLong(1);  } -PyDoc_STRVAR(isdigit__doc__, -             "S.isdigit() -> bool\n\ -\n\ -Return True if all characters in S are digits\n\ -and there is at least one character in S, False otherwise."); +/*[clinic input] +str.isdigit as unicode_isdigit -static PyObject* -unicode_isdigit(PyObject *self) +Return True if the string is a digit string, False otherwise. + +A string is a digit string if all characters in the string are digits and there +is at least one character in the string. 
+[clinic start generated code]*/ + +static PyObject * +unicode_isdigit_impl(PyObject *self) +/*[clinic end generated code: output=10a6985311da6858 input=901116c31deeea4c]*/  {      Py_ssize_t i, length;      int kind; @@ -12049,14 +12065,18 @@ unicode_isdigit(PyObject *self)      return PyBool_FromLong(1);  } -PyDoc_STRVAR(isnumeric__doc__, -             "S.isnumeric() -> bool\n\ -\n\ -Return True if there are only numeric characters in S,\n\ -False otherwise."); +/*[clinic input] +str.isnumeric as unicode_isnumeric -static PyObject* -unicode_isnumeric(PyObject *self) +Return True if the string is a numeric string, False otherwise. + +A string is numeric if all characters in the string are numeric and there is at +least one character in the string. +[clinic start generated code]*/ + +static PyObject * +unicode_isnumeric_impl(PyObject *self) +/*[clinic end generated code: output=9172a32d9013051a input=722507db976f826c]*/  {      Py_ssize_t i, length;      int kind; @@ -12121,29 +12141,34 @@ PyUnicode_IsIdentifier(PyObject *self)      return 1;  } -PyDoc_STRVAR(isidentifier__doc__, -             "S.isidentifier() -> bool\n\ -\n\ -Return True if S is a valid identifier according\n\ -to the language definition.\n\ -\n\ -Use keyword.iskeyword() to test for reserved identifiers\n\ -such as \"def\" and \"class\".\n"); +/*[clinic input] +str.isidentifier as unicode_isidentifier -static PyObject* -unicode_isidentifier(PyObject *self) +Return True if the string is a valid Python identifier, False otherwise. + +Use keyword.iskeyword() to test for reserved identifiers such as "def" and +"class". 
+[clinic start generated code]*/ + +static PyObject * +unicode_isidentifier_impl(PyObject *self) +/*[clinic end generated code: output=fe585a9666572905 input=916b0a3c9f57e919]*/  {      return PyBool_FromLong(PyUnicode_IsIdentifier(self));  } -PyDoc_STRVAR(isprintable__doc__, -             "S.isprintable() -> bool\n\ -\n\ -Return True if all characters in S are considered\n\ -printable in repr() or S is empty, False otherwise."); +/*[clinic input] +str.isprintable as unicode_isprintable -static PyObject* -unicode_isprintable(PyObject *self) +Return True if the string is printable, False otherwise. + +A string is printable if all of its characters are considered printable in +repr() or if it is empty. +[clinic start generated code]*/ + +static PyObject * +unicode_isprintable_impl(PyObject *self) +/*[clinic end generated code: output=3ab9626cd32dd1a0 input=98a0e1c2c1813209]*/  {      Py_ssize_t i, length;      int kind; @@ -12168,16 +12193,25 @@ unicode_isprintable(PyObject *self)      Py_RETURN_TRUE;  } -PyDoc_STRVAR(join__doc__, -             "S.join(iterable) -> str\n\ -\n\ -Return a string which is the concatenation of the strings in the\n\ -iterable.  The separator between elements is S."); +/*[clinic input] +str.join as unicode_join -static PyObject* -unicode_join(PyObject *self, PyObject *data) +    iterable: object +    / + +Concatenate any number of strings. + +The string whose method is called is inserted in between each given string. +The result is returned as a new string. 
+ +Example: '.'.join(['ab', 'pq', 'rs']) -> 'ab.pq.rs' +[clinic start generated code]*/ + +static PyObject * +unicode_join(PyObject *self, PyObject *iterable) +/*[clinic end generated code: output=6857e7cecfe7bf98 input=d8311e5ccbafbeb6]*/  { -    return PyUnicode_Join(self, data); +    return PyUnicode_Join(self, iterable);  }  static Py_ssize_t @@ -12188,21 +12222,22 @@ unicode_length(PyObject *self)      return PyUnicode_GET_LENGTH(self);  } -PyDoc_STRVAR(ljust__doc__, -             "S.ljust(width[, fillchar]) -> str\n\ -\n\ -Return S left-justified in a Unicode string of length width. Padding is\n\ -done using the specified fill character (default is a space)."); +/*[clinic input] +str.ljust as unicode_ljust -static PyObject * -unicode_ljust(PyObject *self, PyObject *args) -{ -    Py_ssize_t width; -    Py_UCS4 fillchar = ' '; +    width: Py_ssize_t +    fillchar: Py_UCS4 = ' ' +    / -    if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar)) -        return NULL; +Return a left-justified string of length width. + +Padding is done using the specified fill character (default is a space). +[clinic start generated code]*/ +static PyObject * +unicode_ljust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar) +/*[clinic end generated code: output=1cce0e0e0a0b84b3 input=3ab599e335e60a32]*/ +{      if (PyUnicode_READY(self) == -1)          return NULL; @@ -12212,13 +12247,15 @@ unicode_ljust(PyObject *self, PyObject *args)      return pad(self, 0, width - PyUnicode_GET_LENGTH(self), fillchar);  } -PyDoc_STRVAR(lower__doc__, -             "S.lower() -> str\n\ -\n\ -Return a copy of the string S converted to lowercase."); +/*[clinic input] +str.lower as unicode_lower -static PyObject* -unicode_lower(PyObject *self) +Return a copy of the string converted to lowercase. 
+[clinic start generated code]*/ + +static PyObject * +unicode_lower_impl(PyObject *self) +/*[clinic end generated code: output=84ef9ed42efad663 input=60a2984b8beff23a]*/  {      if (PyUnicode_READY(self) == -1)          return NULL; @@ -12232,9 +12269,9 @@ unicode_lower(PyObject *self)  #define BOTHSTRIP 2  /* Arrays indexed by above */ -static const char * const stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; +static const char *stripfuncnames[] = {"lstrip", "rstrip", "strip"}; -#define STRIPNAME(i) (stripformat[i]+3) +#define STRIPNAME(i) (stripfuncnames[i])  /* externally visible for str.strip(unicode) */  PyObject * @@ -12391,13 +12428,8 @@ do_strip(PyObject *self, int striptype)  static PyObject * -do_argstrip(PyObject *self, int striptype, PyObject *args) +do_argstrip(PyObject *self, int striptype, PyObject *sep)  { -    PyObject *sep = NULL; - -    if (!PyArg_ParseTuple(args, stripformat[striptype], &sep)) -        return NULL; -      if (sep != NULL && sep != Py_None) {          if (PyUnicode_Check(sep))              return _PyUnicode_XStrip(self, striptype, sep); @@ -12413,52 +12445,60 @@ do_argstrip(PyObject *self, int striptype, PyObject *args)  } -PyDoc_STRVAR(strip__doc__, -             "S.strip([chars]) -> str\n\ -\n\ -Return a copy of the string S with leading and trailing\n\ -whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead."); +/*[clinic input] +str.strip as unicode_strip + +    chars: object = None +    / + +Return a copy of the string with leading and trailing whitespace removed. + +If chars is given and not None, remove characters in chars instead. 
+[clinic start generated code]*/  static PyObject * -unicode_strip(PyObject *self, PyObject *args) +unicode_strip_impl(PyObject *self, PyObject *chars) +/*[clinic end generated code: output=ca19018454345d57 input=eefe24a1059c352b]*/  { -    if (PyTuple_GET_SIZE(args) == 0) -        return do_strip(self, BOTHSTRIP); /* Common case */ -    else -        return do_argstrip(self, BOTHSTRIP, args); +    return do_argstrip(self, BOTHSTRIP, chars);  } -PyDoc_STRVAR(lstrip__doc__, -             "S.lstrip([chars]) -> str\n\ -\n\ -Return a copy of the string S with leading whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead."); +/*[clinic input] +str.lstrip as unicode_lstrip + +    chars: object = NULL +    / + +Return a copy of the string with leading whitespace removed. + +If chars is given and not None, remove characters in chars instead. +[clinic start generated code]*/  static PyObject * -unicode_lstrip(PyObject *self, PyObject *args) +unicode_lstrip_impl(PyObject *self, PyObject *chars) +/*[clinic end generated code: output=3b43683251f79ca7 input=9e56f3c45f5ff4c3]*/  { -    if (PyTuple_GET_SIZE(args) == 0) -        return do_strip(self, LEFTSTRIP); /* Common case */ -    else -        return do_argstrip(self, LEFTSTRIP, args); +    return do_argstrip(self, LEFTSTRIP, chars);  } -PyDoc_STRVAR(rstrip__doc__, -             "S.rstrip([chars]) -> str\n\ -\n\ -Return a copy of the string S with trailing whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead."); +/*[clinic input] +str.rstrip as unicode_rstrip + +    chars: object = NULL +    / + +Return a copy of the string with trailing whitespace removed. + +If chars is given and not None, remove characters in chars instead. 
+[clinic start generated code]*/  static PyObject * -unicode_rstrip(PyObject *self, PyObject *args) +unicode_rstrip_impl(PyObject *self, PyObject *chars) +/*[clinic end generated code: output=4a59230017cc3b7a input=ac89d0219cb411ee]*/  { -    if (PyTuple_GET_SIZE(args) == 0) -        return do_strip(self, RIGHTSTRIP); /* Common case */ -    else -        return do_argstrip(self, RIGHTSTRIP, args); +    return do_argstrip(self, RIGHTSTRIP, chars);  } @@ -12538,25 +12578,30 @@ PyUnicode_Replace(PyObject *str,      return replace(str, substr, replstr, maxcount);  } -PyDoc_STRVAR(replace__doc__, -             "S.replace(old, new[, count]) -> str\n\ -\n\ -Return a copy of S with all occurrences of substring\n\ -old replaced by new.  If the optional argument count is\n\ -given, only the first count occurrences are replaced."); +/*[clinic input] +str.replace as unicode_replace -static PyObject* -unicode_replace(PyObject *self, PyObject *args) -{ -    PyObject *str1; -    PyObject *str2; -    Py_ssize_t maxcount = -1; +    old: unicode +    new: unicode +    count: Py_ssize_t = -1 +        Maximum number of occurrences to replace. +        -1 (the default value) means replace all occurrences. +    / -    if (!PyArg_ParseTuple(args, "UU|n:replace", &str1, &str2, &maxcount)) -        return NULL; +Return a copy with all occurrences of substring old replaced by new. + +If the optional argument count is given, only the first count occurrences are +replaced. 
+[clinic start generated code]*/ + +static PyObject * +unicode_replace_impl(PyObject *self, PyObject *old, PyObject *new, +                     Py_ssize_t count) +/*[clinic end generated code: output=b63f1a8b5eebf448 input=147d12206276ebeb]*/ +{      if (PyUnicode_READY(self) == -1)          return NULL; -    return replace(self, str1, str2, maxcount); +    return replace(self, old, new, count);  }  static PyObject * @@ -12788,21 +12833,22 @@ unicode_rindex(PyObject *self, PyObject *args)      return PyLong_FromSsize_t(result);  } -PyDoc_STRVAR(rjust__doc__, -             "S.rjust(width[, fillchar]) -> str\n\ -\n\ -Return S right-justified in a string of length width. Padding is\n\ -done using the specified fill character (default is a space)."); +/*[clinic input] +str.rjust as unicode_rjust -static PyObject * -unicode_rjust(PyObject *self, PyObject *args) -{ -    Py_ssize_t width; -    Py_UCS4 fillchar = ' '; +    width: Py_ssize_t +    fillchar: Py_UCS4 = ' ' +    / -    if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar)) -        return NULL; +Return a right-justified string of length width. +Padding is done using the specified fill character (default is a space). +[clinic start generated code]*/ + +static PyObject * +unicode_rjust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar) +/*[clinic end generated code: output=804a1a57fbe8d5cf input=d05f550b5beb1f72]*/ +{      if (PyUnicode_READY(self) == -1)          return NULL; @@ -12821,35 +12867,32 @@ PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)      return split(s, sep, maxsplit);  } -PyDoc_STRVAR(split__doc__, -             "S.split(sep=None, maxsplit=-1) -> list of strings\n\ -\n\ -Return a list of the words in S, using sep as the\n\ -delimiter string.  If maxsplit is given, at most maxsplit\n\ -splits are done. 
If sep is not specified or is None, any\n\ -whitespace string is a separator and empty strings are\n\ -removed from the result."); - -static PyObject* -unicode_split(PyObject *self, PyObject *args, PyObject *kwds) -{ -    static char *kwlist[] = {"sep", "maxsplit", 0}; -    PyObject *substring = Py_None; -    Py_ssize_t maxcount = -1; +/*[clinic input] +str.split as unicode_split -    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split", -                                     kwlist, &substring, &maxcount)) -        return NULL; +    sep: object = None +        The delimiter according to which to split the string. +        None (the default value) means split according to any whitespace, +        and discard empty strings from the result. +    maxsplit: Py_ssize_t = -1 +        Maximum number of splits to do. +        -1 (the default value) means no limit. -    if (substring == Py_None) -        return split(self, NULL, maxcount); +Return a list of the words in the string, using sep as the delimiter string. +[clinic start generated code]*/ -    if (PyUnicode_Check(substring)) -        return split(self, substring, maxcount); +static PyObject * +unicode_split_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit) +/*[clinic end generated code: output=3a65b1db356948dc input=606e750488a82359]*/ +{ +    if (sep == Py_None) +        return split(self, NULL, maxsplit); +    if (PyUnicode_Check(sep)) +        return split(self, sep, maxsplit);      PyErr_Format(PyExc_TypeError,                   "must be str or None, not %.100s", -                 Py_TYPE(substring)->tp_name); +                 Py_TYPE(sep)->tp_name);      return NULL;  } @@ -12968,30 +13011,47 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)      return out;  } -PyDoc_STRVAR(partition__doc__, -             "S.partition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in S, and return the part before it,\n\ -the separator itself, and the part after it.  
If the separator is not\n\ -found, return S and two empty strings."); +/*[clinic input] +str.partition as unicode_partition -static PyObject* -unicode_partition(PyObject *self, PyObject *separator) +    sep: object +    / + +Partition the string into three parts using the given separator. + +This will search for the separator in the string.  If the separator is found, +returns a 3-tuple containing the part before the separator, the separator +itself, and the part after it. + +If the separator is not found, returns a 3-tuple containing the original string +and two empty strings. +[clinic start generated code]*/ + +static PyObject * +unicode_partition(PyObject *self, PyObject *sep) +/*[clinic end generated code: output=e4ced7bd253ca3c4 input=f29b8d06c63e50be]*/  { -    return PyUnicode_Partition(self, separator); +    return PyUnicode_Partition(self, sep);  } -PyDoc_STRVAR(rpartition__doc__, -             "S.rpartition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in S, starting at the end of S, and return\n\ -the part before it, the separator itself, and the part after it.  If the\n\ -separator is not found, return two empty strings and S."); +/*[clinic input] +str.rpartition as unicode_rpartition = str.partition -static PyObject* -unicode_rpartition(PyObject *self, PyObject *separator) +Partition the string into three parts using the given separator. + +This will search for the separator in the string, starting at the end. If +the separator is found, returns a 3-tuple containing the part before the +separator, the separator itself, and the part after it. + +If the separator is not found, returns a 3-tuple containing two empty strings +and the original string. 
+[clinic start generated code]*/ + +static PyObject * +unicode_rpartition(PyObject *self, PyObject *sep) +/*[clinic end generated code: output=1aa13cf1156572aa input=e77c7acb69bdfca6]*/  { -    return PyUnicode_RPartition(self, separator); +    return PyUnicode_RPartition(self, sep);  }  PyObject * @@ -13003,55 +13063,44 @@ PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)      return rsplit(s, sep, maxsplit);  } -PyDoc_STRVAR(rsplit__doc__, -             "S.rsplit(sep=None, maxsplit=-1) -> list of strings\n\ -\n\ -Return a list of the words in S, using sep as the\n\ -delimiter string, starting at the end of the string and\n\ -working to the front.  If maxsplit is given, at most maxsplit\n\ -splits are done. If sep is not specified, any whitespace string\n\ -is a separator."); - -static PyObject* -unicode_rsplit(PyObject *self, PyObject *args, PyObject *kwds) -{ -    static char *kwlist[] = {"sep", "maxsplit", 0}; -    PyObject *substring = Py_None; -    Py_ssize_t maxcount = -1; +/*[clinic input] +str.rsplit as unicode_rsplit = str.split -    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit", -                                     kwlist, &substring, &maxcount)) -        return NULL; +Return a list of the words in the string, using sep as the delimiter string. -    if (substring == Py_None) -        return rsplit(self, NULL, maxcount); +Splits are done starting at the end of the string and working to the front. 
+[clinic start generated code]*/ -    if (PyUnicode_Check(substring)) -        return rsplit(self, substring, maxcount); +static PyObject * +unicode_rsplit_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit) +/*[clinic end generated code: output=c2b815c63bcabffc input=12ad4bf57dd35f15]*/ +{ +    if (sep == Py_None) +        return rsplit(self, NULL, maxsplit); +    if (PyUnicode_Check(sep)) +        return rsplit(self, sep, maxsplit);      PyErr_Format(PyExc_TypeError,                   "must be str or None, not %.100s", -                 Py_TYPE(substring)->tp_name); +                 Py_TYPE(sep)->tp_name);      return NULL;  } -PyDoc_STRVAR(splitlines__doc__, -             "S.splitlines([keepends]) -> list of strings\n\ -\n\ -Return a list of the lines in S, breaking at line boundaries.\n\ -Line breaks are not included in the resulting list unless keepends\n\ -is given and true."); +/*[clinic input] +str.splitlines as unicode_splitlines -static PyObject* -unicode_splitlines(PyObject *self, PyObject *args, PyObject *kwds) -{ -    static char *kwlist[] = {"keepends", 0}; -    int keepends = 0; +    keepends: int(c_default="0") = False -    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines", -                                     kwlist, &keepends)) -        return NULL; +Return a list of the lines in the string, breaking at line boundaries. +Line breaks are not included in the resulting list unless keepends is given and +true. 
+[clinic start generated code]*/ + +static PyObject * +unicode_splitlines_impl(PyObject *self, int keepends) +/*[clinic end generated code: output=f664dcdad153ec40 input=d6ff99fe43465b0f]*/ +{      return PyUnicode_Splitlines(self, keepends);  } @@ -13061,14 +13110,15 @@ PyObject *unicode_str(PyObject *self)      return unicode_result_unchanged(self);  } -PyDoc_STRVAR(swapcase__doc__, -             "S.swapcase() -> str\n\ -\n\ -Return a copy of S with uppercase characters converted to lowercase\n\ -and vice versa."); +/*[clinic input] +str.swapcase as unicode_swapcase -static PyObject* -unicode_swapcase(PyObject *self) +Convert uppercase characters to lowercase and lowercase characters to uppercase. +[clinic start generated code]*/ + +static PyObject * +unicode_swapcase_impl(PyObject *self) +/*[clinic end generated code: output=5d28966bf6d7b2af input=3f3ef96d5798a7bb]*/  {      if (PyUnicode_READY(self) == -1)          return NULL; @@ -13205,29 +13255,37 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z)      return NULL;  } -PyDoc_STRVAR(translate__doc__, -             "S.translate(table) -> str\n\ -\n\ -Return a copy of the string S in which each character has been mapped\n\ -through the given translation table. The table must implement\n\ -lookup/indexing via __getitem__, for instance a dictionary or list,\n\ -mapping Unicode ordinals to Unicode ordinals, strings, or None. If\n\ -this operation raises LookupError, the character is left untouched.\n\ -Characters mapped to None are deleted."); +/*[clinic input] +str.translate as unicode_translate -static PyObject* +    table: object +        Translation table, which must be a mapping of Unicode ordinals to +        Unicode ordinals, strings, or None. +    / + +Replace each character in the string using the given translation table. + +The table must implement lookup/indexing via __getitem__, for instance a +dictionary or list.  If this operation raises LookupError, the character is +left untouched.  
Characters mapped to None are deleted. +[clinic start generated code]*/ + +static PyObject *  unicode_translate(PyObject *self, PyObject *table) +/*[clinic end generated code: output=3cb448ff2fd96bf3 input=6d38343db63d8eb0]*/  {      return _PyUnicode_TranslateCharmap(self, table, "ignore");  } -PyDoc_STRVAR(upper__doc__, -             "S.upper() -> str\n\ -\n\ -Return a copy of S converted to uppercase."); +/*[clinic input] +str.upper as unicode_upper -static PyObject* -unicode_upper(PyObject *self) +Return a copy of the string converted to uppercase. +[clinic start generated code]*/ + +static PyObject * +unicode_upper_impl(PyObject *self) +/*[clinic end generated code: output=1b7ddd16bbcdc092 input=db3d55682dfe2e6c]*/  {      if (PyUnicode_READY(self) == -1)          return NULL; @@ -13236,25 +13294,27 @@ unicode_upper(PyObject *self)      return case_operation(self, do_upper);  } -PyDoc_STRVAR(zfill__doc__, -             "S.zfill(width) -> str\n\ -\n\ -Pad a numeric string S with zeros on the left, to fill a field\n\ -of the specified width. The string S is never truncated."); +/*[clinic input] +str.zfill as unicode_zfill + +    width: Py_ssize_t +    / + +Pad a numeric string with zeros on the left, to fill a field of the given width. + +The string is never truncated. 
+[clinic start generated code]*/  static PyObject * -unicode_zfill(PyObject *self, PyObject *args) +unicode_zfill_impl(PyObject *self, Py_ssize_t width) +/*[clinic end generated code: output=e13fb6bdf8e3b9df input=c6b2f772c6f27799]*/  {      Py_ssize_t fill;      PyObject *u; -    Py_ssize_t width;      int kind;      void *data;      Py_UCS4 chr; -    if (!PyArg_ParseTuple(args, "n:zfill", &width)) -        return NULL; -      if (PyUnicode_READY(self) == -1)          return NULL; @@ -13741,16 +13801,22 @@ PyDoc_STRVAR(format_map__doc__,  Return a formatted version of S, using substitutions from mapping.\n\  The substitutions are identified by braces ('{' and '}')."); +/*[clinic input] +str.__format__ as unicode___format__ + +    format_spec: unicode +    / + +Return a formatted version of the string as described by format_spec. +[clinic start generated code]*/ +  static PyObject * -unicode__format__(PyObject* self, PyObject* args) +unicode___format___impl(PyObject *self, PyObject *format_spec) +/*[clinic end generated code: output=45fceaca6d2ba4c8 input=5e135645d167a214]*/  { -    PyObject *format_spec;      _PyUnicodeWriter writer;      int ret; -    if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) -        return NULL; -      if (PyUnicode_READY(self) == -1)          return NULL;      _PyUnicodeWriter_Init(&writer); @@ -13764,44 +13830,43 @@ unicode__format__(PyObject* self, PyObject* args)      return _PyUnicodeWriter_Finish(&writer);  } -PyDoc_STRVAR(p_format__doc__, -             "S.__format__(format_spec) -> str\n\ -\n\ -Return a formatted version of S as described by format_spec."); +/*[clinic input] +str.__sizeof__ as unicode_sizeof + +Return the size of the string in memory, in bytes. 
+[clinic start generated code]*/  static PyObject * -unicode__sizeof__(PyObject *v) +unicode_sizeof_impl(PyObject *self) +/*[clinic end generated code: output=6dbc2f5a408b6d4f input=6dd011c108e33fb0]*/  {      Py_ssize_t size;      /* If it's a compact object, account for base structure +         character data. */ -    if (PyUnicode_IS_COMPACT_ASCII(v)) -        size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(v) + 1; -    else if (PyUnicode_IS_COMPACT(v)) +    if (PyUnicode_IS_COMPACT_ASCII(self)) +        size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(self) + 1; +    else if (PyUnicode_IS_COMPACT(self))          size = sizeof(PyCompactUnicodeObject) + -            (PyUnicode_GET_LENGTH(v) + 1) * PyUnicode_KIND(v); +            (PyUnicode_GET_LENGTH(self) + 1) * PyUnicode_KIND(self);      else {          /* If it is a two-block object, account for base object, and             for character block if present. */          size = sizeof(PyUnicodeObject); -        if (_PyUnicode_DATA_ANY(v)) -            size += (PyUnicode_GET_LENGTH(v) + 1) * -                PyUnicode_KIND(v); +        if (_PyUnicode_DATA_ANY(self)) +            size += (PyUnicode_GET_LENGTH(self) + 1) * +                PyUnicode_KIND(self);      }      /* If the wstr pointer is present, account for it unless it is shared         with the data pointer. Check if the data is not shared. 
*/ -    if (_PyUnicode_HAS_WSTR_MEMORY(v)) -        size += (PyUnicode_WSTR_LENGTH(v) + 1) * sizeof(wchar_t); -    if (_PyUnicode_HAS_UTF8_MEMORY(v)) -        size += PyUnicode_UTF8_LENGTH(v) + 1; +    if (_PyUnicode_HAS_WSTR_MEMORY(self)) +        size += (PyUnicode_WSTR_LENGTH(self) + 1) * sizeof(wchar_t); +    if (_PyUnicode_HAS_UTF8_MEMORY(self)) +        size += PyUnicode_UTF8_LENGTH(self) + 1;      return PyLong_FromSsize_t(size);  } -PyDoc_STRVAR(sizeof__doc__, -             "S.__sizeof__() -> size of S in memory, in bytes"); -  static PyObject *  unicode_getnewargs(PyObject *v)  { @@ -13812,54 +13877,52 @@ unicode_getnewargs(PyObject *v)  }  static PyMethodDef unicode_methods[] = { -    {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, -    {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__}, -    {"split", (PyCFunction) unicode_split, METH_VARARGS | METH_KEYWORDS, split__doc__}, -    {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__}, -    {"join", (PyCFunction) unicode_join, METH_O, join__doc__}, -    {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__}, -    {"casefold", (PyCFunction) unicode_casefold, METH_NOARGS, casefold__doc__}, -    {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__}, -    {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__}, +    UNICODE_ENCODE_METHODDEF +    UNICODE_REPLACE_METHODDEF +    UNICODE_SPLIT_METHODDEF +    UNICODE_RSPLIT_METHODDEF +    UNICODE_JOIN_METHODDEF +    UNICODE_CAPITALIZE_METHODDEF +    UNICODE_CASEFOLD_METHODDEF +    UNICODE_TITLE_METHODDEF +    UNICODE_CENTER_METHODDEF      {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__}, -    {"expandtabs", (PyCFunction) unicode_expandtabs, -     METH_VARARGS | METH_KEYWORDS, expandtabs__doc__}, +    UNICODE_EXPANDTABS_METHODDEF      {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, -    
{"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__}, +    UNICODE_PARTITION_METHODDEF      {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, -    {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, -    {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, -    {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__}, +    UNICODE_LJUST_METHODDEF +    UNICODE_LOWER_METHODDEF +    UNICODE_LSTRIP_METHODDEF      {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},      {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, -    {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__}, -    {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__}, -    {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__}, -    {"splitlines", (PyCFunction) unicode_splitlines, -     METH_VARARGS | METH_KEYWORDS, splitlines__doc__}, -    {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__}, -    {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__}, -    {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__}, -    {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__}, +    UNICODE_RJUST_METHODDEF +    UNICODE_RSTRIP_METHODDEF +    UNICODE_RPARTITION_METHODDEF +    UNICODE_SPLITLINES_METHODDEF +    UNICODE_STRIP_METHODDEF +    UNICODE_SWAPCASE_METHODDEF +    UNICODE_TRANSLATE_METHODDEF +    UNICODE_UPPER_METHODDEF      {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},      {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__}, -    {"islower", (PyCFunction) unicode_islower, METH_NOARGS, islower__doc__}, -    {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS, isupper__doc__}, -    {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS, istitle__doc__}, -    {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS, 
isspace__doc__}, -    {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS, isdecimal__doc__}, -    {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS, isdigit__doc__}, -    {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__}, -    {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__}, -    {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__}, -    {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__}, -    {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS, isprintable__doc__}, -    {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, +    UNICODE_ISLOWER_METHODDEF +    UNICODE_ISUPPER_METHODDEF +    UNICODE_ISTITLE_METHODDEF +    UNICODE_ISSPACE_METHODDEF +    UNICODE_ISDECIMAL_METHODDEF +    UNICODE_ISDIGIT_METHODDEF +    UNICODE_ISNUMERIC_METHODDEF +    UNICODE_ISALPHA_METHODDEF +    UNICODE_ISALNUM_METHODDEF +    UNICODE_ISIDENTIFIER_METHODDEF +    UNICODE_ISPRINTABLE_METHODDEF +    UNICODE_ZFILL_METHODDEF      {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},      {"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__}, -    {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, +    UNICODE___FORMAT___METHODDEF      UNICODE_MAKETRANS_METHODDEF -    {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__}, +    UNICODE_SIZEOF_METHODDEF  #if 0      /* These methods are just used for debugging the implementation. 
*/      {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS}, @@ -15362,7 +15425,7 @@ unicodeiter_reduce(unicodeiterobject *it)          return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),                               it->it_seq, it->it_index);      } else { -        PyObject *u = PyUnicode_FromUnicode(NULL, 0); +        PyObject *u = (PyObject *)_PyUnicode_New(0);          if (u == NULL)              return NULL;          return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u); @@ -15457,10 +15520,7 @@ unicode_iter(PyObject *seq)  size_t  Py_UNICODE_strlen(const Py_UNICODE *u)  { -    int res = 0; -    while(*u++) -        res++; -    return res; +    return wcslen(u);  }  Py_UNICODE* @@ -15485,8 +15545,8 @@ Py_UNICODE*  Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2)  {      Py_UNICODE *u1 = s1; -    u1 += Py_UNICODE_strlen(u1); -    Py_UNICODE_strcpy(u1, s2); +    u1 += wcslen(u1); +    while ((*u1++ = *s2++));      return s1;  } @@ -15535,7 +15595,7 @@ Py_UNICODE*  Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c)  {      const Py_UNICODE *p; -    p = s + Py_UNICODE_strlen(s); +    p = s + wcslen(s);      while (p != s) {          p--;          if (*p == c) | 
