diff options
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r-- | Objects/bytesobject.c | 1668 |
1 files changed, 1403 insertions, 265 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index b9b49acebe..2ceba486f0 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -7,6 +7,11 @@ #include "bytes_methods.h" #include <stddef.h> +/*[clinic input] +class bytes "PyBytesObject*" "&PyBytes_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/ + #ifdef COUNT_ALLOCS Py_ssize_t null_strings, one_strings; #endif @@ -44,15 +49,12 @@ static PyBytesObject *nullstring; PyBytes_FromStringAndSize()) or the length of the string in the `str' parameter (for PyBytes_FromString()). */ -PyObject * -PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) +static PyObject * +_PyBytes_FromSize(Py_ssize_t size, int use_calloc) { PyBytesObject *op; - if (size < 0) { - PyErr_SetString(PyExc_SystemError, - "Negative size passed to PyBytes_FromStringAndSize"); - return NULL; - } + assert(size >= 0); + if (size == 0 && (op = nullstring) != NULL) { #ifdef COUNT_ALLOCS null_strings++; @@ -60,36 +62,60 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) Py_INCREF(op); return (PyObject *)op; } - if (size == 1 && str != NULL && - (op = characters[*str & UCHAR_MAX]) != NULL) - { -#ifdef COUNT_ALLOCS - one_strings++; -#endif - Py_INCREF(op); - return (PyObject *)op; - } - if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) { + if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) { PyErr_SetString(PyExc_OverflowError, "byte string is too large"); return NULL; } /* Inline PyObject_NewVar */ - op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size); + if (use_calloc) + op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size); + else + op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size); if (op == NULL) return PyErr_NoMemory(); (void)PyObject_INIT_VAR(op, &PyBytes_Type, size); op->ob_shash = -1; - if (str != NULL) - Py_MEMCPY(op->ob_sval, str, size); - op->ob_sval[size] = '\0'; - /* share short strings */ + if (!use_calloc) + op->ob_sval[size] = '\0'; + /* empty byte string singleton */ if (size == 0) { nullstring = op; Py_INCREF(op); - } else if (size == 1 && str != NULL) { + } + return (PyObject *) op; +} + +PyObject * +PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) +{ + PyBytesObject *op; + if (size < 0) { + PyErr_SetString(PyExc_SystemError, + "Negative size passed to PyBytes_FromStringAndSize"); + return NULL; + } + if (size == 1 && str != NULL && + (op = characters[*str & UCHAR_MAX]) != NULL) + { +#ifdef COUNT_ALLOCS + one_strings++; +#endif + Py_INCREF(op); + return (PyObject *)op; + } + + op = (PyBytesObject *)_PyBytes_FromSize(size, 0); + if (op == NULL) + return NULL; + if (str == NULL) + return (PyObject *) op; + + Py_MEMCPY(op->ob_sval, str, size); + /* share short strings */ + if (size == 1) { characters[*str & UCHAR_MAX] = op; Py_INCREF(op); } @@ -347,6 +373,544 @@ PyBytes_FromFormat(const char *format, ...) return ret; } +/* Helpers for formatstring */ + +Py_LOCAL_INLINE(PyObject *) +getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) +{ + Py_ssize_t argidx = *p_argidx; + if (argidx < arglen) { + (*p_argidx)++; + if (arglen < 0) + return args; + else + return PyTuple_GetItem(args, argidx); + } + PyErr_SetString(PyExc_TypeError, + "not enough arguments for format string"); + return NULL; +} + +/* Format codes + * F_LJUST '-' + * F_SIGN '+' + * F_BLANK ' ' + * F_ALT '#' + * F_ZERO '0' + */ +#define F_LJUST (1<<0) +#define F_SIGN (1<<1) +#define F_BLANK (1<<2) +#define F_ALT (1<<3) +#define F_ZERO (1<<4) + +/* Returns a new reference to a PyBytes object, or NULL on failure. */ + +static PyObject * +formatfloat(PyObject *v, int flags, int prec, int type) +{ + char *p; + PyObject *result; + double x; + + x = PyFloat_AsDouble(v); + if (x == -1.0 && PyErr_Occurred()) { + PyErr_Format(PyExc_TypeError, "float argument required, " + "not %.200s", Py_TYPE(v)->tp_name); + return NULL; + } + + if (prec < 0) + prec = 6; + + p = PyOS_double_to_string(x, type, prec, + (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); + + if (p == NULL) + return NULL; + result = PyBytes_FromStringAndSize(p, strlen(p)); + PyMem_Free(p); + return result; +} + +Py_LOCAL_INLINE(int) +byte_converter(PyObject *arg, char *p) +{ + if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) { + *p = PyBytes_AS_STRING(arg)[0]; + return 1; + } + else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) { + *p = PyByteArray_AS_STRING(arg)[0]; + return 1; + } + else { + long ival = PyLong_AsLong(arg); + if (0 <= ival && ival <= 255) { + *p = (char)ival; + return 1; + } + } + PyErr_SetString(PyExc_TypeError, + "%c requires an integer in range(256) or a single byte"); + return 0; +} + +static PyObject * +format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen) +{ + PyObject *func, *result; + _Py_IDENTIFIER(__bytes__); + /* is it a bytes object? */ + if (PyBytes_Check(v)) { + *pbuf = PyBytes_AS_STRING(v); + *plen = PyBytes_GET_SIZE(v); + Py_INCREF(v); + return v; + } + if (PyByteArray_Check(v)) { + *pbuf = PyByteArray_AS_STRING(v); + *plen = PyByteArray_GET_SIZE(v); + Py_INCREF(v); + return v; + } + /* does it support __bytes__? */ + func = _PyObject_LookupSpecial(v, &PyId___bytes__); + if (func != NULL) { + result = PyObject_CallFunctionObjArgs(func, NULL); + Py_DECREF(func); + if (result == NULL) + return NULL; + if (!PyBytes_Check(result)) { + PyErr_Format(PyExc_TypeError, + "__bytes__ returned non-bytes (type %.200s)", + Py_TYPE(result)->tp_name); + Py_DECREF(result); + return NULL; + } + *pbuf = PyBytes_AS_STRING(result); + *plen = PyBytes_GET_SIZE(result); + return result; + } + PyErr_Format(PyExc_TypeError, + "%%b requires bytes, or an object that implements __bytes__, not '%.100s'", + Py_TYPE(v)->tp_name); + return NULL; +} + +/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) + + FORMATBUFLEN is the length of the buffer in which the ints & + chars are formatted. XXX This is a magic number. Each formatting + routine does bounds checking to ensure no overflow, but a better + solution may be to malloc a buffer of appropriate size for each + format. For now, the current solution is sufficient. +*/ +#define FORMATBUFLEN (size_t)120 + +PyObject * +_PyBytes_Format(PyObject *format, PyObject *args) +{ + char *fmt, *res; + Py_ssize_t arglen, argidx; + Py_ssize_t reslen, rescnt, fmtcnt; + int args_owned = 0; + PyObject *result; + PyObject *dict = NULL; + if (format == NULL || !PyBytes_Check(format) || args == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + fmt = PyBytes_AS_STRING(format); + fmtcnt = PyBytes_GET_SIZE(format); + reslen = rescnt = fmtcnt + 100; + result = PyBytes_FromStringAndSize((char *)NULL, reslen); + if (result == NULL) + return NULL; + res = PyBytes_AsString(result); + if (PyTuple_Check(args)) { + arglen = PyTuple_GET_SIZE(args); + argidx = 0; + } + else { + arglen = -1; + argidx = -2; + } + if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript && + !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) && + !PyByteArray_Check(args)) { + dict = args; + } + while (--fmtcnt >= 0) { + if (*fmt != '%') { + if (--rescnt < 0) { + rescnt = fmtcnt + 100; + reslen += rescnt; + if (_PyBytes_Resize(&result, reslen)) + return NULL; + res = PyBytes_AS_STRING(result) + + reslen - rescnt; + --rescnt; + } + *res++ = *fmt++; + } + else { + /* Got a format specifier */ + int flags = 0; + Py_ssize_t width = -1; + int prec = -1; + int c = '\0'; + int fill; + PyObject *iobj; + PyObject *v = NULL; + PyObject *temp = NULL; + const char *pbuf = NULL; + int sign; + Py_ssize_t len = 0; + char onechar; /* For byte_converter() */ + + fmt++; + if (*fmt == '(') { + char *keystart; + Py_ssize_t keylen; + PyObject *key; + int pcount = 1; + + if (dict == NULL) { + PyErr_SetString(PyExc_TypeError, + "format requires a mapping"); + goto error; + } + ++fmt; + --fmtcnt; + keystart = fmt; + /* Skip over balanced parentheses */ + while (pcount > 0 && --fmtcnt >= 0) { + if (*fmt == ')') + --pcount; + else if (*fmt == '(') + ++pcount; + fmt++; + } + keylen = fmt - keystart - 1; + if (fmtcnt < 0 || pcount > 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format key"); + goto error; + } + key = PyBytes_FromStringAndSize(keystart, + keylen); + if (key == NULL) + goto error; + if (args_owned) { + Py_DECREF(args); + args_owned = 0; + } + args = PyObject_GetItem(dict, key); + Py_DECREF(key); + if (args == NULL) { + goto error; + } + args_owned = 1; + arglen = -1; + argidx = -2; + } + while (--fmtcnt >= 0) { + switch (c = *fmt++) { + case '-': flags |= F_LJUST; continue; + case '+': flags |= F_SIGN; continue; + case ' ': flags |= F_BLANK; continue; + case '#': flags |= F_ALT; continue; + case '0': flags |= F_ZERO; continue; + } + break; + } + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + goto error; + } + width = PyLong_AsSsize_t(v); + if (width == -1 && PyErr_Occurred()) + goto error; + if (width < 0) { + flags |= F_LJUST; + width = -width; + } + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && isdigit(c)) { + width = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "width too big"); + goto error; + } + width = width*10 + (c - '0'); + } + } + if (c == '.') { + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString( + PyExc_TypeError, + "* wants int"); + goto error; + } + prec = _PyLong_AsInt(v); + if (prec == -1 && PyErr_Occurred()) + goto error; + if (prec < 0) + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && isdigit(c)) { + prec = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (prec > (INT_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "prec too big"); + goto error; + } + prec = prec*10 + (c - '0'); + } + } + } /* prec */ + if (fmtcnt >= 0) { + if (c == 'h' || c == 'l' || c == 'L') { + if (--fmtcnt >= 0) + c = *fmt++; + } + } + if (fmtcnt < 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format"); + goto error; + } + if (c != '%') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + } + sign = 0; + fill = ' '; + switch (c) { + case '%': + pbuf = "%"; + len = 1; + break; + case 'a': + temp = PyObject_ASCII(v); + if (temp == NULL) + goto error; + assert(PyUnicode_IS_ASCII(temp)); + pbuf = (const char *)PyUnicode_1BYTE_DATA(temp); + len = PyUnicode_GET_LENGTH(temp); + if (prec >= 0 && len > prec) + len = prec; + break; + case 's': + // %s is only for 2/3 code; 3 only code should use %b + case 'b': + temp = format_obj(v, &pbuf, &len); + if (temp == NULL) + goto error; + if (prec >= 0 && len > prec) + len = prec; + break; + case 'i': + case 'd': + case 'u': + case 'o': + case 'x': + case 'X': + if (c == 'i') + c = 'd'; + iobj = NULL; + if (PyNumber_Check(v)) { + if ((PyLong_Check(v))) { + iobj = v; + Py_INCREF(iobj); + } + else { + iobj = PyNumber_Long(v); + if (iobj != NULL && !PyLong_Check(iobj)) + Py_CLEAR(iobj); + } + } + if (iobj == NULL) { + PyErr_Format(PyExc_TypeError, + "%%%c format: a number is required, " + "not %.200s", c, Py_TYPE(v)->tp_name); + goto error; + } + temp = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, c); + Py_DECREF(iobj); + if (!temp) + goto error; + assert(PyUnicode_IS_ASCII(temp)); + pbuf = (const char *)PyUnicode_1BYTE_DATA(temp); + len = PyUnicode_GET_LENGTH(temp); + sign = 1; + if (flags & F_ZERO) + fill = '0'; + break; + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + temp = formatfloat(v, flags, prec, c); + if (temp == NULL) + goto error; + pbuf = PyBytes_AS_STRING(temp); + len = PyBytes_GET_SIZE(temp); + sign = 1; + if (flags & F_ZERO) + fill = '0'; + break; + case 'c': + pbuf = &onechar; + len = byte_converter(v, &onechar); + if (!len) + goto error; + break; + default: + PyErr_Format(PyExc_ValueError, + "unsupported format character '%c' (0x%x) " + "at index %zd", + c, c, + (Py_ssize_t)(fmt - 1 - + PyBytes_AsString(format))); + goto error; + } + if (sign) { + if (*pbuf == '-' || *pbuf == '+') { + sign = *pbuf++; + len--; + } + else if (flags & F_SIGN) + sign = '+'; + else if (flags & F_BLANK) + sign = ' '; + else + sign = 0; + } + if (width < len) + width = len; + if (rescnt - (sign != 0) < width) { + reslen -= rescnt; + rescnt = width + fmtcnt + 100; + reslen += rescnt; + if (reslen < 0) { + Py_DECREF(result); + Py_XDECREF(temp); + return PyErr_NoMemory(); + } + if (_PyBytes_Resize(&result, reslen)) { + Py_XDECREF(temp); + return NULL; + } + res = PyBytes_AS_STRING(result) + + reslen - rescnt; + } + if (sign) { + if (fill != ' ') + *res++ = sign; + rescnt--; + if (width > len) + width--; + } + if ((flags & F_ALT) && (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + if (fill != ' ') { + *res++ = *pbuf++; + *res++ = *pbuf++; + } + rescnt -= 2; + width -= 2; + if (width < 0) + width = 0; + len -= 2; + } + if (width > len && !(flags & F_LJUST)) { + do { + --rescnt; + *res++ = fill; + } while (--width > len); + } + if (fill == ' ') { + if (sign) + *res++ = sign; + if ((flags & F_ALT) && + (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + *res++ = *pbuf++; + *res++ = *pbuf++; + } + } + Py_MEMCPY(res, pbuf, len); + res += len; + rescnt -= len; + while (--width >= len) { + --rescnt; + *res++ = ' '; + } + if (dict && (argidx < arglen) && c != '%') { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + Py_XDECREF(temp); + goto error; + } + Py_XDECREF(temp); + } /* '%' */ + } /* until end */ + if (argidx < arglen && !dict) { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + goto error; + } + if (args_owned) { + Py_DECREF(args); + } + if (_PyBytes_Resize(&result, reslen - rescnt)) + return NULL; + return result; + + error: + Py_DECREF(result); + if (args_owned) { + Py_DECREF(args); + } + return NULL; +} + +/* =-= */ + static void bytes_dealloc(PyObject *op) { @@ -540,8 +1104,8 @@ PyBytes_AsStringAndSize(PyObject *obj, if (len != NULL) *len = PyBytes_GET_SIZE(obj); else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) { - PyErr_SetString(PyExc_TypeError, - "expected bytes with no null"); + PyErr_SetString(PyExc_ValueError, + "embedded null byte"); return -1; } return 0; @@ -951,7 +1515,7 @@ bytes_subscript(PyBytesObject* self, PyObject* item) } else { PyErr_Format(PyExc_TypeError, - "byte indices must be integers, not %.200s", + "byte indices must be integers or slices, not %.200s", Py_TYPE(item)->tp_name); return NULL; } @@ -991,37 +1555,72 @@ static PyBufferProcs bytes_as_buffer = { #define RIGHTSTRIP 1 #define BOTHSTRIP 2 -/* Arrays indexed by above */ -static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; +/*[clinic input] +bytes.split + + sep: object = None + The delimiter according which to split the bytes. + None (the default value) means split on ASCII whitespace characters + (space, tab, return, newline, formfeed, vertical tab). + maxsplit: Py_ssize_t = -1 + Maximum number of splits to do. + -1 (the default value) means no limit. + +Return a list of the sections in the bytes, using sep as the delimiter. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_split__doc__, +"split($self, /, sep=None, maxsplit=-1)\n" +"--\n" +"\n" +"Return a list of the sections in the bytes, using sep as the delimiter.\n" +"\n" +" sep\n" +" The delimiter according which to split the bytes.\n" +" None (the default value) means split on ASCII whitespace characters\n" +" (space, tab, return, newline, formfeed, vertical tab).\n" +" maxsplit\n" +" Maximum number of splits to do.\n" +" -1 (the default value) means no limit."); + +#define BYTES_SPLIT_METHODDEF \ + {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__}, -#define STRIPNAME(i) (stripformat[i]+3) +static PyObject * +bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit); -PyDoc_STRVAR(split__doc__, -"B.split(sep=None, maxsplit=-1) -> list of bytes\n\ -\n\ -Return a list of the sections in B, using sep as the delimiter.\n\ -If sep is not specified or is None, B is split on ASCII whitespace\n\ -characters (space, tab, return, newline, formfeed, vertical tab).\n\ -If maxsplit is given, at most maxsplit splits are done."); +static PyObject * +bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static char *_keywords[] = {"sep", "maxsplit", NULL}; + PyObject *sep = Py_None; + Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|On:split", _keywords, + &sep, &maxsplit)) + goto exit; + return_value = bytes_split_impl(self, sep, maxsplit); + +exit: + return return_value; +} static PyObject * -bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds) +bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit) +/*[clinic end generated code: output=c80a47afdd505975 input=8b809b39074abbfa]*/ { - static char *kwlist[] = {"sep", "maxsplit", 0}; Py_ssize_t len = PyBytes_GET_SIZE(self), n; - Py_ssize_t maxsplit = -1; const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; - PyObject *list, *subobj = Py_None; + PyObject *list; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split", - kwlist, &subobj, &maxsplit)) - return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; - if (subobj == Py_None) + if (sep == Py_None) return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit); - if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0) + if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0) return NULL; sub = vsub.buf; n = vsub.len; @@ -1031,85 +1630,202 @@ bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds) return list; } -PyDoc_STRVAR(partition__doc__, -"B.partition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in B, and return the part before it,\n\ -the separator itself, and the part after it. If the separator is not\n\ -found, returns B and two empty bytes objects."); +/*[clinic input] +bytes.partition + + self: self(type="PyBytesObject *") + sep: Py_buffer + / + +Partition the bytes into three parts using the given separator. + +This will search for the separator sep in the bytes. If the separator is found, +returns a 3-tuple containing the part before the separator, the separator +itself, and the part after it. + +If the separator is not found, returns a 3-tuple containing the original bytes +object and two empty bytes objects. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_partition__doc__, +"partition($self, sep, /)\n" +"--\n" +"\n" +"Partition the bytes into three parts using the given separator.\n" +"\n" +"This will search for the separator sep in the bytes. If the separator is found,\n" +"returns a 3-tuple containing the part before the separator, the separator\n" +"itself, and the part after it.\n" +"\n" +"If the separator is not found, returns a 3-tuple containing the original bytes\n" +"object and two empty bytes objects."); + +#define BYTES_PARTITION_METHODDEF \ + {"partition", (PyCFunction)bytes_partition, METH_VARARGS, bytes_partition__doc__}, + +static PyObject * +bytes_partition_impl(PyBytesObject *self, Py_buffer *sep); static PyObject * -bytes_partition(PyBytesObject *self, PyObject *sep_obj) +bytes_partition(PyBytesObject *self, PyObject *args) { + PyObject *return_value = NULL; Py_buffer sep = {NULL, NULL}; - PyObject *res; - if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0) - return NULL; + if (!PyArg_ParseTuple(args, + "y*:partition", + &sep)) + goto exit; + return_value = bytes_partition_impl(self, &sep); + +exit: + /* Cleanup for sep */ + if (sep.obj) + PyBuffer_Release(&sep); - res = stringlib_partition( + return return_value; +} + +static PyObject * +bytes_partition_impl(PyBytesObject *self, Py_buffer *sep) +/*[clinic end generated code: output=3006727cfbf83aa4 input=bc855dc63ca949de]*/ +{ + return stringlib_partition( (PyObject*) self, PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), - sep_obj, sep.buf, sep.len + sep->obj, (const char *)sep->buf, sep->len ); - PyBuffer_Release(&sep); - return res; } -PyDoc_STRVAR(rpartition__doc__, -"B.rpartition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in B, starting at the end of B,\n\ -and return the part before it, the separator itself, and the\n\ -part after it. If the separator is not found, returns two empty\n\ -bytes objects and B."); +/*[clinic input] +bytes.rpartition + + self: self(type="PyBytesObject *") + sep: Py_buffer + / + +Partition the bytes into three parts using the given separator. + +This will search for the separator sep in the bytes, starting and the end. If +the separator is found, returns a 3-tuple containing the part before the +separator, the separator itself, and the part after it. + +If the separator is not found, returns a 3-tuple containing two empty bytes +objects and the original bytes object. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_rpartition__doc__, +"rpartition($self, sep, /)\n" +"--\n" +"\n" +"Partition the bytes into three parts using the given separator.\n" +"\n" +"This will search for the separator sep in the bytes, starting and the end. If\n" +"the separator is found, returns a 3-tuple containing the part before the\n" +"separator, the separator itself, and the part after it.\n" +"\n" +"If the separator is not found, returns a 3-tuple containing two empty bytes\n" +"objects and the original bytes object."); + +#define BYTES_RPARTITION_METHODDEF \ + {"rpartition", (PyCFunction)bytes_rpartition, METH_VARARGS, bytes_rpartition__doc__}, + +static PyObject * +bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep); static PyObject * -bytes_rpartition(PyBytesObject *self, PyObject *sep_obj) +bytes_rpartition(PyBytesObject *self, PyObject *args) { + PyObject *return_value = NULL; Py_buffer sep = {NULL, NULL}; - PyObject *res; - if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0) - return NULL; + if (!PyArg_ParseTuple(args, + "y*:rpartition", + &sep)) + goto exit; + return_value = bytes_rpartition_impl(self, &sep); + +exit: + /* Cleanup for sep */ + if (sep.obj) + PyBuffer_Release(&sep); + + return return_value; +} - res = stringlib_rpartition( +static PyObject * +bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep) +/*[clinic end generated code: output=57b169dc47fa90e8 input=6588fff262a9170e]*/ +{ + return stringlib_rpartition( (PyObject*) self, PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), - sep_obj, sep.buf, sep.len + sep->obj, (const char *)sep->buf, sep->len ); - PyBuffer_Release(&sep); - return res; } -PyDoc_STRVAR(rsplit__doc__, -"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\ -\n\ -Return a list of the sections in B, using sep as the delimiter,\n\ -starting at the end of B and working to the front.\n\ -If sep is not given, B is split on ASCII whitespace characters\n\ -(space, tab, return, newline, formfeed, vertical tab).\n\ -If maxsplit is given, at most maxsplit splits are done."); +/*[clinic input] +bytes.rsplit = bytes.split + +Return a list of the sections in the bytes, using sep as the delimiter. + +Splitting is done starting at the end of the bytes and working to the front. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_rsplit__doc__, +"rsplit($self, /, sep=None, maxsplit=-1)\n" +"--\n" +"\n" +"Return a list of the sections in the bytes, using sep as the delimiter.\n" +"\n" +" sep\n" +" The delimiter according which to split the bytes.\n" +" None (the default value) means split on ASCII whitespace characters\n" +" (space, tab, return, newline, formfeed, vertical tab).\n" +" maxsplit\n" +" Maximum number of splits to do.\n" +" -1 (the default value) means no limit.\n" +"\n" +"Splitting is done starting at the end of the bytes and working to the front."); + +#define BYTES_RSPLIT_METHODDEF \ + {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__}, +static PyObject * +bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit); static PyObject * -bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds) +bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"sep", "maxsplit", 0}; - Py_ssize_t len = PyBytes_GET_SIZE(self), n; + PyObject *return_value = NULL; + static char *_keywords[] = {"sep", "maxsplit", NULL}; + PyObject *sep = Py_None; Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|On:rsplit", _keywords, + &sep, &maxsplit)) + goto exit; + return_value = bytes_rsplit_impl(self, sep, maxsplit); + +exit: + return return_value; +} + +static PyObject * +bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit) +/*[clinic end generated code: output=f86feddedbd7b26d input=0f86c9f28f7d7b7b]*/ +{ + Py_ssize_t len = PyBytes_GET_SIZE(self), n; const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; - PyObject *list, *subobj = Py_None; + PyObject *list; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit", - kwlist, &subobj, &maxsplit)) - return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; - if (subobj == Py_None) + if (sep == Py_None) return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit); - if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0) + if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0) return NULL; sub = vsub.buf; n = vsub.len; @@ -1120,16 +1836,41 @@ bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds) } -PyDoc_STRVAR(join__doc__, -"B.join(iterable_of_bytes) -> bytes\n\ -\n\ -Concatenate any number of bytes objects, with B in between each pair.\n\ -Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'."); +/*[clinic input] +bytes.join + + iterable_of_bytes: object + / + +Concatenate any number of bytes objects. + +The bytes whose method is called is inserted in between each pair. + +The result is returned as a new bytes object. + +Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_join__doc__, +"join($self, iterable_of_bytes, /)\n" +"--\n" +"\n" +"Concatenate any number of bytes objects.\n" +"\n" +"The bytes whose method is called is inserted in between each pair.\n" +"\n" +"The result is returned as a new bytes object.\n" +"\n" +"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'."); + +#define BYTES_JOIN_METHODDEF \ + {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__}, static PyObject * -bytes_join(PyObject *self, PyObject *iterable) +bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes) +/*[clinic end generated code: output=e541a14a8da97908 input=7fe377b95bd549d2]*/ { - return stringlib_bytes_join(self, iterable); + return stringlib_bytes_join((PyObject*)self, iterable_of_bytes); } PyObject * @@ -1137,7 +1878,7 @@ _PyBytes_Join(PyObject *sep, PyObject *x) { assert(sep != NULL && PyBytes_Check(sep)); assert(x != NULL); - return bytes_join(sep, x); + return bytes_join((PyBytesObject*)sep, x); } /* helper macro to fixup start/end slice values */ @@ -1348,62 +2089,159 @@ do_strip(PyBytesObject *self, int striptype) Py_LOCAL_INLINE(PyObject *) -do_argstrip(PyBytesObject *self, int striptype, PyObject *args) +do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes) { - PyObject *sep = NULL; - - if (!PyArg_ParseTuple(args, stripformat[striptype], &sep)) - return NULL; - - if (sep != NULL && sep != Py_None) { - return do_xstrip(self, striptype, sep); + if (bytes != NULL && bytes != Py_None) { + return do_xstrip(self, striptype, bytes); } return do_strip(self, striptype); } +/*[clinic input] +bytes.strip + + self: self(type="PyBytesObject *") + bytes: object = None + / + +Strip leading and trailing bytes contained in the argument. + +If the argument is omitted or None, strip leading and trailing ASCII whitespace. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_strip__doc__, +"strip($self, bytes=None, /)\n" +"--\n" +"\n" +"Strip leading and trailing bytes contained in the argument.\n" +"\n" +"If the argument is omitted or None, strip leading and trailing ASCII whitespace."); + +#define BYTES_STRIP_METHODDEF \ + {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__}, + +static PyObject * +bytes_strip_impl(PyBytesObject *self, PyObject *bytes); -PyDoc_STRVAR(strip__doc__, -"B.strip([bytes]) -> bytes\n\ -\n\ -Strip leading and trailing bytes contained in the argument.\n\ -If the argument is omitted, strip leading and trailing ASCII whitespace."); static PyObject * bytes_strip(PyBytesObject *self, PyObject *args) { - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, BOTHSTRIP); /* Common case */ - else - return do_argstrip(self, BOTHSTRIP, args); + PyObject *return_value = NULL; + PyObject *bytes = Py_None; + + if (!PyArg_UnpackTuple(args, "strip", + 0, 1, + &bytes)) + goto exit; + return_value = bytes_strip_impl(self, bytes); + +exit: + return return_value; +} + +static PyObject * +bytes_strip_impl(PyBytesObject *self, PyObject *bytes) +/*[clinic end generated code: output=c8234a599ba5ec35 input=37daa5fad1395d95]*/ +{ + return do_argstrip(self, BOTHSTRIP, bytes); } +/*[clinic input] +bytes.lstrip + + self: self(type="PyBytesObject *") + bytes: object = None + / + +Strip leading bytes contained in the argument. + +If the argument is omitted or None, strip leading ASCII whitespace. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_lstrip__doc__, +"lstrip($self, bytes=None, /)\n" +"--\n" +"\n" +"Strip leading bytes contained in the argument.\n" +"\n" +"If the argument is omitted or None, strip leading ASCII whitespace."); + +#define BYTES_LSTRIP_METHODDEF \ + {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__}, + +static PyObject * +bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes); -PyDoc_STRVAR(lstrip__doc__, -"B.lstrip([bytes]) -> bytes\n\ -\n\ -Strip leading bytes contained in the argument.\n\ -If the argument is omitted, strip leading ASCII whitespace."); static PyObject * bytes_lstrip(PyBytesObject *self, PyObject *args) { - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, LEFTSTRIP); /* Common case */ - else - return do_argstrip(self, LEFTSTRIP, args); + PyObject *return_value = NULL; + PyObject *bytes = Py_None; + + if (!PyArg_UnpackTuple(args, "lstrip", + 0, 1, + &bytes)) + goto exit; + return_value = bytes_lstrip_impl(self, bytes); + +exit: + return return_value; +} + +static PyObject * +bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes) +/*[clinic end generated code: output=529e8511ab6f1115 input=88811b09dfbc2988]*/ +{ + return do_argstrip(self, LEFTSTRIP, bytes); } +/*[clinic input] +bytes.rstrip + + self: self(type="PyBytesObject *") + bytes: object = None + / + +Strip trailing bytes contained in the argument. + +If the argument is omitted or None, strip trailing ASCII whitespace. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_rstrip__doc__, +"rstrip($self, bytes=None, /)\n" +"--\n" +"\n" +"Strip trailing bytes contained in the argument.\n" +"\n" +"If the argument is omitted or None, strip trailing ASCII whitespace."); + +#define BYTES_RSTRIP_METHODDEF \ + {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__}, + +static PyObject * +bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes); -PyDoc_STRVAR(rstrip__doc__, -"B.rstrip([bytes]) -> bytes\n\ -\n\ -Strip trailing bytes contained in the argument.\n\ -If the argument is omitted, strip trailing ASCII whitespace."); static PyObject * bytes_rstrip(PyBytesObject *self, PyObject *args) { - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, RIGHTSTRIP); /* Common case */ - else - return do_argstrip(self, RIGHTSTRIP, args); + PyObject *return_value = NULL; + PyObject *bytes = Py_None; + + if (!PyArg_UnpackTuple(args, "rstrip", + 0, 1, + &bytes)) + goto exit; + return_value = bytes_rstrip_impl(self, bytes); + +exit: + return return_value; +} + +static PyObject * +bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes) +/*[clinic end generated code: output=e98730bd133e6593 input=8f93c9cd361f0140]*/ +{ + return do_argstrip(self, RIGHTSTRIP, bytes); } @@ -1455,45 +2293,94 @@ bytes_count(PyBytesObject *self, PyObject *args) } -PyDoc_STRVAR(translate__doc__, -"B.translate(table[, deletechars]) -> bytes\n\ -\n\ -Return a copy of B, where all characters occurring in the\n\ -optional argument deletechars are removed, and the remaining\n\ -characters have been mapped through the given translation\n\ -table, which must be a bytes object of length 256."); +/*[clinic input] +bytes.translate + + self: self(type="PyBytesObject *") + table: object + Translation table, which must be a bytes object of length 256. + [ + deletechars: object + ] + / + +Return a copy with each character mapped by the given translation table. + +All characters occurring in the optional argument deletechars are removed. +The remaining characters are mapped through the given translation table. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_translate__doc__, +"translate(table, [deletechars])\n" +"Return a copy with each character mapped by the given translation table.\n" +"\n" +" table\n" +" Translation table, which must be a bytes object of length 256.\n" +"\n" +"All characters occurring in the optional argument deletechars are removed.\n" +"The remaining characters are mapped through the given translation table."); + +#define BYTES_TRANSLATE_METHODDEF \ + {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__}, + +static PyObject * +bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars); static PyObject * bytes_translate(PyBytesObject *self, PyObject *args) { + PyObject *return_value = NULL; + PyObject *table; + int group_right_1 = 0; + PyObject *deletechars = NULL; + + switch (PyTuple_GET_SIZE(args)) { + case 1: + if (!PyArg_ParseTuple(args, "O:translate", &table)) + goto exit; + break; + case 2: + if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars)) + goto exit; + group_right_1 = 1; + break; + default: + PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments"); + goto exit; + } + return_value = bytes_translate_impl(self, table, group_right_1, deletechars); + +exit: + return return_value; +} + +static PyObject * +bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars) +/*[clinic end generated code: output=f0f29a57f41df5d8 input=d8fa5519d7cc4be7]*/ +{ char *input, *output; Py_buffer table_view = {NULL, NULL}; Py_buffer del_table_view = {NULL, NULL}; - const char *table; + const char *table_chars; Py_ssize_t i, c, changed = 0; PyObject *input_obj = (PyObject*)self; - const char *output_start, *del_table=NULL; + const char *output_start, *del_table_chars=NULL; Py_ssize_t inlen, tablen, dellen = 0; PyObject *result; int trans_table[256]; - PyObject *tableobj, *delobj = NULL; - if (!PyArg_UnpackTuple(args, "translate", 1, 2, - &tableobj, &delobj)) - return NULL; - - if (PyBytes_Check(tableobj)) { - table = PyBytes_AS_STRING(tableobj); - tablen = PyBytes_GET_SIZE(tableobj); + if (PyBytes_Check(table)) { + table_chars = PyBytes_AS_STRING(table); + tablen = PyBytes_GET_SIZE(table); } - else if (tableobj == Py_None) { - table = NULL; + else if (table == Py_None) { + table_chars = NULL; tablen = 256; } else { - if (PyObject_GetBuffer(tableobj, &table_view, PyBUF_SIMPLE) != 0) + if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0) return NULL; - table = table_view.buf; + table_chars = table_view.buf; tablen = table_view.len; } @@ -1504,22 +2391,22 @@ bytes_translate(PyBytesObject *self, PyObject *args) return NULL; } - if (delobj != NULL) { - if (PyBytes_Check(delobj)) { - del_table = PyBytes_AS_STRING(delobj); - dellen = PyBytes_GET_SIZE(delobj); + if (deletechars != NULL) { + if (PyBytes_Check(deletechars)) { + del_table_chars = PyBytes_AS_STRING(deletechars); + dellen = PyBytes_GET_SIZE(deletechars); } else { - if (PyObject_GetBuffer(delobj, &del_table_view, PyBUF_SIMPLE) != 0) { + if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) { PyBuffer_Release(&table_view); return NULL; } - del_table = del_table_view.buf; + del_table_chars = del_table_view.buf; dellen = del_table_view.len; } } else { - del_table = NULL; + del_table_chars = NULL; dellen = 0; } @@ -1533,11 +2420,11 @@ bytes_translate(PyBytesObject *self, PyObject *args) output_start = output = PyBytes_AsString(result); input = PyBytes_AS_STRING(input_obj); - if (dellen == 0 && table != NULL) { + if (dellen == 0 && table_chars != NULL) { /* If no deletions are required, use faster code */ for (i = inlen; --i >= 0; ) { c = Py_CHARMASK(*input++); - if (Py_CHARMASK((*output++ = table[c])) != c) + if (Py_CHARMASK((*output++ = table_chars[c])) != c) changed = 1; } if (!changed && PyBytes_CheckExact(input_obj)) { @@ -1550,17 +2437,17 @@ bytes_translate(PyBytesObject *self, PyObject *args) return result; } - if (table == NULL) { + if (table_chars == NULL) { for (i = 0; i < 256; i++) trans_table[i] = Py_CHARMASK(i); } else { for (i = 0; i < 256; i++) - trans_table[i] = Py_CHARMASK(table[i]); + trans_table[i] = Py_CHARMASK(table_chars[i]); } PyBuffer_Release(&table_view); for (i = 0; i < dellen; i++) - trans_table[(int) Py_CHARMASK(del_table[i])] = -1; + trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1; PyBuffer_Release(&del_table_view); for (i = inlen; --i >= 0; ) { @@ -1582,10 +2469,69 @@ bytes_translate(PyBytesObject *self, PyObject *args) } +/*[clinic input] + +@staticmethod +bytes.maketrans + + frm: Py_buffer + to: Py_buffer + / + +Return a translation table useable for the bytes or bytearray translate method. + +The returned table will be one where each byte in frm is mapped to the byte at +the same position in to. + +The bytes objects frm and to must be of the same length. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_maketrans__doc__, +"maketrans(frm, to, /)\n" +"--\n" +"\n" +"Return a translation table useable for the bytes or bytearray translate method.\n" +"\n" +"The returned table will be one where each byte in frm is mapped to the byte at\n" +"the same position in to.\n" +"\n" +"The bytes objects frm and to must be of the same length."); + +#define BYTES_MAKETRANS_METHODDEF \ + {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__}, + static PyObject * -bytes_maketrans(PyObject *null, PyObject *args) +bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to); + +static PyObject * +bytes_maketrans(void *null, PyObject *args) { - return _Py_bytes_maketrans(args); + PyObject *return_value = NULL; + Py_buffer frm = {NULL, NULL}; + Py_buffer to = {NULL, NULL}; + + if (!PyArg_ParseTuple(args, + "y*y*:maketrans", + &frm, &to)) + goto exit; + return_value = bytes_maketrans_impl(&frm, &to); + +exit: + /* Cleanup for frm */ + if (frm.obj) + PyBuffer_Release(&frm); + /* Cleanup for to */ + if (to.obj) + PyBuffer_Release(&to); + + return return_value; +} + +static PyObject * +bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to) +/*[clinic end generated code: output=7df47390c476ac60 input=de7a8fc5632bb8f1]*/ +{ + return _Py_bytes_maketrans(frm, to); } /* find and count characters and substrings */ @@ -2080,31 +3026,74 @@ replace(PyBytesObject *self, } } -PyDoc_STRVAR(replace__doc__, -"B.replace(old, new[, count]) -> bytes\n\ -\n\ -Return a copy of B with all occurrences of subsection\n\ -old replaced by new. If the optional argument count is\n\ -given, only first count occurances are replaced."); + +/*[clinic input] +bytes.replace + + old: Py_buffer + new: Py_buffer + count: Py_ssize_t = -1 + Maximum number of occurrences to replace. + -1 (the default value) means replace all occurrences. + / + +Return a copy with all occurrences of substring old replaced by new. + +If the optional argument count is given, only the first count occurrences are +replaced. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_replace__doc__, +"replace($self, old, new, count=-1, /)\n" +"--\n" +"\n" +"Return a copy with all occurrences of substring old replaced by new.\n" +"\n" +" count\n" +" Maximum number of occurrences to replace.\n" +" -1 (the default value) means replace all occurrences.\n" +"\n" +"If the optional argument count is given, only the first count occurrences are\n" +"replaced."); + +#define BYTES_REPLACE_METHODDEF \ + {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__}, static PyObject * -bytes_replace(PyBytesObject *self, PyObject *args) +bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count); + +static PyObject * +bytes_replace(PyBytesObject*self, PyObject *args) { - PyObject *res; + PyObject *return_value = NULL; Py_buffer old = {NULL, NULL}; Py_buffer new = {NULL, NULL}; Py_ssize_t count = -1; - if (!PyArg_ParseTuple(args, "y*y*|n:replace", &old, &new, &count)) - return NULL; - - res = (PyObject *)replace((PyBytesObject *) self, - (const char *)old.buf, old.len, - (const char *)new.buf, new.len, count); + if (!PyArg_ParseTuple(args, + "y*y*|n:replace", + &old, &new, &count)) + goto exit; + return_value = bytes_replace_impl(self, &old, &new, count); + +exit: + /* Cleanup for old */ + if (old.obj) + PyBuffer_Release(&old); + /* Cleanup for new */ + if (new.obj) + PyBuffer_Release(&new); + + return return_value; +} - PyBuffer_Release(&old); - PyBuffer_Release(&new); - return res; +static PyObject * +bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count) +/*[clinic end generated code: output=f07bd9ecf29ee8d8 input=b2fbbf0bf04de8e5]*/ +{ + return (PyObject *)replace((PyBytesObject *) self, + (const char *)old->buf, old->len, + (const char *)new->buf, new->len, count); } /** End DALKE **/ @@ -2251,60 +3240,121 @@ bytes_endswith(PyBytesObject *self, PyObject *args) } -PyDoc_STRVAR(decode__doc__, -"B.decode(encoding='utf-8', errors='strict') -> str\n\ -\n\ -Decode B using the codec registered for encoding. Default encoding\n\ -is 'utf-8'. errors may be given to set a different error\n\ -handling scheme. Default is 'strict' meaning that encoding errors raise\n\ -a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ -as well as any other name registerd with codecs.register_error that is\n\ -able to handle UnicodeDecodeErrors."); +/*[clinic input] +bytes.decode + + encoding: str(c_default="NULL") = 'utf-8' + The encoding with which to decode the bytes. + errors: str(c_default="NULL") = 'strict' + The error handling scheme to use for the handling of decoding errors. + The default is 'strict' meaning that decoding errors raise a + UnicodeDecodeError. Other possible values are 'ignore' and 'replace' + as well as any other name registered with codecs.register_error that + can handle UnicodeDecodeErrors. + +Decode the bytes using the codec registered for encoding. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_decode__doc__, +"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n" +"--\n" +"\n" +"Decode the bytes using the codec registered for encoding.\n" +"\n" +" encoding\n" +" The encoding with which to decode the bytes.\n" +" errors\n" +" The error handling scheme to use for the handling of decoding errors.\n" +" The default is \'strict\' meaning that decoding errors raise a\n" +" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n" +" as well as any other name registered with codecs.register_error that\n" +" can handle UnicodeDecodeErrors."); + +#define BYTES_DECODE_METHODDEF \ + {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__}, static PyObject * -bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs) +bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors); + +static PyObject * +bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs) { + PyObject *return_value = NULL; + static char *_keywords[] = {"encoding", "errors", NULL}; const char *encoding = NULL; const char *errors = NULL; - static char *kwlist[] = {"encoding", "errors", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors)) - return NULL; - return PyUnicode_FromEncodedObject(self, encoding, errors); + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|ss:decode", _keywords, + &encoding, &errors)) + goto exit; + return_value = bytes_decode_impl(self, encoding, errors); + +exit: + return return_value; } +static PyObject * +bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors) +/*[clinic end generated code: output=61a80290bbfce696 input=958174769d2a40ca]*/ +{ + return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors); +} -PyDoc_STRVAR(splitlines__doc__, -"B.splitlines([keepends]) -> list of lines\n\ -\n\ -Return a list of the lines in B, breaking at line boundaries.\n\ -Line breaks are not included in the resulting list unless keepends\n\ -is given and true."); -static PyObject* -bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds) +/*[clinic input] +bytes.splitlines + + keepends: int(py_default="False") = 0 + +Return a list of the lines in the bytes, breaking at line boundaries. + +Line breaks are not included in the resulting list unless keepends is given and +true. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_splitlines__doc__, +"splitlines($self, /, keepends=False)\n" +"--\n" +"\n" +"Return a list of the lines in the bytes, breaking at line boundaries.\n" +"\n" +"Line breaks are not included in the resulting list unless keepends is given and\n" +"true."); + +#define BYTES_SPLITLINES_METHODDEF \ + {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__}, + +static PyObject * +bytes_splitlines_impl(PyBytesObject*self, int keepends); + +static PyObject * +bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"keepends", 0}; + PyObject *return_value = NULL; + static char *_keywords[] = {"keepends", NULL}; int keepends = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines", - kwlist, &keepends)) - return NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|i:splitlines", _keywords, + &keepends)) + goto exit; + return_value = bytes_splitlines_impl(self, keepends); +exit: + return return_value; +} + +static PyObject * +bytes_splitlines_impl(PyBytesObject*self, int keepends) +/*[clinic end generated code: output=79da057d05d126de input=ddb93e3351080c8c]*/ +{ return stringlib_splitlines( (PyObject*) self, PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), keepends ); } - -PyDoc_STRVAR(fromhex_doc, -"bytes.fromhex(string) -> bytes\n\ -\n\ -Create a bytes object from a string of hexadecimal numbers.\n\ -Spaces between two numbers are accepted.\n\ -Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'."); - static int hex_digit_to_int(Py_UCS4 c) { @@ -2321,24 +3371,67 @@ hex_digit_to_int(Py_UCS4 c) return -1; } +/*[clinic input] +@classmethod +bytes.fromhex + + string: unicode + / + +Create a bytes object from a string of hexadecimal numbers. + +Spaces between two numbers are accepted. +Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'. +[clinic start generated code]*/ + +PyDoc_STRVAR(bytes_fromhex__doc__, +"fromhex($type, string, /)\n" +"--\n" +"\n" +"Create a bytes object from a string of hexadecimal numbers.\n" +"\n" +"Spaces between two numbers are accepted.\n" +"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\\\xb9\\\\x01\\\\xef\'."); + +#define BYTES_FROMHEX_METHODDEF \ + {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__}, + static PyObject * -bytes_fromhex(PyObject *cls, PyObject *args) +bytes_fromhex_impl(PyTypeObject *type, PyObject *string); + +static PyObject * +bytes_fromhex(PyTypeObject *type, PyObject *args) { - PyObject *newstring, *hexobj; + PyObject *return_value = NULL; + PyObject *string; + + if (!PyArg_ParseTuple(args, + "U:fromhex", + &string)) + goto exit; + return_value = bytes_fromhex_impl(type, string); + +exit: + return return_value; +} + +static PyObject * +bytes_fromhex_impl(PyTypeObject *type, PyObject *string) +/*[clinic end generated code: output=09e6cbef56cbbb65 input=bf4d1c361670acd3]*/ +{ + PyObject *newstring; char *buf; Py_ssize_t hexlen, byteslen, i, j; int top, bot; void *data; unsigned int kind; - if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj)) - return NULL; - assert(PyUnicode_Check(hexobj)); - if (PyUnicode_READY(hexobj)) + assert(PyUnicode_Check(string)); + if (PyUnicode_READY(string)) return NULL; - kind = PyUnicode_KIND(hexobj); - data = PyUnicode_DATA(hexobj); - hexlen = PyUnicode_GET_LENGTH(hexobj); + kind = PyUnicode_KIND(string); + data = PyUnicode_DATA(string); + hexlen = PyUnicode_GET_LENGTH(string); byteslen = hexlen/2; /* This overestimates if there are spaces */ newstring = PyBytes_FromStringAndSize(NULL, byteslen); @@ -2384,14 +3477,13 @@ bytes_methods[] = { _Py_capitalize__doc__}, {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__}, {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__}, - {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, + BYTES_DECODE_METHODDEF {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__}, {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS, expandtabs__doc__}, {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__}, - {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, - fromhex_doc}, + BYTES_FROMHEX_METHODDEF {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__}, {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS, _Py_isalnum__doc__}, @@ -2407,38 +3499,49 @@ bytes_methods[] = { _Py_istitle__doc__}, {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS, _Py_isupper__doc__}, - {"join", (PyCFunction)bytes_join, METH_O, join__doc__}, + BYTES_JOIN_METHODDEF {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__}, {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, - {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__}, - {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, - _Py_maketrans__doc__}, - {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__}, - {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__}, + BYTES_LSTRIP_METHODDEF + BYTES_MAKETRANS_METHODDEF + BYTES_PARTITION_METHODDEF + BYTES_REPLACE_METHODDEF {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__}, {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__}, {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, - {"rpartition", (PyCFunction)bytes_rpartition, METH_O, - rpartition__doc__}, - {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__}, - {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__}, - {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__}, - {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS, - splitlines__doc__}, + BYTES_RPARTITION_METHODDEF + BYTES_RSPLIT_METHODDEF + BYTES_RSTRIP_METHODDEF + BYTES_SPLIT_METHODDEF + BYTES_SPLITLINES_METHODDEF {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS, startswith__doc__}, - {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__}, + BYTES_STRIP_METHODDEF {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS, _Py_swapcase__doc__}, {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__}, - {"translate", (PyCFunction)bytes_translate, METH_VARARGS, - translate__doc__}, + BYTES_TRANSLATE_METHODDEF {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__}, {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__}, {NULL, NULL} /* sentinel */ }; static PyObject * +bytes_mod(PyObject *v, PyObject *w) +{ + if (!PyBytes_Check(v)) + Py_RETURN_NOTIMPLEMENTED; + return _PyBytes_Format(v, w); +} + +static PyNumberMethods bytes_as_number = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + bytes_mod, /*nb_remainder*/ +}; + +static PyObject * str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static PyObject * @@ -2465,7 +3568,7 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) "argument"); return NULL; } - return PyBytes_FromString(""); + return PyBytes_FromStringAndSize(NULL, 0); } if (PyUnicode_Check(x)) { @@ -2522,11 +3625,9 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } else { - new = PyBytes_FromStringAndSize(NULL, size); + new = _PyBytes_FromSize(size, 1); if (new == NULL) return NULL; - if (size > 0) - memset(((PyBytesObject*)new)->ob_sval, 0, size); return new; } @@ -2624,10 +3725,12 @@ PyBytes_FromObject(PyObject *x) returning a shared empty bytes string. This required because we want to call _PyBytes_Resize() the returned object, which we can only do on bytes objects with refcount == 1. */ - size += 1; + if (size == 0) + size = 1; new = PyBytes_FromStringAndSize(NULL, size); if (new == NULL) return NULL; + assert(Py_REFCNT(new) == 1); /* Get the iterator */ it = PyObject_GetIter(x); @@ -2729,7 +3832,7 @@ PyTypeObject PyBytes_Type = { 0, /* tp_setattr */ 0, /* tp_reserved */ (reprfunc)bytes_repr, /* tp_repr */ - 0, /* tp_as_number */ + &bytes_as_number, /* tp_as_number */ &bytes_as_sequence, /* tp_as_sequence */ &bytes_as_mapping, /* tp_as_mapping */ (hashfunc)bytes_hash, /* tp_hash */ @@ -2764,7 +3867,6 @@ PyTypeObject PyBytes_Type = { void PyBytes_Concat(PyObject **pv, PyObject *w) { - PyObject *v; assert(pv != NULL); if (*pv == NULL) return; @@ -2772,9 +3874,45 @@ PyBytes_Concat(PyObject **pv, PyObject *w) Py_CLEAR(*pv); return; } - v = bytes_concat(*pv, w); - Py_DECREF(*pv); - *pv = v; + + if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) { + /* Only one reference, so we can resize in place */ + Py_ssize_t oldsize; + Py_buffer wb; + + wb.len = -1; + if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) { + PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s", + Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name); + Py_CLEAR(*pv); + return; + } + + oldsize = PyBytes_GET_SIZE(*pv); + if (oldsize > PY_SSIZE_T_MAX - wb.len) { + PyErr_NoMemory(); + goto error; + } + if (_PyBytes_Resize(pv, oldsize + wb.len) < 0) + goto error; + + memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len); + PyBuffer_Release(&wb); + return; + + error: + PyBuffer_Release(&wb); + Py_CLEAR(*pv); + return; + } + + else { + /* Multiple references, need to create new object */ + PyObject *v; + v = bytes_concat(*pv, w); + Py_DECREF(*pv); + *pv = v; + } } void @@ -2785,14 +3923,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w) } -/* The following function breaks the notion that strings are immutable: - it changes the size of a string. We get away with this only if there +/* The following function breaks the notion that bytes are immutable: + it changes the size of a bytes object. We get away with this only if there is only one module referencing the object. You can also think of it - as creating a new string object and destroying the old one, only - more efficiently. In any case, don't use this if the string may + as creating a new bytes object and destroying the old one, only + more efficiently. In any case, don't use this if the bytes object may already be known to some other part of the code... - Note that if there's not enough memory to resize the string, the original - string object at *pv is deallocated, *pv is set to NULL, an "out of + Note that if there's not enough memory to resize the bytes object, the + original bytes object at *pv is deallocated, *pv is set to NULL, an "out of memory" exception is set, and -1 is returned. Else (on success) 0 is returned, and the value in *pv may or may not be the same as on input. As always, an extra byte is allocated for a trailing \0 byte (newsize @@ -2815,7 +3953,7 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) _Py_DEC_REFTOTAL; _Py_ForgetReference(v); *pv = (PyObject *) - PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize); + PyObject_REALLOC(v, PyBytesObject_SIZE + newsize); if (*pv == NULL) { PyObject_Del(v); PyErr_NoMemory(); |