summaryrefslogtreecommitdiff
path: root/Objects/bytesobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r--Objects/bytesobject.c262
1 files changed, 133 insertions, 129 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 62ddf21532..1d2fed7030 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -41,10 +41,6 @@ static PyBytesObject *nullstring;
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
/*
- For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
- parameter `size' denotes number of characters to allocate, not counting any
- null terminating character.
-
For PyBytes_FromString(), the parameter `str' points to a null-terminated
string containing exactly `size' bytes.
@@ -61,8 +57,8 @@ static PyBytesObject *nullstring;
The PyObject member `op->ob_size', which denotes the number of "extra
items" in a variable-size object, will contain the number of bytes
- allocated for string data, not counting the null terminating character. It
- is therefore equal to the equal to the `size' parameter (for
+ allocated for string data, not counting the null terminating character.
+ It is therefore equal to the `size' parameter (for
PyBytes_FromStringAndSize()) or the length of the string in the `str'
parameter (for PyBytes_FromString()).
*/
@@ -568,76 +564,69 @@ PyBytes_AsStringAndSize(register PyObject *obj,
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
- static const char *hexdigits = "0123456789abcdef";
register PyBytesObject* op = (PyBytesObject*) obj;
- Py_ssize_t length = Py_SIZE(op);
- size_t newsize;
+ Py_ssize_t i, length = Py_SIZE(op);
+ size_t newsize, squotes, dquotes;
PyObject *v;
- if (length > (PY_SSIZE_T_MAX - 3) / 4) {
+ unsigned char quote, *s, *p;
+
+ /* Compute size of output string */
+ squotes = dquotes = 0;
+ newsize = 3; /* b'' */
+ s = (unsigned char*)op->ob_sval;
+ for (i = 0; i < length; i++) {
+ switch(s[i]) {
+ case '\'': squotes++; newsize++; break;
+ case '"': dquotes++; newsize++; break;
+ case '\\': case '\t': case '\n': case '\r':
+ newsize += 2; break; /* \C */
+ default:
+ if (s[i] < ' ' || s[i] >= 0x7f)
+ newsize += 4; /* \xHH */
+ else
+ newsize++;
+ }
+ }
+ quote = '\'';
+ if (smartquotes && squotes && !dquotes)
+ quote = '"';
+ if (squotes && quote == '\'')
+ newsize += squotes;
+
+ if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
PyErr_SetString(PyExc_OverflowError,
"bytes object is too large to make repr");
return NULL;
}
- newsize = 3 + 4 * length;
- v = PyUnicode_FromUnicode(NULL, newsize);
+
+ v = PyUnicode_New(newsize, 127);
if (v == NULL) {
return NULL;
}
- else {
- register Py_ssize_t i;
- register Py_UNICODE c;
- register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
- int quote;
-
- /* Figure out which quote to use; single is preferred */
- quote = '\'';
- if (smartquotes) {
- char *test, *start;
- start = PyBytes_AS_STRING(op);
- for (test = start; test < start+length; ++test) {
- if (*test == '"') {
- quote = '\''; /* back to single */
- goto decided;
- }
- else if (*test == '\'')
- quote = '"';
- }
- decided:
- ;
- }
-
- *p++ = 'b', *p++ = quote;
- for (i = 0; i < length; i++) {
- /* There's at least enough room for a hex escape
- and a closing quote. */
- assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
- c = op->ob_sval[i];
- if (c == quote || c == '\\')
- *p++ = '\\', *p++ = c;
- else if (c == '\t')
- *p++ = '\\', *p++ = 't';
- else if (c == '\n')
- *p++ = '\\', *p++ = 'n';
- else if (c == '\r')
- *p++ = '\\', *p++ = 'r';
- else if (c < ' ' || c >= 0x7f) {
- *p++ = '\\';
- *p++ = 'x';
- *p++ = hexdigits[(c & 0xf0) >> 4];
- *p++ = hexdigits[c & 0xf];
- }
- else
- *p++ = c;
- }
- assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
- *p++ = quote;
- *p = '\0';
- if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
- Py_DECREF(v);
- return NULL;
+ p = PyUnicode_1BYTE_DATA(v);
+
+ *p++ = 'b', *p++ = quote;
+ for (i = 0; i < length; i++) {
+ unsigned char c = op->ob_sval[i];
+ if (c == quote || c == '\\')
+ *p++ = '\\', *p++ = c;
+ else if (c == '\t')
+ *p++ = '\\', *p++ = 't';
+ else if (c == '\n')
+ *p++ = '\\', *p++ = 'n';
+ else if (c == '\r')
+ *p++ = '\\', *p++ = 'r';
+ else if (c < ' ' || c >= 0x7f) {
+ *p++ = '\\';
+ *p++ = 'x';
+ *p++ = Py_hexdigits[(c & 0xf0) >> 4];
+ *p++ = Py_hexdigits[c & 0xf];
}
- return v;
+ else
+ *p++ = c;
}
+ *p++ = quote;
+ return v;
}
static PyObject *
@@ -871,33 +860,11 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
static Py_hash_t
bytes_hash(PyBytesObject *a)
{
- register Py_ssize_t len;
- register unsigned char *p;
- register Py_hash_t x;
-
- assert(_Py_HashSecret_Initialized);
- if (a->ob_shash != -1)
- return a->ob_shash;
- len = Py_SIZE(a);
- /*
- We make the hash of the empty string be 0, rather than using
- (prefix ^ suffix), since this slightly obfuscates the hash secret
- */
- if (len == 0) {
- a->ob_shash = 0;
- return 0;
- }
- p = (unsigned char *) a->ob_sval;
- x = _Py_HashSecret.prefix;
- x ^= *p << 7;
- while (--len >= 0)
- x = (_PyHASH_MULTIPLIER*x) ^ *p++;
- x ^= Py_SIZE(a);
- x ^= _Py_HashSecret.suffix;
- if (x == -1)
- x = -2;
- a->ob_shash = x;
- return x;
+ if (a->ob_shash == -1) {
+ /* Can't fail */
+ a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
+ }
+ return a->ob_shash;
}
static PyObject*
@@ -1005,7 +972,7 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
PyDoc_STRVAR(split__doc__,
-"B.split([sep[, maxsplit]]) -> list of bytes\n\
+"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
\n\
Return a list of the sections in B, using sep as the delimiter.\n\
If sep is not specified or is None, B is split on ASCII whitespace\n\
@@ -1013,15 +980,17 @@ characters (space, tab, return, newline, formfeed, vertical tab).\n\
If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
-bytes_split(PyBytesObject *self, PyObject *args)
+bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
{
+ static char *kwlist[] = {"sep", "maxsplit", 0};
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1;
const char *s = PyBytes_AS_STRING(self), *sub;
Py_buffer vsub;
PyObject *list, *subobj = Py_None;
- if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
+ kwlist, &subobj, &maxsplit))
return NULL;
if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX;
@@ -1093,7 +1062,7 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
}
PyDoc_STRVAR(rsplit__doc__,
-"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
+"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
\n\
Return a list of the sections in B, using sep as the delimiter,\n\
starting at the end of B and working to the front.\n\
@@ -1103,15 +1072,17 @@ If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
-bytes_rsplit(PyBytesObject *self, PyObject *args)
+bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
{
+ static char *kwlist[] = {"sep", "maxsplit", 0};
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1;
const char *s = PyBytes_AS_STRING(self), *sub;
Py_buffer vsub;
PyObject *list, *subobj = Py_None;
- if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
+ kwlist, &subobj, &maxsplit))
return NULL;
if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX;
@@ -1252,31 +1223,42 @@ Py_LOCAL_INLINE(Py_ssize_t)
bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
{
PyObject *subobj;
+ char byte;
+ Py_buffer subbuf;
const char *sub;
Py_ssize_t sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
+ Py_ssize_t res;
- if (!stringlib_parse_args_finds("find/rfind/index/rindex",
- args, &subobj, &start, &end))
+ if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
+ args, &subobj, &byte, &start, &end))
return -2;
- if (PyBytes_Check(subobj)) {
- sub = PyBytes_AS_STRING(subobj);
- sub_len = PyBytes_GET_SIZE(subobj);
+ if (subobj) {
+ if (_getbuffer(subobj, &subbuf) < 0)
+ return -2;
+
+ sub = subbuf.buf;
+ sub_len = subbuf.len;
+ }
+ else {
+ sub = &byte;
+ sub_len = 1;
}
- else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
- /* XXX - the "expected a character buffer object" is pretty
- confusing for a non-expert. remap to something else ? */
- return -2;
if (dir > 0)
- return stringlib_find_slice(
+ res = stringlib_find_slice(
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
sub, sub_len, start, end);
else
- return stringlib_rfind_slice(
+ res = stringlib_rfind_slice(
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
sub, sub_len, start, end);
+
+ if (subobj)
+ PyBuffer_Release(&subbuf);
+
+ return res;
}
@@ -1502,23 +1484,38 @@ bytes_count(PyBytesObject *self, PyObject *args)
PyObject *sub_obj;
const char *str = PyBytes_AS_STRING(self), *sub;
Py_ssize_t sub_len;
+ char byte;
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
- if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
+ Py_buffer vsub;
+ PyObject *count_obj;
+
+ if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
+ &start, &end))
return NULL;
- if (PyBytes_Check(sub_obj)) {
- sub = PyBytes_AS_STRING(sub_obj);
- sub_len = PyBytes_GET_SIZE(sub_obj);
+ if (sub_obj) {
+ if (_getbuffer(sub_obj, &vsub) < 0)
+ return NULL;
+
+ sub = vsub.buf;
+ sub_len = vsub.len;
+ }
+ else {
+ sub = &byte;
+ sub_len = 1;
}
- else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
- return NULL;
ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
- return PyLong_FromSsize_t(
+ count_obj = PyLong_FromSsize_t(
stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
);
+
+ if (sub_obj)
+ PyBuffer_Release(&vsub);
+
+ return count_obj;
}
@@ -2327,11 +2324,13 @@ Line breaks are not included in the resulting list unless keepends\n\
is given and true.");
static PyObject*
-bytes_splitlines(PyObject *self, PyObject *args)
+bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
{
+ static char *kwlist[] = {"keepends", 0};
int keepends = 0;
- if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
+ kwlist, &keepends))
return NULL;
return stringlib_splitlines(
@@ -2349,7 +2348,7 @@ Spaces between two numbers are accepted.\n\
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
static int
-hex_digit_to_int(Py_UNICODE c)
+hex_digit_to_int(Py_UCS4 c)
{
if (c >= 128)
return -1;
@@ -2369,15 +2368,20 @@ bytes_fromhex(PyObject *cls, PyObject *args)
{
PyObject *newstring, *hexobj;
char *buf;
- Py_UNICODE *hex;
Py_ssize_t hexlen, byteslen, i, j;
int top, bot;
+ void *data;
+ unsigned int kind;
if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
return NULL;
assert(PyUnicode_Check(hexobj));
- hexlen = PyUnicode_GET_SIZE(hexobj);
- hex = PyUnicode_AS_UNICODE(hexobj);
+ if (PyUnicode_READY(hexobj))
+ return NULL;
+ kind = PyUnicode_KIND(hexobj);
+ data = PyUnicode_DATA(hexobj);
+ hexlen = PyUnicode_GET_LENGTH(hexobj);
+
byteslen = hexlen/2; /* This overestimates if there are spaces */
newstring = PyBytes_FromStringAndSize(NULL, byteslen);
if (!newstring)
@@ -2385,12 +2389,12 @@ bytes_fromhex(PyObject *cls, PyObject *args)
buf = PyBytes_AS_STRING(newstring);
for (i = j = 0; i < hexlen; i += 2) {
/* skip over spaces in the input */
- while (hex[i] == ' ')
+ while (PyUnicode_READ(kind, data, i) == ' ')
i++;
if (i >= hexlen)
break;
- top = hex_digit_to_int(hex[i]);
- bot = hex_digit_to_int(hex[i+1]);
+ top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
+ bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
if (top == -1 || bot == -1) {
PyErr_Format(PyExc_ValueError,
"non-hexadecimal number found in "
@@ -2470,10 +2474,10 @@ bytes_methods[] = {
{"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
{"rpartition", (PyCFunction)bytes_rpartition, METH_O,
rpartition__doc__},
- {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
+ {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
{"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
- {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
- {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
+ {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
+ {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
splitlines__doc__},
{"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
startswith__doc__},
@@ -2931,7 +2935,7 @@ _PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
PyErr_BadInternalCall();
return NULL;
}
- llen = PyUnicode_GetSize(result);
+ llen = PyUnicode_GetLength(result);
if (llen > INT_MAX) {
PyErr_SetString(PyExc_ValueError,
"string too large in _PyBytes_FormatLong");