diff options
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r-- | Objects/bytesobject.c | 508 |
1 files changed, 3 insertions, 505 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 2bf39e3529..aeddf53a17 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2198,508 +2198,6 @@ bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to) return _Py_bytes_maketrans(frm, to); } -/* find and count characters and substrings */ - -#define findchar(target, target_len, c) \ - ((char *)memchr((const void *)(target), c, target_len)) - -/* String ops must return a string. */ -/* If the object is subclass of string, create a copy */ -Py_LOCAL(PyBytesObject *) -return_self(PyBytesObject *self) -{ - if (PyBytes_CheckExact(self)) { - Py_INCREF(self); - return self; - } - return (PyBytesObject *)PyBytes_FromStringAndSize( - PyBytes_AS_STRING(self), - PyBytes_GET_SIZE(self)); -} - -Py_LOCAL_INLINE(Py_ssize_t) -countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount) -{ - Py_ssize_t count=0; - const char *start=target; - const char *end=target+target_len; - - while ( (start=findchar(start, end-start, c)) != NULL ) { - count++; - if (count >= maxcount) - break; - start += 1; - } - return count; -} - - -/* Algorithms for different cases of string replacement */ - -/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_interleave(PyBytesObject *self, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - Py_ssize_t self_len, result_len; - Py_ssize_t count, i; - PyBytesObject *result; - - self_len = PyBytes_GET_SIZE(self); - - /* 1 at the end plus 1 after every character; - count = min(maxcount, self_len + 1) */ - if (maxcount <= self_len) - count = maxcount; - else - /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ - count = self_len + 1; - - /* Check for overflow */ - /* result_len = count * to_len + self_len; */ - assert(count > 0); - if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, - "replacement bytes are too long"); - return NULL; - } - result_len = count * to_len + self_len; - - if (! (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) ) - return NULL; - - self_s = PyBytes_AS_STRING(self); - result_s = PyBytes_AS_STRING(result); - - if (to_len > 1) { - /* Lay the first one down (guaranteed this will occur) */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - count -= 1; - - for (i = 0; i < count; i++) { - *result_s++ = *self_s++; - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - } - } - else { - result_s[0] = to_s[0]; - result_s += to_len; - count -= 1; - for (i = 0; i < count; i++) { - *result_s++ = *self_s++; - result_s[0] = to_s[0]; - result_s += to_len; - } - } - - /* Copy the rest of the original string */ - Py_MEMCPY(result_s, self_s, self_len-i); - - return result; -} - -/* Special case for deleting a single character */ -/* len(self)>=1, len(from)==1, to="", maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_delete_single_character(PyBytesObject *self, - char from_c, Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count; - PyBytesObject *result; - - self_len = PyBytes_GET_SIZE(self); - self_s = PyBytes_AS_STRING(self); - - count = countchar(self_s, self_len, from_c, maxcount); - if (count == 0) { - return return_self(self); - } - - result_len = self_len - count; /* from_len == 1 */ - assert(result_len>=0); - - if ( (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - start = next+1; - } - Py_MEMCPY(result_s, start, end-start); - - return result; -} - -/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ - -Py_LOCAL(PyBytesObject *) -replace_delete_substring(PyBytesObject *self, - const char *from_s, Py_ssize_t from_len, - Py_ssize_t maxcount) { - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count, offset; - PyBytesObject *result; - - self_len = PyBytes_GET_SIZE(self); - self_s = PyBytes_AS_STRING(self); - - count = stringlib_count(self_s, self_len, - from_s, from_len, - maxcount); - - if (count == 0) { - /* no matches */ - return return_self(self); - } - - result_len = self_len - (count * from_len); - assert (result_len>=0); - - if ( (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) == NULL ) - return NULL; - - result_s = PyBytes_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset == -1) - break; - next = start + offset; - - Py_MEMCPY(result_s, start, next-start); - - result_s += (next-start); - start = next+from_len; - } - Py_MEMCPY(result_s, start, end-start); - return result; -} - -/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_single_character_in_place(PyBytesObject *self, - char from_c, char to_c, - Py_ssize_t maxcount) -{ - char *self_s, *result_s, *start, *end, *next; - Py_ssize_t self_len; - PyBytesObject *result; - - /* The result string will be the same size */ - self_s = PyBytes_AS_STRING(self); - self_len = PyBytes_GET_SIZE(self); - - next = findchar(self_s, self_len, from_c); - - if (next == NULL) { - /* No matches; return the original string */ - return return_self(self); - } - - /* Need to make a new string */ - result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len); - if (result == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - Py_MEMCPY(result_s, self_s, self_len); - - /* change everything in-place, starting with this one */ - start = result_s + (next-self_s); - *start = to_c; - start++; - end = result_s + self_len; - - while (--maxcount > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - *next = to_c; - start = next+1; - } - - return result; -} - -/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_substring_in_place(PyBytesObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *result_s, *start, *end; - char *self_s; - Py_ssize_t self_len, offset; - PyBytesObject *result; - - /* The result string will be the same size */ - - self_s = PyBytes_AS_STRING(self); - self_len = PyBytes_GET_SIZE(self); - - offset = stringlib_find(self_s, self_len, - from_s, from_len, - 0); - if (offset == -1) { - /* No matches; return the original string */ - return return_self(self); - } - - /* Need to make a new string */ - result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len); - if (result == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - Py_MEMCPY(result_s, self_s, self_len); - - /* change everything in-place, starting with this one */ - start = result_s + offset; - Py_MEMCPY(start, to_s, from_len); - start += from_len; - end = result_s + self_len; - - while ( --maxcount > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset==-1) - break; - Py_MEMCPY(start+offset, to_s, from_len); - start += offset+from_len; - } - - return result; -} - -/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_single_character(PyBytesObject *self, - char from_c, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count; - PyBytesObject *result; - - self_s = PyBytes_AS_STRING(self); - self_len = PyBytes_GET_SIZE(self); - - count = countchar(self_s, self_len, from_c, maxcount); - if (count == 0) { - /* no matches, return unchanged */ - return return_self(self); - } - - /* use the difference between current and new, hence the "-1" */ - /* result_len = self_len + count * (to_len-1) */ - assert(count > 0); - if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, - "replacement bytes are too long"); - return NULL; - } - result_len = self_len + count * (to_len - 1); - - if ( (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - - if (next == start) { - /* replace with the 'to' */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start += 1; - } else { - /* copy the unchanged old then the 'to' */ - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start = next+1; - } - } - /* Copy the remainder of the remaining string */ - Py_MEMCPY(result_s, start, end-start); - - return result; -} - -/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_substring(PyBytesObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) { - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count, offset; - PyBytesObject *result; - - self_s = PyBytes_AS_STRING(self); - self_len = PyBytes_GET_SIZE(self); - - count = stringlib_count(self_s, self_len, - from_s, from_len, - maxcount); - - if (count == 0) { - /* no matches, return unchanged */ - return return_self(self); - } - - /* Check for overflow */ - /* result_len = self_len + count * (to_len-from_len) */ - assert(count > 0); - if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, - "replacement bytes are too long"); - return NULL; - } - result_len = self_len + count * (to_len-from_len); - - if ( (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset == -1) - break; - next = start+offset; - if (next == start) { - /* replace with the 'to' */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start += from_len; - } else { - /* copy the unchanged old then the 'to' */ - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start = next+from_len; - } - } - /* Copy the remainder of the remaining string */ - Py_MEMCPY(result_s, start, end-start); - - return result; -} - - -Py_LOCAL(PyBytesObject *) -replace(PyBytesObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - if (maxcount < 0) { - maxcount = PY_SSIZE_T_MAX; - } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) { - /* nothing to do; return the original string */ - return return_self(self); - } - - if (maxcount == 0 || - (from_len == 0 && to_len == 0)) { - /* nothing to do; return the original string */ - return return_self(self); - } - - /* Handle zero-length special cases */ - - if (from_len == 0) { - /* insert the 'to' string everywhere. */ - /* >>> "Python".replace("", ".") */ - /* '.P.y.t.h.o.n.' */ - return replace_interleave(self, to_s, to_len, maxcount); - } - - /* Except for "".replace("", "A") == "A" there is no way beyond this */ - /* point for an empty self string to generate a non-empty string */ - /* Special case so the remaining code always gets a non-empty string */ - if (PyBytes_GET_SIZE(self) == 0) { - return return_self(self); - } - - if (to_len == 0) { - /* delete all occurrences of 'from' string */ - if (from_len == 1) { - return replace_delete_single_character( - self, from_s[0], maxcount); - } else { - return replace_delete_substring(self, from_s, - from_len, maxcount); - } - } - - /* Handle special case where both strings have the same length */ - - if (from_len == to_len) { - if (from_len == 1) { - return replace_single_character_in_place( - self, - from_s[0], - to_s[0], - maxcount); - } else { - return replace_substring_in_place( - self, from_s, from_len, to_s, to_len, - maxcount); - } - } - - /* Otherwise use the more generic algorithms */ - if (from_len == 1) { - return replace_single_character(self, from_s[0], - to_s, to_len, maxcount); - } else { - /* len('from')>=2, len('to')>=1 */ - return replace_substring(self, from_s, from_len, to_s, to_len, - maxcount); - } -} - /*[clinic input] bytes.replace @@ -2722,9 +2220,9 @@ bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new, Py_ssize_t count) /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/ { - return (PyObject *)replace((PyBytesObject *) self, - (const char *)old->buf, old->len, - (const char *)new->buf, new->len, count); + return stringlib_replace((PyObject *)self, + (const char *)old->buf, old->len, + (const char *)new->buf, new->len, count); } /** End DALKE **/ |