diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
commit | cf46733632c7279a9fd0fe6ce26f9185a4ae82a9 (patch) | |
tree | da27775a2161723ef342e91af41a8b51fedef405 /subversion/libsvn_subr/string.c | |
parent | bb0ef45f7c46b0ae221b26265ef98a768c33f820 (diff) | |
download | subversion-tarball-master.tar.gz |
subversion-1.9.7 (HEAD, subversion-1.9.7, master)
Diffstat (limited to 'subversion/libsvn_subr/string.c')
-rw-r--r-- | subversion/libsvn_subr/string.c | 357 |
1 files changed, 284 insertions, 73 deletions
diff --git a/subversion/libsvn_subr/string.c b/subversion/libsvn_subr/string.c index c3d7fec..43a1a4e 100644 --- a/subversion/libsvn_subr/string.c +++ b/subversion/libsvn_subr/string.c @@ -26,6 +26,7 @@ #include <apr.h> +#include <assert.h> #include <string.h> /* for memcpy(), memcmp(), strlen() */ #include <apr_fnmatch.h> @@ -53,9 +54,9 @@ membuf_create(void **data, apr_size_t *size, /* apr_palloc will allocate multiples of 8. * Thus, we would waste some of that memory if we stuck to the * smaller size. Note that this is safe even if apr_palloc would - * use some other aligment or none at all. */ + * use some other alignment or none at all. */ minimum_size = APR_ALIGN_DEFAULT(minimum_size); - *data = (!minimum_size ? NULL : apr_palloc(pool, minimum_size)); + *data = apr_palloc(pool, minimum_size); *size = minimum_size; } @@ -78,14 +79,10 @@ membuf_ensure(void **data, apr_size_t *size, apr_size_t new_size = *size; if (new_size == 0) - /* APR will increase odd allocation sizes to the next - * multiple for 8, for instance. Take advantage of that - * knowledge and allow for the extra size to be used. */ new_size = minimum_size; else while (new_size < minimum_size) { - /* new_size is aligned; doubling it should keep it aligned */ const apr_size_t prev_size = new_size; new_size *= 2; @@ -121,7 +118,10 @@ svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size) const apr_size_t old_size = membuf->size; membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool); - if (membuf->data && old_data && old_data != membuf->data) + + /* If we re-allocated MEMBUF->DATA, it cannot be NULL. + * Statically initialized membuffers (OLD_DATA) may be NULL, though. 
*/ + if (old_data && old_data != membuf->data) memcpy(membuf->data, old_data, old_size); } @@ -151,7 +151,7 @@ string_compare(const char *str1, if (len1 != len2) return FALSE; - /* now the strings must have identical lenghths */ + /* now the strings must have identical lengths */ if ((memcmp(str1, str2, len1)) == 0) return TRUE; @@ -240,7 +240,9 @@ svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool) new_string->data = data; new_string->len = size; - memcpy(data, bytes, size); + /* If SIZE is 0, NULL is valid for BYTES. */ + if (size) + memcpy(data, bytes, size); /* Null termination is the convention -- even if we suspect the data to be binary, it's not up to us to decide, it's the caller's @@ -299,8 +301,9 @@ svn_string_isempty(const svn_string_t *str) svn_string_t * svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool) { - return (svn_string_ncreate(original_string->data, - original_string->len, pool)); + return (original_string ? svn_string_ncreate(original_string->data, + original_string->len, pool) + : NULL); } @@ -393,7 +396,10 @@ svn_stringbuf_t * svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool) { svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool); - memcpy(strbuf->data, bytes, size); + + /* If SIZE is 0, NULL is valid for BYTES. 
*/ + if (size) + memcpy(strbuf->data, bytes, size); /* Null termination is the convention -- even if we suspect the data to be binary, it's not up to us to decide, it's the caller's @@ -418,6 +424,17 @@ svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool) return svn_stringbuf_ncreate(str->data, str->len, pool); } +svn_stringbuf_t * +svn_stringbuf_create_wrap(char *str, apr_pool_t *pool) +{ + svn_stringbuf_t *result = apr_palloc(pool, sizeof(*result)); + result->pool = pool; + result->data = str; + result->len = strlen(str); + result->blocksize = result->len + 1; + + return result; +} svn_stringbuf_t * svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap) @@ -579,6 +596,10 @@ svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes, apr_size_t total_len; void *start_address; + if (!count) + /* Allow BYTES to be NULL by avoiding passing it to memcpy. */ + return; + total_len = str->len + count; /* total size needed */ /* svn_stringbuf_ensure adds 1 for null terminator. */ @@ -595,6 +616,21 @@ svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes, to null-terminate. */ } +void +svn_stringbuf_appendfill(svn_stringbuf_t *str, + char byte, + apr_size_t count) +{ + apr_size_t new_len = str->len + count; + svn_stringbuf_ensure(str, new_len); + + memset(str->data + str->len, byte, count); + + /* update buffer length and always NUL-terminate it */ + str->len = new_len; + str->data[new_len] = '\0'; +} + void svn_stringbuf_appendstr(svn_stringbuf_t *targetstr, @@ -616,23 +652,22 @@ svn_stringbuf_insert(svn_stringbuf_t *str, const char *bytes, apr_size_t count) { + /* For COUNT==0, we allow BYTES to be NULL. It's a no-op in that case. 
*/ + if (count == 0) + return; + + /* special case: BYTES overlaps with this string -> copy the source */ if (bytes + count > str->data && bytes < str->data + str->blocksize) - { - /* special case: BYTES overlaps with this string -> copy the source */ - const char *temp = apr_pmemdup(str->pool, bytes, count); - svn_stringbuf_insert(str, pos, temp, count); - } - else - { - if (pos > str->len) - pos = str->len; + bytes = apr_pmemdup(str->pool, bytes, count); - svn_stringbuf_ensure(str, str->len + count); - memmove(str->data + pos + count, str->data + pos, str->len - pos + 1); - memcpy(str->data + pos, bytes, count); + if (pos > str->len) + pos = str->len; - str->len += count; - } + svn_stringbuf_ensure(str, str->len + count); + memmove(str->data + pos + count, str->data + pos, str->len - pos + 1); + memcpy(str->data + pos, bytes, count); + + str->len += count; } void @@ -642,7 +677,7 @@ svn_stringbuf_remove(svn_stringbuf_t *str, { if (pos > str->len) pos = str->len; - if (pos + count > str->len) + if (count > str->len - pos) count = str->len - pos; memmove(str->data + pos, str->data + pos + count, str->len - pos - count + 1); @@ -656,32 +691,35 @@ svn_stringbuf_replace(svn_stringbuf_t *str, const char *bytes, apr_size_t new_count) { - if (bytes + new_count > str->data && bytes < str->data + str->blocksize) + /* For COUNT==0, we allow BYTES to be NULL. + * In that case, this is just a substring removal. 
*/ + if (new_count == 0) { - /* special case: BYTES overlaps with this string -> copy the source */ - const char *temp = apr_pmemdup(str->pool, bytes, new_count); - svn_stringbuf_replace(str, pos, old_count, temp, new_count); + svn_stringbuf_remove(str, pos, old_count); + return; } - else - { - if (pos > str->len) - pos = str->len; - if (pos + old_count > str->len) - old_count = str->len - pos; - if (old_count < new_count) - { - apr_size_t delta = new_count - old_count; - svn_stringbuf_ensure(str, str->len + delta); - } + /* special case: BYTES overlaps with this string -> copy the source */ + if (bytes + new_count > str->data && bytes < str->data + str->blocksize) + bytes = apr_pmemdup(str->pool, bytes, new_count); - if (old_count != new_count) - memmove(str->data + pos + new_count, str->data + pos + old_count, - str->len - pos - old_count + 1); + if (pos > str->len) + pos = str->len; + if (old_count > str->len - pos) + old_count = str->len - pos; - memcpy(str->data + pos, bytes, new_count); - str->len += new_count - old_count; + if (old_count < new_count) + { + apr_size_t delta = new_count - old_count; + svn_stringbuf_ensure(str, str->len + delta); } + + if (old_count != new_count) + memmove(str->data + pos + new_count, str->data + pos + old_count, + str->len - pos - old_count + 1); + + memcpy(str->data + pos, bytes, new_count); + str->len += new_count - old_count; } @@ -832,7 +870,7 @@ char * svn_cstring_tokenize(const char *sep, char **str) { char *token; - const char * next; + char *next; char csep; /* check parameters */ @@ -862,8 +900,8 @@ svn_cstring_tokenize(const char *sep, char **str) } else { - *(char *)next = '\0'; - *str = (char *)next + 1; + *next = '\0'; + *str = next + 1; } return token; @@ -1014,17 +1052,33 @@ svn_cstring_atoi(int *n, const char *str) return SVN_NO_ERROR; } - -apr_status_t -svn__strtoff(apr_off_t *offset, const char *buf, char **end, int base) +unsigned long +svn__strtoul(const char* buffer, const char** end) { -#if 
!APR_VERSION_AT_LEAST(1,0,0) - errno = 0; - *offset = strtol(buf, end, base); - return APR_FROM_OS_ERROR(errno); -#else - return apr_strtoff(offset, buf, end, base); -#endif + unsigned long result = 0; + + /* this loop will execute in just 2 CPU cycles, confirmed by measurement: + 7 macro-ops (max 4 / cycle => 2 cycles) + 1 load (max 1 / cycle) + 1 jumps (compare + conditional jump == 1 macro op; max 1 / cycle) + 2 arithmetic ops (subtract, increment; max 3 / cycle) + 2 scale-and-add AGU ops (max 3 / cycle) + 1 compiler-generated move operation + dependency chain: temp = result * 4 + result; result = temp * 2 + c + (2 ops with latency 1 => 2 cycles) + */ + while (1) + { + unsigned long c = (unsigned char)*buffer - (unsigned char)'0'; + if (c > 9) + break; + + result = result * 10 + c; + ++buffer; + } + + *end = buffer; + return result; } /* "Precalculated" itoa values for 2 places (including leading zeros). @@ -1123,11 +1177,11 @@ svn__i64toa(char * dest, apr_int64_t number) return svn__ui64toa(dest, (apr_uint64_t)number); *dest = '-'; - return svn__ui64toa(dest + 1, (apr_uint64_t)(0-number)) + 1; + return svn__ui64toa(dest + 1, 0 - (apr_uint64_t)number) + 1; } static void -ui64toa_sep(apr_uint64_t number, char seperator, char *buffer) +ui64toa_sep(apr_uint64_t number, char separator, char *buffer) { apr_size_t length = svn__ui64toa(buffer, number); apr_size_t i; @@ -1135,7 +1189,7 @@ ui64toa_sep(apr_uint64_t number, char seperator, char *buffer) for (i = length; i > 3; i -= 3) { memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3); - buffer[i-3] = seperator; + buffer[i-3] = separator; length++; } @@ -1143,30 +1197,110 @@ ui64toa_sep(apr_uint64_t number, char seperator, char *buffer) } char * -svn__ui64toa_sep(apr_uint64_t number, char seperator, apr_pool_t *pool) +svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool) { char buffer[2 * SVN_INT64_BUFFER_SIZE]; - ui64toa_sep(number, seperator, buffer); + ui64toa_sep(number, separator, buffer); 
return apr_pstrdup(pool, buffer); } char * -svn__i64toa_sep(apr_int64_t number, char seperator, apr_pool_t *pool) +svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool) { char buffer[2 * SVN_INT64_BUFFER_SIZE]; if (number < 0) { buffer[0] = '-'; - ui64toa_sep((apr_uint64_t)(-number), seperator, &buffer[1]); + ui64toa_sep((apr_uint64_t)(-number), separator, &buffer[1]); } else - ui64toa_sep((apr_uint64_t)(number), seperator, buffer); + ui64toa_sep((apr_uint64_t)(number), separator, buffer); return apr_pstrdup(pool, buffer); } -unsigned int +apr_size_t +svn__ui64tobase36(char *dest, apr_uint64_t value) +{ + char *dest_start = dest; + if (value < 10) + { + /* pretty frequent and trivial case. Make it fast. */ + *(dest++) = (char)(value) + '0'; + } + else + { + char buffer[SVN_INT64_BUFFER_SIZE]; + char *p = buffer; + + /* write result as little-endian to buffer */ + while (value > 0) + { + char c = (char)(value % 36); + value /= 36; + + *p = (c <= 9) ? (c + '0') : (c - 10 + 'a'); + ++p; + } + + /* copy as big-endian to DEST */ + while (p > buffer) + *(dest++) = *(--p); + } + + *dest = '\0'; + return dest - dest_start; +} + +apr_uint64_t +svn__base36toui64(const char **next, const char *source) +{ + apr_uint64_t result = 0; + apr_uint64_t factor = 1; + int i = 0; + char digits[SVN_INT64_BUFFER_SIZE]; + + /* convert digits to numerical values and count the number of places. + * Also, prevent buffer overflow. 
*/ + while (i < sizeof(digits)) + { + char c = *source; + if (c < 'a') + { + /* includes detection of NUL terminator */ + if (c < '0' || c > '9') + break; + + c -= '0'; + } + else + { + if (c < 'a' || c > 'z') + break; + + c -= 'a' - 10; + } + + digits[i++] = c; + source++; + } + + /* fold digits into the result */ + while (i > 0) + { + result += factor * (apr_uint64_t)digits[--i]; + factor *= 36; + } + + if (next) + *next = source; + + return result; +} + + +apr_size_t svn_cstring__similarity(const char *stra, const char *strb, svn_membuf_t *buffer, apr_size_t *rlcs) { @@ -1178,7 +1312,7 @@ svn_cstring__similarity(const char *stra, const char *strb, return svn_string__similarity(&stringa, &stringb, buffer, rlcs); } -unsigned int +apr_size_t svn_string__similarity(const svn_string_t *stringa, const svn_string_t *stringb, svn_membuf_t *buffer, apr_size_t *rlcs) @@ -1242,7 +1376,7 @@ svn_string__similarity(const svn_string_t *stringa, /* Calculate LCS length of the remainder */ for (pstr = stra; pstr < enda; ++pstr) { - int i; + apr_size_t i; for (i = 1; i <= slots; ++i) { if (*pstr == strb[i-1]) @@ -1267,7 +1401,84 @@ svn_string__similarity(const svn_string_t *stringa, /* Return similarity ratio rounded to 4 significant digits */ if (total) - return(unsigned int)((2000 * lcs + total/2) / total); + return ((2 * SVN_STRING__SIM_RANGE_MAX * lcs + total/2) / total); else - return 1000; + return SVN_STRING__SIM_RANGE_MAX; +} + +apr_size_t +svn_cstring__match_length(const char *a, + const char *b, + apr_size_t max_len) +{ + apr_size_t pos = 0; + +#if SVN_UNALIGNED_ACCESS_IS_OK + + /* Chunky processing is so much faster ... + * + * We can't make this work on architectures that require aligned access + * because A and B will probably have different alignment. So, skipping + * the first few chars until alignment is reached is not an option. 
+ */ + for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t)) + if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos)) + break; + +#endif + + for (; pos < max_len; ++pos) + if (a[pos] != b[pos]) + break; + + return pos; +} + +apr_size_t +svn_cstring__reverse_match_length(const char *a, + const char *b, + apr_size_t max_len) +{ + apr_size_t pos = 0; + +#if SVN_UNALIGNED_ACCESS_IS_OK + + /* Chunky processing is so much faster ... + * + * We can't make this work on architectures that require aligned access + * because A and B will probably have different alignment. So, skipping + * the first few chars until alignment is reached is not an option. + */ + for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t)) + if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos)) + break; + + pos -= sizeof(apr_size_t); + +#endif + + /* If we find a mismatch at -pos, pos-1 characters matched. + */ + while (++pos <= max_len) + if (a[0-pos] != b[0-pos]) + return pos - 1; + + /* No mismatch found -> at least MAX_LEN matching chars. + */ + return max_len; +} + +const char * +svn_cstring_skip_prefix(const char *str, const char *prefix) +{ + apr_size_t len = strlen(prefix); + + if (strncmp(str, prefix, len) == 0) + { + return str + len; + } + else + { + return NULL; + } } |