diff options
Diffstat (limited to 'subversion/libsvn_subr/hash.c')
-rw-r--r-- | subversion/libsvn_subr/hash.c | 291 |
1 files changed, 150 insertions, 141 deletions
diff --git a/subversion/libsvn_subr/hash.c b/subversion/libsvn_subr/hash.c index 7868cac..f58c43c 100644 --- a/subversion/libsvn_subr/hash.c +++ b/subversion/libsvn_subr/hash.c @@ -40,6 +40,7 @@ #include "svn_pools.h" #include "private/svn_dep_compat.h" +#include "private/svn_sorts_private.h" #include "private/svn_subr_private.h" #include "svn_private_config.h" @@ -89,119 +90,156 @@ /*** Dumping and loading hash files. */ /* Implements svn_hash_read2 and svn_hash_read_incremental. */ -static svn_error_t * -hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator, - svn_boolean_t incremental, apr_pool_t *pool) +svn_error_t * +svn_hash__read_entry(svn_hash__entry_t *entry, + svn_stream_t *stream, + const char *terminator, + svn_boolean_t incremental, + apr_pool_t *pool) { svn_stringbuf_t *buf; svn_boolean_t eof; - apr_size_t len, keylen, vallen; - char c, *keybuf, *valbuf; - apr_pool_t *iterpool = svn_pool_create(pool); + apr_size_t len; + char c; - while (1) - { - svn_error_t *err; - apr_uint64_t ui64; + svn_error_t *err; + apr_uint64_t ui64; - svn_pool_clear(iterpool); + /* Read a key length line. Might be END, though. */ + SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool)); - /* Read a key length line. Might be END, though. */ - SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, iterpool)); + /* Check for the end of the hash. */ + if ((!terminator && eof && buf->len == 0) + || (terminator && (strcmp(buf->data, terminator) == 0))) + { + entry->key = NULL; + entry->keylen = 0; + entry->val = NULL; + entry->vallen = 0; - /* Check for the end of the hash. */ - if ((!terminator && eof && buf->len == 0) - || (terminator && (strcmp(buf->data, terminator) == 0))) - break; + return SVN_NO_ERROR; + } - /* Check for unexpected end of stream */ - if (eof) + /* Check for unexpected end of stream */ + if (eof) + return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, + _("Serialized hash missing terminator")); + + if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' ')) + { + /* Get the length of the key */ + err = svn_cstring_strtoui64(&ui64, buf->data + 2, + 0, APR_SIZE_MAX, 10); + if (err) + return svn_error_create(SVN_ERR_MALFORMED_FILE, err, + _("Serialized hash malformed key length")); + entry->keylen = (apr_size_t)ui64; + + /* Now read that much into a buffer. */ + entry->key = apr_palloc(pool, entry->keylen + 1); + SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen)); + entry->key[entry->keylen] = '\0'; + + /* Suck up extra newline after key data */ + len = 1; + SVN_ERR(svn_stream_read_full(stream, &c, &len)); + if (c != '\n') return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, - _("Serialized hash missing terminator")); + _("Serialized hash malformed key data")); - if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' ')) + /* Read a val length line */ + SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool)); + + if ((buf->data[0] == 'V') && (buf->data[1] == ' ')) { - /* Get the length of the key */ + /* Get the length of the val */ err = svn_cstring_strtoui64(&ui64, buf->data + 2, 0, APR_SIZE_MAX, 10); if (err) return svn_error_create(SVN_ERR_MALFORMED_FILE, err, - _("Serialized hash malformed")); - keylen = (apr_size_t)ui64; + _("Serialized hash malformed value length")); + entry->vallen = (apr_size_t)ui64; - /* Now read that much into a buffer. */ - keybuf = apr_palloc(pool, keylen + 1); - SVN_ERR(svn_stream_read(stream, keybuf, &keylen)); - keybuf[keylen] = '\0'; + entry->val = apr_palloc(pool, entry->vallen + 1); + SVN_ERR(svn_stream_read_full(stream, entry->val, &entry->vallen)); + entry->val[entry->vallen] = '\0'; - /* Suck up extra newline after key data */ + /* Suck up extra newline after val data */ len = 1; - SVN_ERR(svn_stream_read(stream, &c, &len)); + SVN_ERR(svn_stream_read_full(stream, &c, &len)); if (c != '\n') return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, - _("Serialized hash malformed")); - - /* Read a val length line */ - SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, iterpool)); + _("Serialized hash malformed value data")); + } + else + return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, + _("Serialized hash malformed")); + } + else if (incremental && (buf->len >= 3) + && (buf->data[0] == 'D') && (buf->data[1] == ' ')) + { + /* Get the length of the key */ + err = svn_cstring_strtoui64(&ui64, buf->data + 2, + 0, APR_SIZE_MAX, 10); + if (err) + return svn_error_create(SVN_ERR_MALFORMED_FILE, err, + _("Serialized hash malformed key length")); + entry->keylen = (apr_size_t)ui64; + + /* Now read that much into a buffer. */ + entry->key = apr_palloc(pool, entry->keylen + 1); + SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen)); + entry->key[entry->keylen] = '\0'; + + /* Suck up extra newline after key data */ + len = 1; + SVN_ERR(svn_stream_read_full(stream, &c, &len)); + if (c != '\n') + return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, + _("Serialized hash malformed key data")); - if ((buf->data[0] == 'V') && (buf->data[1] == ' ')) - { - err = svn_cstring_strtoui64(&ui64, buf->data + 2, - 0, APR_SIZE_MAX, 10); - if (err) - return svn_error_create(SVN_ERR_MALFORMED_FILE, err, - _("Serialized hash malformed")); - vallen = (apr_size_t)ui64; + /* Remove this hash entry. */ + entry->vallen = 0; + entry->val = NULL; + } + else + { + return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, + _("Serialized hash malformed")); + } - valbuf = apr_palloc(iterpool, vallen + 1); - SVN_ERR(svn_stream_read(stream, valbuf, &vallen)); - valbuf[vallen] = '\0'; + return SVN_NO_ERROR; +} - /* Suck up extra newline after val data */ - len = 1; - SVN_ERR(svn_stream_read(stream, &c, &len)); - if (c != '\n') - return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, - _("Serialized hash malformed")); +static svn_error_t * +hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator, + svn_boolean_t incremental, apr_pool_t *pool) +{ + apr_pool_t *iterpool = svn_pool_create(pool); - /* Add a new hash entry. */ - apr_hash_set(hash, keybuf, keylen, - svn_string_ncreate(valbuf, vallen, pool)); - } - else - return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, - _("Serialized hash malformed")); - } - else if (incremental && (buf->len >= 3) - && (buf->data[0] == 'D') && (buf->data[1] == ' ')) - { - /* Get the length of the key */ - err = svn_cstring_strtoui64(&ui64, buf->data + 2, - 0, APR_SIZE_MAX, 10); - if (err) - return svn_error_create(SVN_ERR_MALFORMED_FILE, err, - _("Serialized hash malformed")); - keylen = (apr_size_t)ui64; + while (1) + { + svn_hash__entry_t entry; - /* Now read that much into a buffer. */ - keybuf = apr_palloc(iterpool, keylen + 1); - SVN_ERR(svn_stream_read(stream, keybuf, &keylen)); - keybuf[keylen] = '\0'; + svn_pool_clear(iterpool); + SVN_ERR(svn_hash__read_entry(&entry, stream, terminator, + incremental, iterpool)); - /* Suck up extra newline after key data */ - len = 1; - SVN_ERR(svn_stream_read(stream, &c, &len)); - if (c != '\n') - return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, - _("Serialized hash malformed")); + /* end of hash? */ + if (entry.key == NULL) + break; - /* Remove this hash entry. */ - apr_hash_set(hash, keybuf, keylen, NULL); + if (entry.val) + { + /* Add a new hash entry. */ + apr_hash_set(hash, apr_pstrmemdup(pool, entry.key, entry.keylen), + entry.keylen, + svn_string_ncreate(entry.val, entry.vallen, pool)); } else { - return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, - _("Serialized hash malformed")); + /* Remove this hash entry. */ + apr_hash_set(hash, entry.key, entry.keylen, NULL); } } @@ -497,7 +535,7 @@ svn_hash_keys(apr_array_header_t **array, for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi)) { - APR_ARRAY_PUSH(*array, const char *) = svn__apr_hash_index_key(hi); + APR_ARRAY_PUSH(*array, const char *) = apr_hash_this_key(hi); } return SVN_NO_ERROR; @@ -522,23 +560,6 @@ svn_hash_from_cstring_keys(apr_hash_t **hash_p, } -#if !APR_VERSION_AT_LEAST(1, 3, 0) -void -svn_hash__clear(apr_hash_t *hash) -{ - apr_hash_index_t *hi; - const void *key; - apr_ssize_t klen; - - for (hi = apr_hash_first(NULL, hash); hi; hi = apr_hash_next(hi)) - { - apr_hash_this(hi, &key, &klen, NULL); - apr_hash_set(hash, key, klen, NULL); - } -} -#endif - - /*** Specialized getter APIs ***/ @@ -576,20 +597,16 @@ svn_hash__get_bool(apr_hash_t *hash, const char *key, /*** Optimized hash function ***/ -/* Optimized version of apr_hashfunc_default in APR 1.4.5 and earlier. - * It assumes that the CPU has 32-bit multiplications with high throughput - * of at least 1 operation every 3 cycles. Latency is not an issue. Another - * optimization is a mildly unrolled main loop and breaking the dependency - * chain within the loop. +/* apr_hashfunc_t optimized for the key that we use in SVN: paths and + * property names. Its primary goal is speed for keys of known length. * - * Note that most CPUs including Intel Atom, VIA Nano, ARM feature the - * assumed pipelined multiplication circuitry. They can do one MUL every - * or every other cycle. - * - * The performance is ultimately limited by the fact that most CPUs can - * do only one LOAD and only one BRANCH operation per cycle. The best we - * can do is to process one character per cycle - provided the processor - * is wide enough to do 1 LOAD, COMPARE, BRANCH, MUL and ADD per cycle. + * Since strings tend to spawn large value spaces (usually differ in many + * bits with differences spanning a larger section of the key), we can be + * quite sloppy extracting a hash value. The more keys there are in a + * hash container, the more bits of the value returned by this function + * will be used. For a small number of string keys, choosing bits from any + * any fix location close to the tail of those keys would usually be good + * enough to prevent high collision rates. */ static unsigned int hashfunc_compatible(const char *char_key, apr_ssize_t *klen) @@ -600,37 +617,29 @@ hashfunc_compatible(const char *char_key, apr_ssize_t *klen) apr_ssize_t i; if (*klen == APR_HASH_KEY_STRING) + *klen = strlen(char_key); + +#if SVN_UNALIGNED_ACCESS_IS_OK + for (p = key, i = *klen; i >= 4; i-=4, p+=4) { - for (p = key; ; p+=4) - { - unsigned int new_hash = hash * 33 * 33 * 33 * 33; - if (!p[0]) break; - new_hash += p[0] * 33 * 33 * 33; - if (!p[1]) break; - new_hash += p[1] * 33 * 33; - if (!p[2]) break; - new_hash += p[2] * 33; - if (!p[3]) break; - hash = new_hash + p[3]; - } - for (; *p; p++) - hash = hash * 33 + *p; - - *klen = p - key; + apr_uint32_t chunk = *(const apr_uint32_t *)p; + + /* the ">> 17" part gives upper bits in the chunk a chance to make + some impact as well */ + hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17); } - else +#else + for (p = key, i = *klen; i >= 4; i-=4, p+=4) { - for (p = key, i = *klen; i >= 4; i-=4, p+=4) - { - hash = hash * 33 * 33 * 33 * 33 - + p[0] * 33 * 33 * 33 - + p[1] * 33 * 33 - + p[2] * 33 - + p[3]; - } - for (; i; i--, p++) - hash = hash * 33 + *p; + hash = hash * 33 * 33 * 33 * 33 + + p[0] * 33 * 33 * 33 + + p[1] * 33 * 33 + + p[2] * 33 + + p[3]; } +#endif + for (; i; i--, p++) + hash = hash * 33 + *p; return hash; } |