diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
commit | cf46733632c7279a9fd0fe6ce26f9185a4ae82a9 (patch) | |
tree | da27775a2161723ef342e91af41a8b51fedef405 /subversion/libsvn_subr/prefix_string.c | |
parent | bb0ef45f7c46b0ae221b26265ef98a768c33f820 (diff) | |
download | subversion-tarball-master.tar.gz |
subversion-1.9.7HEADsubversion-1.9.7master
Diffstat (limited to 'subversion/libsvn_subr/prefix_string.c')
-rw-r--r-- | subversion/libsvn_subr/prefix_string.c | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/subversion/libsvn_subr/prefix_string.c b/subversion/libsvn_subr/prefix_string.c new file mode 100644 index 0000000..fcf11bd --- /dev/null +++ b/subversion/libsvn_subr/prefix_string.c @@ -0,0 +1,315 @@ +/* prefix_string.c --- implement strings based on a prefix tree + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include <assert.h> +#include "private/svn_string_private.h" + +/* A node in the tree represents a common prefix. The root node is the + * empty prefix. Nodes may have up to 256 sub-nodes, each starting with + * a different character (possibly '\0'). + * + * The nodes in the tree store only up to 8 chars of the respective common + * prefix, i.e. longer common prefixes must be drawn out over multiple + * hierarchy levels. This is a space <-> efficiency trade-off. + * + * Strings are the leaf nodes in the tree and use a specialized, smaller + * data structure. They may add 0 to 7 extra chars to the prefix. Both + * data types can be discerned by the last char in the data buffer. This + * must be 0 for strings (leaves) and non-0 otherwise. Please note that + * ordinary nodes have a length information so that no terminating 0 is + * required for them. + */ + +/* forward declaration */ +typedef struct node_t node_t; + +/* String type and tree leaf. + */ +struct svn_prefix_string__t +{ + /* mandatory prefix */ + node_t *prefix; + + /* 0 ..7 chars to add the the prefix. NUL-terminated. */ + char data[8]; +}; + +/* A node inside the tree, i.e. not a string and not a leaf (unless this is + * the root node). + * + * Note: keep the ordering to minimize size / alignment overhead on 64 bit + * machines. + */ +struct node_t +{ + /* pointer to the parent prefix plus the 1 .. 8 extra chars. + * Only the root will provide 0 extra chars. */ + svn_prefix_string__t key; + + /* Length of the prefix from the root down to and including this one. + * 0 for the root node. Only then will key.prefix be NULL. */ + apr_uint32_t length; + + /* Number of entries used in SUB_NODES. */ + apr_uint32_t sub_node_count; + + /* The sub-nodes, ordered by first char. node_t and svn_prefix_string__t + * may be mixed here. May be NULL. + * The number of allocated entries is always a power-of-two and only + * given implicitly by SUB_NODE_COUNT. */ + struct node_t **sub_nodes; +}; + +/* The actual tree structure. */ +struct svn_prefix_tree__t +{ + /* the common tree root (represents the empty prefix). */ + node_t *root; + + /* all sub-nodes & strings will be allocated from this pool */ + apr_pool_t *pool; +}; + +/* Return TRUE, iff NODE is a leaf node. + */ +static svn_boolean_t +is_leaf(node_t *node) +{ + return node->key.data[7] == 0; +} + +/* Ensure that the sub-nodes array of NODE within TREE has at least one + * unused entry. Re-allocate as necessary. + */ +static void +auto_realloc_sub_nodes(svn_prefix_tree__t *tree, + node_t *node) +{ + if (node->sub_node_count & (node->sub_node_count - 1)) + return; + + if (node->sub_node_count == 0) + { + node->sub_nodes = apr_pcalloc(tree->pool, sizeof(*node->sub_nodes)); + } + else + { + node_t **sub_nodes + = apr_pcalloc(tree->pool, + 2 * node->sub_node_count * sizeof(*sub_nodes)); + memcpy(sub_nodes, node->sub_nodes, + node->sub_node_count * sizeof(*sub_nodes)); + node->sub_nodes = sub_nodes; + } +} + +/* Given the COUNT pointers in the SUB_NODES array, return the location at + * which KEY is either located or would be inserted. + */ +static int +search_lower_bound(node_t **sub_nodes, + unsigned char key, + int count) +{ + int lower = 0; + int upper = count - 1; + + /* Binary search for the lowest position at which to insert KEY. */ + while (lower <= upper) + { + int current = lower + (upper - lower) / 2; + + if ((unsigned char)sub_nodes[current]->key.data[0] < key) + lower = current + 1; + else + upper = current - 1; + } + + return lower; +} + +svn_prefix_tree__t * +svn_prefix_tree__create(apr_pool_t *pool) +{ + svn_prefix_tree__t *tree = apr_pcalloc(pool, sizeof(*tree)); + tree->pool = pool; + + tree->root = apr_pcalloc(pool, sizeof(*tree->root)); + tree->root->key.data[7] = '\xff'; + + return tree; +} + +svn_prefix_string__t * +svn_prefix_string__create(svn_prefix_tree__t *tree, + const char *s) +{ + svn_prefix_string__t *new_string; + apr_size_t len = strlen(s); + node_t *node = tree->root; + node_t *new_node; + int idx; + + /* walk the existing tree until we either find S or the node at which S + * has to be inserted */ + while (TRUE) + { + node_t *sub_node; + int match = 1; + + /* index of the matching sub-node */ + idx = node->sub_node_count + ? search_lower_bound(node->sub_nodes, + (unsigned char)s[node->length], + node->sub_node_count) + : 0; + + /* any (partially) matching sub-nodes? */ + if (idx == (int)node->sub_node_count + || node->sub_nodes[idx]->key.data[0] != s[node->length]) + break; + + sub_node = node->sub_nodes[idx]; + + /* fully matching sub-node? */ + if (is_leaf(sub_node)) + { + if (strcmp(sub_node->key.data, s + node->length) == 0) + return &sub_node->key; + } + else + { + apr_size_t sub_node_len = sub_node->length - node->length; + if (strncmp(sub_node->key.data, s + node->length, + sub_node_len) == 0) + { + node = sub_node; + continue; + } + } + + /* partial match -> split */ + while (sub_node->key.data[match] == s[node->length + match]) + ++match; + + new_node = apr_pcalloc(tree->pool, sizeof(*new_node)); + new_node->key = sub_node->key; + new_node->length = node->length + match; + new_node->key.data[7] = '\xff'; + new_node->sub_node_count = 1; + new_node->sub_nodes = apr_palloc(tree->pool, sizeof(node_t *)); + new_node->sub_nodes[0] = sub_node; + + memmove(sub_node->key.data, sub_node->key.data + match, 8 - match); + + /* replace old sub-node with new one and continue lookup */ + sub_node->key.prefix = new_node; + node->sub_nodes[idx] = new_node; + node = new_node; + } + + /* add sub-node(s) and final string */ + while (node->length + 7 < len) + { + new_node = apr_pcalloc(tree->pool, sizeof(*new_node)); + new_node->key.prefix = node; + new_node->length = node->length + 8; + memcpy(new_node->key.data, s + node->length, 8); + + auto_realloc_sub_nodes(tree, node); + memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx, + (node->sub_node_count - idx) * sizeof(node_t *)); + + /* replace old sub-node with new one and continue lookup */ + node->sub_nodes[idx] = new_node; + node->sub_node_count++; + node = new_node; + idx = 0; + } + + new_string = apr_pcalloc(tree->pool, sizeof(*new_string)); + new_string->prefix = node; + memcpy(new_string->data, s + node->length, len - node->length); + + auto_realloc_sub_nodes(tree, node); + memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx, + (node->sub_node_count - idx) * sizeof(node_t *)); + + node->sub_nodes[idx] = (node_t *)new_string; + node->sub_node_count++; + return new_string; +} + +svn_string_t * +svn_prefix_string__expand(const svn_prefix_string__t *s, + apr_pool_t *pool) +{ + apr_size_t s_len = strlen(s->data); + apr_size_t len = s->prefix->length + s_len; + char *buffer = apr_palloc(pool, len + 1); + + svn_string_t *result = apr_pcalloc(pool, sizeof(*result)); + result->data = buffer; + result->len = len; + buffer[len] = '\0'; + + while (s->prefix) + { + memcpy(buffer + s->prefix->length, s->data, len - s->prefix->length); + len = s->prefix->length; + s = &s->prefix->key; + } + + return result; +} + +int +svn_prefix_string__compare(const svn_prefix_string__t *lhs, + const svn_prefix_string__t *rhs) +{ + const node_t *lhs_parent = lhs->prefix; + const node_t *rhs_parent = rhs->prefix; + + if (lhs == rhs) + return 0; + + /* find the common root */ + while (lhs_parent != rhs_parent) + { + if (lhs_parent->length <= rhs_parent->length) + { + rhs = &rhs_parent->key; + rhs_parent = rhs_parent->key.prefix; + } + else if (rhs_parent->length <= lhs_parent->length) + { + lhs = &lhs_parent->key; + lhs_parent = lhs_parent->key.prefix; + } + + /* same tree? */ + assert(lhs_parent && rhs_parent); + } + + /* at the common root, strings will differ in the first follow-up char */ + return (int)(unsigned char)lhs->data[0] - (int)(unsigned char)rhs->data[0]; +} |