subversion-1.9.7HEAD subversion-1.9.7 master

author: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-08-05 16:22:51 +0000
committer: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-08-05 16:22:51 +0000
commit: cf46733632c7279a9fd0fe6ce26f9185a4ae82a9 (patch)
tree: da27775a2161723ef342e91af41a8b51fedef405 /subversion/libsvn_subr/string.c
parent: bb0ef45f7c46b0ae221b26265ef98a768c33f820 (diff)
download: subversion-tarball-master.tar.gz
1 files changed, 284 insertions, 73 deletions
diff --git a/subversion/libsvn_subr/string.c b/subversion/libsvn_subr/string.c
index c3d7fec..43a1a4e 100644
--- a/subversion/libsvn_subr/string.c
+++ b/subversion/libsvn_subr/string.c
@@ -26,6 +26,7 @@
 
 
 #include <apr.h>
+#include <assert.h>
 
 #include <string.h>      /* for memcpy(), memcmp(), strlen() */
 #include <apr_fnmatch.h>
@@ -53,9 +54,9 @@ membuf_create(void **data, apr_size_t *size,
   /* apr_palloc will allocate multiples of 8.
    * Thus, we would waste some of that memory if we stuck to the
    * smaller size. Note that this is safe even if apr_palloc would
-   * use some other aligment or none at all. */
+   * use some other alignment or none at all. */
   minimum_size = APR_ALIGN_DEFAULT(minimum_size);
-  *data = (!minimum_size ? NULL : apr_palloc(pool, minimum_size));
+  *data = apr_palloc(pool, minimum_size);
   *size = minimum_size;
 }
 
@@ -78,14 +79,10 @@ membuf_ensure(void **data, apr_size_t *size,
       apr_size_t new_size = *size;
 
       if (new_size == 0)
-        /* APR will increase odd allocation sizes to the next
-         * multiple for 8, for instance. Take advantage of that
-         * knowledge and allow for the extra size to be used. */
         new_size = minimum_size;
       else
         while (new_size < minimum_size)
           {
-            /* new_size is aligned; doubling it should keep it aligned */
             const apr_size_t prev_size = new_size;
             new_size *= 2;
 
@@ -121,7 +118,10 @@ svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size)
   const apr_size_t old_size = membuf->size;
 
   membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
-  if (membuf->data && old_data && old_data != membuf->data)
+
+  /* If we re-allocated MEMBUF->DATA, it cannot be NULL.
+   * Statically initialized membuffers (OLD_DATA) may be NULL, though. */
+  if (old_data && old_data != membuf->data)
     memcpy(membuf->data, old_data, old_size);
 }
 
@@ -151,7 +151,7 @@ string_compare(const char *str1,
   if (len1 != len2)
     return FALSE;
 
-  /* now the strings must have identical lenghths */
+  /* now the strings must have identical lengths */
 
   if ((memcmp(str1, str2, len1)) == 0)
     return TRUE;
@@ -240,7 +240,9 @@ svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
   new_string->data = data;
   new_string->len = size;
 
-  memcpy(data, bytes, size);
+  /* If SIZE is 0, NULL is valid for BYTES. */
+  if (size)
+    memcpy(data, bytes, size);
 
   /* Null termination is the convention -- even if we suspect the data
      to be binary, it's not up to us to decide, it's the caller's
@@ -299,8 +301,9 @@ svn_string_isempty(const svn_string_t *str)
 svn_string_t *
 svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool)
 {
-  return (svn_string_ncreate(original_string->data,
-                             original_string->len, pool));
+  return (original_string ? svn_string_ncreate(original_string->data,
+                                               original_string->len, pool)
+                          : NULL);
 }
 
 
@@ -393,7 +396,10 @@ svn_stringbuf_t *
 svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
 {
   svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool);
-  memcpy(strbuf->data, bytes, size);
+
+  /* If SIZE is 0, NULL is valid for BYTES. */
+  if (size)
+    memcpy(strbuf->data, bytes, size);
 
   /* Null termination is the convention -- even if we suspect the data
      to be binary, it's not up to us to decide, it's the caller's
@@ -418,6 +424,17 @@ svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool)
   return svn_stringbuf_ncreate(str->data, str->len, pool);
 }
 
+svn_stringbuf_t *
+svn_stringbuf_create_wrap(char *str, apr_pool_t *pool)
+{
+  svn_stringbuf_t *result = apr_palloc(pool, sizeof(*result));
+  result->pool = pool;
+  result->data = str;  /* STR itself becomes the buffer; it is not copied */
+  result->len = strlen(str);  /* so STR must be NUL-terminated */
+  result->blocksize = result->len + 1;  /* the string plus its NUL */
+  /* RESULT now wraps STR in place; only the struct itself comes from POOL. */
+  return result;
+}
 
 svn_stringbuf_t *
 svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap)
@@ -579,6 +596,10 @@ svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,
   apr_size_t total_len;
   void *start_address;
 
+  if (!count)
+    /* Allow BYTES to be NULL by avoiding passing it to memcpy. */
+    return;
+
   total_len = str->len + count;  /* total size needed */
 
   /* svn_stringbuf_ensure adds 1 for null terminator. */
@@ -595,6 +616,21 @@ svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,
                                   to null-terminate. */
 }
 
+void
+svn_stringbuf_appendfill(svn_stringbuf_t *str,
+                         char byte,
+                         apr_size_t count)
+{
+  apr_size_t new_len = str->len + count;  /* length after appending */
+  svn_stringbuf_ensure(str, new_len);
+  /* Fill the COUNT bytes right after the current contents with BYTE. */
+  memset(str->data + str->len, byte, count);
+
+  /* update buffer length and always NUL-terminate it */
+  str->len = new_len;
+  str->data[new_len] = '\0';
+}
+
 
 void
 svn_stringbuf_appendstr(svn_stringbuf_t *targetstr,
@@ -616,23 +652,22 @@ svn_stringbuf_insert(svn_stringbuf_t *str,
                      const char *bytes,
                      apr_size_t count)
 {
+  /* For COUNT==0, we allow BYTES to be NULL. It's a no-op in that case. */
+  if (count == 0)
+    return;
+
+  /* special case: BYTES overlaps with this string -> copy the source */
   if (bytes + count > str->data && bytes < str->data + str->blocksize)
-    {
-      /* special case: BYTES overlaps with this string -> copy the source */
-      const char *temp = apr_pmemdup(str->pool, bytes, count);
-      svn_stringbuf_insert(str, pos, temp, count);
-    }
-  else
-    {
-      if (pos > str->len)
-        pos = str->len;
+    bytes = apr_pmemdup(str->pool, bytes, count);
 
-      svn_stringbuf_ensure(str, str->len + count);
-      memmove(str->data + pos + count, str->data + pos, str->len - pos + 1);
-      memcpy(str->data + pos, bytes, count);
+  if (pos > str->len)
+    pos = str->len;
 
-      str->len += count;
-    }
+  svn_stringbuf_ensure(str, str->len + count);
+  memmove(str->data + pos + count, str->data + pos, str->len - pos + 1);
+  memcpy(str->data + pos, bytes, count);
+
+  str->len += count;
 }
 
 void
@@ -642,7 +677,7 @@ svn_stringbuf_remove(svn_stringbuf_t *str,
 {
   if (pos > str->len)
     pos = str->len;
-  if (pos + count > str->len)
+  if (count > str->len - pos)
     count = str->len - pos;
 
   memmove(str->data + pos, str->data + pos + count, str->len - pos - count + 1);
@@ -656,32 +691,35 @@ svn_stringbuf_replace(svn_stringbuf_t *str,
                       const char *bytes,
                       apr_size_t new_count)
 {
-  if (bytes + new_count > str->data && bytes < str->data + str->blocksize)
+  /* For COUNT==0, we allow BYTES to be NULL.
+   * In that case, this is just a substring removal. */
+  if (new_count == 0)
     {
-      /* special case: BYTES overlaps with this string -> copy the source */
-      const char *temp = apr_pmemdup(str->pool, bytes, new_count);
-      svn_stringbuf_replace(str, pos, old_count, temp, new_count);
+      svn_stringbuf_remove(str, pos, old_count);
+      return;
     }
-  else
-    {
-      if (pos > str->len)
-        pos = str->len;
-      if (pos + old_count > str->len)
-        old_count = str->len - pos;
 
-      if (old_count < new_count)
-        {
-          apr_size_t delta = new_count - old_count;
-          svn_stringbuf_ensure(str, str->len + delta);
-        }
+  /* special case: BYTES overlaps with this string -> copy the source */
+  if (bytes + new_count > str->data && bytes < str->data + str->blocksize)
+    bytes = apr_pmemdup(str->pool, bytes, new_count);
 
-      if (old_count != new_count)
-        memmove(str->data + pos + new_count, str->data + pos + old_count,
-                str->len - pos - old_count + 1);
+  if (pos > str->len)
+    pos = str->len;
+  if (old_count > str->len - pos)
+    old_count = str->len - pos;
 
-      memcpy(str->data + pos, bytes, new_count);
-      str->len += new_count - old_count;
+  if (old_count < new_count)
+    {
+      apr_size_t delta = new_count - old_count;
+      svn_stringbuf_ensure(str, str->len + delta);
     }
+
+  if (old_count != new_count)
+    memmove(str->data + pos + new_count, str->data + pos + old_count,
+            str->len - pos - old_count + 1);
+
+  memcpy(str->data + pos, bytes, new_count);
+  str->len += new_count - old_count;
 }
 
 
@@ -832,7 +870,7 @@ char *
 svn_cstring_tokenize(const char *sep, char **str)
 {
     char *token;
-    const char * next;
+    char *next;
     char csep;
 
     /* check parameters */
@@ -862,8 +900,8 @@ svn_cstring_tokenize(const char *sep, char **str)
       }
     else
       {
-        *(char *)next = '\0';
-        *str = (char *)next + 1;
+        *next = '\0';
+        *str = next + 1;
       }
 
     return token;
@@ -1014,17 +1052,33 @@ svn_cstring_atoi(int *n, const char *str)
   return SVN_NO_ERROR;
 }
 
-
-apr_status_t
-svn__strtoff(apr_off_t *offset, const char *buf, char **end, int base)
+unsigned long
+svn__strtoul(const char* buffer, const char** end)
 {
-#if !APR_VERSION_AT_LEAST(1,0,0)
-  errno = 0;
-  *offset = strtol(buf, end, base);
-  return APR_FROM_OS_ERROR(errno);
-#else
-  return apr_strtoff(offset, buf, end, base);
-#endif
+  unsigned long result = 0;
+  /* Parse the leading decimal digits of BUFFER; overflow is not checked. */
+  /* this loop will execute in just 2 CPU cycles, confirmed by measurement:
+     7 macro-ops (max 4 / cycle => 2 cycles)
+       1 load (max 1 / cycle)
+       1 jump (compare + conditional jump == 1 macro op; max 1 / cycle)
+       2 arithmetic ops (subtract, increment; max 3 / cycle)
+       2 scale-and-add AGU ops (max 3 / cycle)
+       1 compiler-generated move operation
+     dependency chain: temp = result * 4 + result; result = temp * 2 + c
+                       (2 ops with latency 1 => 2 cycles)
+   */
+  while (1)
+    {
+      unsigned long c = (unsigned char)*buffer - (unsigned char)'0';
+      if (c > 9)  /* non-digit; chars below '0' wrap to a huge value */
+        break;
+
+      result = result * 10 + c;
+      ++buffer;
+    }
+  /* END receives the first character that is not a decimal digit. */
+  *end = buffer;
+  return result;
 }
 
 /* "Precalculated" itoa values for 2 places (including leading zeros).
@@ -1123,11 +1177,11 @@ svn__i64toa(char * dest, apr_int64_t number)
     return svn__ui64toa(dest, (apr_uint64_t)number);
 
   *dest = '-';
-  return svn__ui64toa(dest + 1, (apr_uint64_t)(0-number)) + 1;
+  return svn__ui64toa(dest + 1, 0 - (apr_uint64_t)number) + 1;
 }
 
 static void
-ui64toa_sep(apr_uint64_t number, char seperator, char *buffer)
+ui64toa_sep(apr_uint64_t number, char separator, char *buffer)
 {
   apr_size_t length = svn__ui64toa(buffer, number);
   apr_size_t i;
@@ -1135,7 +1189,7 @@ ui64toa_sep(apr_uint64_t number, char seperator, char *buffer)
   for (i = length; i > 3; i -= 3)
     {
       memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3);
-      buffer[i-3] = seperator;
+      buffer[i-3] = separator;
       length++;
     }
 
@@ -1143,30 +1197,110 @@ ui64toa_sep(apr_uint64_t number, char seperator, char *buffer)
 }
 
 char *
-svn__ui64toa_sep(apr_uint64_t number, char seperator, apr_pool_t *pool)
+svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool)
 {
   char buffer[2 * SVN_INT64_BUFFER_SIZE];
-  ui64toa_sep(number, seperator, buffer);
+  ui64toa_sep(number, separator, buffer);
 
   return apr_pstrdup(pool, buffer);
 }
 
 char *
-svn__i64toa_sep(apr_int64_t number, char seperator, apr_pool_t *pool)
+svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool)
 {
   char buffer[2 * SVN_INT64_BUFFER_SIZE];
   if (number < 0)
     {
       buffer[0] = '-';
-      ui64toa_sep((apr_uint64_t)(-number), seperator, &buffer[1]);
+      ui64toa_sep(0 - (apr_uint64_t)number, separator, &buffer[1]); /* no UB */
     }
   else
-    ui64toa_sep((apr_uint64_t)(number), seperator, buffer);
+    ui64toa_sep((apr_uint64_t)(number), separator, buffer);
 
   return apr_pstrdup(pool, buffer);
 }
 
-unsigned int
+apr_size_t
+svn__ui64tobase36(char *dest, apr_uint64_t value)
+{
+  char *dest_start = dest;  /* kept to compute the output length */
+  if (value < 10)
+    {
+      /* pretty frequent and trivial case. Make it fast. */
+      *(dest++) = (char)(value) + '0';
+    }
+  else
+    {
+      char buffer[SVN_INT64_BUFFER_SIZE];
+      char *p = buffer;
+
+      /* write result as little-endian to buffer */
+      while (value > 0)
+        {
+          char c = (char)(value % 36);
+          value /= 36;
+
+          *p = (c <= 9) ? (c + '0') : (c - 10 + 'a');  /* digits 0-9, a-z */
+          ++p;
+        }
+
+      /* copy as big-endian to DEST */
+      while (p > buffer)
+        *(dest++) = *(--p);
+    }
+  /* DEST now holds VALUE in base 36; NUL-terminate and return its length. */
+  *dest = '\0';
+  return dest - dest_start;
+}
+/* Parse the leading base-36 digits (0-9, a-z) of SOURCE and return them. */
+apr_uint64_t
+svn__base36toui64(const char **next, const char *source)
+{
+  apr_uint64_t result = 0;
+  apr_uint64_t factor = 1;
+  apr_size_t i = 0;  /* unsigned, to match the sizeof() bound below */
+  char digits[SVN_INT64_BUFFER_SIZE];
+
+  /* convert digits to numerical values and count the number of places.
+   * Also, prevent buffer overflow. */
+  while (i < sizeof(digits))
+    {
+      char c = *source;
+      if (c < 'a')
+        {
+          /* includes detection of NUL terminator */
+          if (c < '0' || c > '9')
+            break;
+
+          c -= '0';
+        }
+      else
+        {
+          if (c > 'z')  /* c >= 'a' is already known in this branch */
+            break;
+
+          c -= 'a' - 10;
+        }
+
+      digits[i++] = c;
+      source++;
+    }
+
+  /* fold digits into the result */
+  while (i > 0)
+    {
+      result += factor * (apr_uint64_t)digits[--i];
+      factor *= 36;
+    }
+  /* NEXT is optional; it receives the first character not consumed. */
+  if (next)
+    *next = source;
+
+  return result;
+}
+
+
+apr_size_t
 svn_cstring__similarity(const char *stra, const char *strb,
                         svn_membuf_t *buffer, apr_size_t *rlcs)
 {
@@ -1178,7 +1312,7 @@ svn_cstring__similarity(const char *stra, const char *strb,
   return svn_string__similarity(&stringa, &stringb, buffer, rlcs);
 }
 
-unsigned int
+apr_size_t
 svn_string__similarity(const svn_string_t *stringa,
                        const svn_string_t *stringb,
                        svn_membuf_t *buffer, apr_size_t *rlcs)
@@ -1242,7 +1376,7 @@ svn_string__similarity(const svn_string_t *stringa,
       /* Calculate LCS length of the remainder */
       for (pstr = stra; pstr < enda; ++pstr)
         {
-          int i;
+          apr_size_t i;
           for (i = 1; i <= slots; ++i)
             {
               if (*pstr == strb[i-1])
@@ -1267,7 +1401,84 @@ svn_string__similarity(const svn_string_t *stringa,
 
   /* Return similarity ratio rounded to 4 significant digits */
   if (total)
-    return(unsigned int)((2000 * lcs + total/2) / total);
+    return ((2 * SVN_STRING__SIM_RANGE_MAX * lcs + total/2) / total);
   else
-    return 1000;
+    return SVN_STRING__SIM_RANGE_MAX;
+}
+/* Return how many leading bytes (at most MAX_LEN) A and B have in common. */
+apr_size_t
+svn_cstring__match_length(const char *a,
+                          const char *b,
+                          apr_size_t max_len)
+{
+  apr_size_t pos = 0;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+  /* Chunky processing is so much faster ...
+   *
+   * We can't make this work on architectures that require aligned access
+   * because A and B will probably have different alignment. So, skipping
+   * the first few chars until alignment is reached is not an option.
+   */
+  for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
+    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
+      break;
+
+#endif
+  /* Finish byte by byte; this also pinpoints a mismatch inside a chunk. */
+  for (; pos < max_len; ++pos)
+    if (a[pos] != b[pos])
+      break;
+
+  return pos;
+}
+/* Count equal bytes (at most MAX_LEN) going backwards from A[-1] and B[-1]. */
+apr_size_t
+svn_cstring__reverse_match_length(const char *a,
+                                  const char *b,
+                                  apr_size_t max_len)
+{
+  apr_size_t pos = 0;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+  /* Chunky processing is so much faster ...
+   *
+   * We can't make this work on architectures that require aligned access
+   * because A and B will probably have different alignment. So, skipping
+   * the first few chars until alignment is reached is not an option.
+   */
+  for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
+    if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
+      break;
+  /* POS overshoots by one chunk; back up to the bytes known to match. */
+  pos -= sizeof(apr_size_t);
+
+#endif
+
+  /* If we find a mismatch at -pos, pos-1 characters matched.
+   */
+  while (++pos <= max_len)
+    if (a[0-pos] != b[0-pos])
+      return pos - 1;
+
+  /* No mismatch found -> at least MAX_LEN matching chars.
+   */
+  return max_len;
+}
+/* Return STR advanced past PREFIX if STR starts with PREFIX, else NULL. */
+const char *
+svn_cstring_skip_prefix(const char *str, const char *prefix)
+{
+  apr_size_t len = strlen(prefix);
+
+  if (strncmp(str, prefix, len) == 0)
+    {
+      return str + len;
+    }
+  else
+    {
+      return NULL;
+    }
 }
author	Lorry Tar Creator <lorry-tar-importer@lorry>	2017-08-05 16:22:51 +0000
committer	Lorry Tar Creator <lorry-tar-importer@lorry>	2017-08-05 16:22:51 +0000
commit	cf46733632c7279a9fd0fe6ce26f9185a4ae82a9 (patch)
tree	da27775a2161723ef342e91af41a8b51fedef405 /subversion/libsvn_subr/string.c
parent	bb0ef45f7c46b0ae221b26265ef98a768c33f820 (diff)
download	subversion-tarball-master.tar.gz