summaryrefslogtreecommitdiff
path: root/Zend/zend_string.h
diff options
context:
space:
mode:
Diffstat (limited to 'Zend/zend_string.h')
-rw-r--r--Zend/zend_string.h287
1 files changed, 242 insertions, 45 deletions
diff --git a/Zend/zend_string.h b/Zend/zend_string.h
index 978426e7c5..dfdc74eda8 100644
--- a/Zend/zend_string.h
+++ b/Zend/zend_string.h
@@ -24,70 +24,267 @@
#include "zend.h"
BEGIN_EXTERN_C()
-ZEND_API extern const char *(*zend_new_interned_string)(const char *str, int len, int free_src TSRMLS_DC);
+
+ZEND_API extern zend_string *(*zend_new_interned_string)(zend_string *str TSRMLS_DC);
ZEND_API extern void (*zend_interned_strings_snapshot)(TSRMLS_D);
ZEND_API extern void (*zend_interned_strings_restore)(TSRMLS_D);
+ZEND_API zend_ulong zend_hash_func(const char *str, uint len);
void zend_interned_strings_init(TSRMLS_D);
void zend_interned_strings_dtor(TSRMLS_D);
+
END_EXTERN_C()
-#ifndef ZTS
+#define IS_INTERNED(s) (GC_FLAGS(s) & IS_STR_INTERNED) /* non-zero when the IS_STR_INTERNED bit is set in the string's GC flags */
+/* Hash helpers: forward to the inline functions defined further down. */
+#define STR_HASH_VAL(s) zend_str_hash_val(s)
+#define STR_FORGET_HASH_VAL(s) zend_str_forget_hash_val(s)
+/* Reference counting, allocation and release: apart from STR_EMPTY_ALLOC, each macro passes its arguments unchanged to the matching zend_str_* inline function. */
+#define STR_REFCOUNT(s) zend_str_refcount(s)
+#define STR_ADDREF(s) zend_str_addref(s)
+#define STR_DELREF(s) zend_str_delref(s)
+#define STR_ALLOC(len, persistent) zend_str_alloc(len, persistent)
+#define STR_SAFE_ALLOC(n, m, l, p) zend_str_safe_alloc(n, m, l, p)
+#define STR_INIT(str, len, persistent) zend_str_init(str, len, persistent)
+#define STR_COPY(s) zend_str_copy(s)
+#define STR_DUP(s, persistent) zend_str_dup(s, persistent)
+#define STR_REALLOC(s, len, persistent) zend_str_realloc(s, len, persistent)
+#define STR_SAFE_REALLOC(s, n, m, l, p) zend_str_safe_realloc(s, n, m, l, p)
+#define STR_FREE(s) zend_str_free(s)
+#define STR_RELEASE(s) zend_str_release(s)
+#define STR_EMPTY_ALLOC() CG(empty_string) /* does not allocate: evaluates to the empty_string member reached through CG() */
+/* Offset of the val member inside zend_string (XtOffsetOf presumably behaves like offsetof), i.e. the size of everything stored in front of the character data. */
+#define _STR_HEADER_SIZE XtOffsetOf(zend_string, val)
-#define IS_INTERNED(s) \
- (((s) >= CG(interned_strings_start)) && ((s) < CG(interned_strings_end)))
+/*
+ * Obtain a temporary zend_string with room for _len bytes plus a trailing
+ * NUL from do_alloca() and initialise its header: refcount 1, type info
+ * IS_STRING with no flags, hash not yet computed (0), length _len.
+ *
+ * The type info has to be written explicitly: the memory returned by
+ * do_alloca() is not expected to be zeroed, and IS_INTERNED() inspects
+ * GC_FLAGS(). Leaving it unset would let the STR_* helpers treat the
+ * temporary as interned (or persistent) depending on stale stack contents.
+ *
+ * The character data itself is left unwritten. Release the string with
+ * STR_ALLOCA_FREE() using the same use_heap variable.
+ */
+#define STR_ALLOCA_ALLOC(str, _len, use_heap) do { \
+	(str) = do_alloca(_STR_HEADER_SIZE + (_len) + 1, (use_heap)); \
+	GC_REFCOUNT(str) = 1; \
+	GC_TYPE_INFO(str) = IS_STRING; \
+	(str)->h = 0; \
+	(str)->len = (_len); \
+} while (0)
+#define STR_ALLOCA_INIT(str, s, len, use_heap) do { /* note: the len argument is expanded several times */ \
+ STR_ALLOCA_ALLOC(str, len, use_heap); /* header set up, buffer of len + 1 bytes reserved */ \
+ memcpy((str)->val, (s), (len)); /* copy len bytes of payload from s */ \
+ (str)->val[(len)] = '\0'; /* terminate the copy */ \
+} while (0)
+/* Release a string obtained through STR_ALLOCA_ALLOC()/STR_ALLOCA_INIT(); forwards both arguments to free_alloca(). */
+#define STR_ALLOCA_FREE(str, use_heap) free_alloca(str, use_heap)
+
+/*
+ * Return the hash of s. A cached value in s->h is used when it is non-zero;
+ * otherwise the hash is computed with zend_hash_func() over s->val / s->len
+ * and stored in s->h first. Because 0 doubles as the "not computed" marker,
+ * a string whose hash really is 0 is re-hashed on every call.
+ */
+static zend_always_inline zend_ulong zend_str_hash_val(zend_string *s)
+{
+	if (s->h) {
+		return s->h;
+	}
+	s->h = zend_hash_func(s->val, s->len);
+	return s->h;
+}
+
+/*
+ * Drop the cached hash of s by resetting s->h to 0, the value
+ * zend_str_hash_val() treats as "not computed yet".
+ */
+static zend_always_inline void zend_str_forget_hash_val(zend_string *s)
+{
+	s->h = 0;
+}
+/*
+ * Report the reference count of s. Interned strings always report 1;
+ * for every other string the GC refcount is returned.
+ */
+static zend_always_inline zend_uint zend_str_refcount(zend_string *s)
+{
+	if (IS_INTERNED(s)) {
+		return 1;
+	}
+	return GC_REFCOUNT(s);
+}
+
+/*
+ * Add one reference to s and return the new count. Interned strings are
+ * left untouched and 1 is returned for them.
+ */
+static zend_always_inline zend_uint zend_str_addref(zend_string *s)
+{
+	if (IS_INTERNED(s)) {
+		return 1;
+	}
+	return ++GC_REFCOUNT(s);
+}
+
+/*
+ * Remove one reference from s and return the new count; nothing is freed
+ * here. Interned strings are left untouched and 1 is returned for them.
+ */
+static zend_always_inline zend_uint zend_str_delref(zend_string *s)
+{
+	if (IS_INTERNED(s)) {
+		return 1;
+	}
+	return --GC_REFCOUNT(s);
+}
+/* Allocate a string able to hold len bytes plus one extra byte, using
+ * pemalloc() with the given persistent flag. The header is initialised to
+ * refcount 1, type IS_STRING (with IS_STR_PERSISTENT in the flags when
+ * persistent is non-zero), hash 0 and length len. The character data in
+ * val is not written by this function.
+ */
+static zend_always_inline zend_string *zend_str_alloc(int len, int persistent)
+{
+ zend_string *ret = pemalloc(_STR_HEADER_SIZE + len + 1, persistent);
+
+ GC_REFCOUNT(ret) = 1;
+#if 1
+ /* optimized single assignment: the flags are shifted left by 8 and OR-ed with the type; intended to match the three stores in the #else branch */
+ GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << 8);
#else
+ GC_TYPE(ret) = IS_STRING;
+ GC_FLAGS(ret) = (persistent ? IS_STR_PERSISTENT : 0);
+ GC_INFO(ret) = 0;
+#endif
+ ret->h = 0; /* hash not computed yet */
+ ret->len = len;
+ return ret;
+}
-#define IS_INTERNED(s) \
- (0)
+static zend_always_inline zend_string *zend_str_safe_alloc(size_t n, size_t m, size_t l, int persistent) /*
+ * Allocate a string for a payload of n * m + l bytes plus one extra byte.
+ * The size is handed to safe_pemalloc() as (n, m, header + l + 1); that
+ * helper is presumably where multiplication overflow is detected (not
+ * visible here). The header is initialised to refcount 1, type IS_STRING
+ * (with IS_STR_PERSISTENT when persistent is non-zero), hash 0 and length
+ * n * m + l. The character data in val is not written.
+ */
+{
+ zend_string *ret = safe_pemalloc(n, m, _STR_HEADER_SIZE + l + 1, persistent);
+ GC_REFCOUNT(ret) = 1;
+#if 1
+ /* optimized single assignment: the flags are shifted left by 8 and OR-ed with the type; intended to match the three stores in the #else branch */
+ GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << 8);
+#else
+ GC_TYPE(ret) = IS_STRING;
+ GC_FLAGS(ret) = (persistent ? IS_STR_PERSISTENT : 0);
+ GC_INFO(ret) = 0;
#endif
+ ret->h = 0; /* hash not computed yet */
+ ret->len = (n * m) + l;
+ return ret;
+}
+
+/*
+ * Create a new string holding a copy of the first len bytes of str,
+ * followed by a terminating NUL. Storage comes from STR_ALLOC(), so the
+ * persistent flag is passed straight through to it.
+ */
+static zend_always_inline zend_string *zend_str_init(const char *str, int len, int persistent)
+{
+	zend_string *copy = STR_ALLOC(len, persistent);
+	char *dst = copy->val;
+
+	memcpy(dst, str, len);
+	dst[len] = '\0';
+	return copy;
+}
-#define INTERNED_LEN(s) \
- (((Bucket*)(((char*)(s))-sizeof(Bucket)))->nKeyLength)
-
-#define INTERNED_HASH(s) \
- (((Bucket*)(((char*)(s))-sizeof(Bucket)))->h)
-
-#define str_efree(s) do { \
- if (!IS_INTERNED(s)) { \
- efree((char*)s); \
- } \
- } while (0)
-
-#define str_efree_rel(s) do { \
- if (!IS_INTERNED(s)) { \
- efree_rel((char *)s); \
- } \
- } while (0)
-
-#define str_free(s) do { \
- if (!IS_INTERNED(s)) { \
- free((char*)s); \
- } \
- } while (0)
-
-#define str_erealloc(str, new_len) \
- (IS_INTERNED(str) \
- ? _str_erealloc(str, new_len, INTERNED_LEN(str)) \
- : erealloc(str, new_len))
-
-static inline char *_str_erealloc(char *str, size_t new_len, size_t old_len) {
- char *buf = (char *) emalloc(new_len);
- memcpy(buf, str, old_len);
- return buf;
+/*
+ * Take another reference to s and return s itself; no data is copied.
+ * Interned strings are returned without touching their refcount.
+ */
+static zend_always_inline zend_string *zend_str_copy(zend_string *s)
+{
+	if (IS_INTERNED(s)) {
+		return s;
+	}
+	STR_ADDREF(s);
+	return s;
}
-#define str_estrndup(str, len) \
- (IS_INTERNED(str) ? (str) : estrndup((str), (len)))
+/*
+ * Duplicate s. An interned string is returned as-is (persistent is ignored
+ * in that case); any other string is copied into a fresh string built by
+ * STR_INIT() from s->val and s->len.
+ */
+static zend_always_inline zend_string *zend_str_dup(zend_string *s, int persistent)
+{
+	return IS_INTERNED(s) ? s : STR_INIT(s->val, s->len, persistent);
+}
+/* Resize s to len bytes and return the resulting string. The result may be
+ * a different pointer than s, so callers must continue with the return
+ * value. Three cases are distinguished:
+ *  - interned: s is left alone and a new string is allocated and filled;
+ *  - sole reference: the allocation is resized in place with perealloc()
+ *    and the cached hash is dropped;
+ *  - shared: a new string is allocated and filled, and one reference is
+ *    removed from s.
+ * When copying, min(len, s->len) + 1 bytes are taken from s->val. If the
+ * string shrinks, the final byte copied is s->val[len] rather than a NUL,
+ * so terminating the result is left to the caller.
+ */
+static zend_always_inline zend_string *zend_str_realloc(zend_string *s, int len, int persistent)
+{
+ zend_string *ret;
-#define str_strndup(str, len) \
- (IS_INTERNED(str) ? (str) : zend_strndup((str), (len)));
+ if (IS_INTERNED(s)) {
+ ret = STR_ALLOC(len, persistent); /* interned strings are never resized in place */
+ memcpy(ret->val, s->val, (len > s->len ? s->len : len) + 1);
+ } else if (STR_REFCOUNT(s) == 1) {
+ ret = perealloc(s, _STR_HEADER_SIZE + len + 1, persistent); /* s must not be used after this point */
+ ret->len = len;
+ STR_FORGET_HASH_VAL(ret); /* contents are expected to change, so the old hash is invalid */
+ } else {
+ ret = STR_ALLOC(len, persistent);
+ memcpy(ret->val, s->val, (len > s->len ? s->len : len) + 1);
+ STR_DELREF(s); /* the caller's reference moves to ret */
+ }
+ return ret;
+}
-#define str_hash(str, len) \
- (IS_INTERNED(str) ? INTERNED_HASH(str) : zend_hash_func((str), (len)+1))
+/*
+ * Resize s to n * m + l bytes and return the resulting string, which may
+ * be a different pointer than s.
+ *
+ * A non-interned string with exactly one reference is resized in place via
+ * safe_perealloc() and its cached hash is dropped. In every other case a
+ * new string is obtained from STR_SAFE_ALLOC(), min(new length, s->len) + 1
+ * bytes are copied over from s->val, and, unless s is interned, one
+ * reference is removed from s.
+ */
+static zend_always_inline zend_string *zend_str_safe_realloc(zend_string *s, size_t n, size_t m, size_t l, int persistent)
+{
+	zend_string *ret;
+	size_t new_len = (n * m) + l;
+
+	if (!IS_INTERNED(s) && STR_REFCOUNT(s) == 1) {
+		/* sole owner: resize the existing allocation */
+		ret = safe_perealloc(s, n, m, _STR_HEADER_SIZE + l + 1, persistent);
+		ret->len = new_len;
+		STR_FORGET_HASH_VAL(ret);
+		return ret;
+	}
+
+	/* interned or shared: build a separate string and copy what fits */
+	ret = STR_SAFE_ALLOC(n, m, l, persistent);
+	memcpy(ret->val, s->val, (new_len > s->len ? s->len : new_len) + 1);
+	if (!IS_INTERNED(s)) {
+		STR_DELREF(s);
+	}
+	return ret;
+}
+/*
+ * Free s unconditionally unless it is interned, in which case nothing
+ * happens. The refcount is not decremented; it is only asserted to be at
+ * most 1. The IS_STR_PERSISTENT flag of s selects the pefree() mode.
+ */
+static zend_always_inline void zend_str_free(zend_string *s)
+{
+	if (IS_INTERNED(s)) {
+		return;
+	}
+	ZEND_ASSERT(STR_REFCOUNT(s) <= 1);
+	pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
+}
+
+/*
+ * Give up one reference to s. Interned strings are ignored. For any other
+ * string the refcount is decremented and, once it reaches 0, the string is
+ * freed with pefree() in the mode selected by its IS_STR_PERSISTENT flag.
+ */
+static zend_always_inline void zend_str_release(zend_string *s)
+{
+	if (!IS_INTERNED(s) && STR_DELREF(s) == 0) {
+		pefree(s, GC_FLAGS(s) & IS_STR_PERSISTENT);
+	}
+}
+
+/*
+ * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
+ *
+ * This is Daniel J. Bernstein's popular `times 33' hash function as
+ * posted by him years ago on comp.lang.c. It basically uses a function
+ * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
+ * known hash functions for strings. Because it is both computed very
+ * fast and distributes very well.
+ *
+ * The magic of number 33, i.e. why it works better than many other
+ * constants, prime or not, has never been adequately explained by
+ * anyone. So I try an explanation: if one experimentally tests all
+ * multipliers between 1 and 256 (as RSE did now) one detects that even
+ * numbers are not useable at all. The remaining 128 odd numbers
+ * (except for the number 1) work more or less all equally well. They
+ * all distribute in an acceptable way and this way fill a hash table
+ * with an average percent of approx. 86%.
+ *
+ * If one compares the Chi^2 values of the variants, the number 33 not
+ * even has the best value. But the number 33 and a few other equally
+ * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
+ * advantage to the remaining numbers in the large set of possible
+ * multipliers: their multiply operation can be replaced by a faster
+ * operation based on just one shift plus either a single addition
+ * or subtraction operation. And because a hash function has to both
+ * distribute good _and_ has to be very fast to compute, those few
+ * numbers should be preferred and seems to be the reason why Daniel J.
+ * Bernstein also preferred it.
+ *
+ *
+ * -- Ralf S. Engelschall <rse@engelschall.com>
+ */
+
+/*
+ * Compute the "times 33" hash of the len bytes starting at str, beginning
+ * from the seed 5381: for each byte, hash = hash * 33 + byte. Bytes are
+ * read through a plain char pointer, so whether bytes above 0x7f add as
+ * positive or negative values follows the platform's char signedness.
+ */
+static inline ulong zend_inline_hash_func(const char *str, uint len)
+{
+	register ulong hash = 5381;
+
+	/* main part: eight bytes per iteration */
+	for (; len >= 8; len -= 8) {
+		hash = hash * 33 + *str++;
+		hash = hash * 33 + *str++;
+		hash = hash * 33 + *str++;
+		hash = hash * 33 + *str++;
+		hash = hash * 33 + *str++;
+		hash = hash * 33 + *str++;
+		hash = hash * 33 + *str++;
+		hash = hash * 33 + *str++;
+	}
+	/* remainder: between 0 and 7 bytes are left at this point */
+	while (len > 0) {
+		hash = hash * 33 + *str++;
+		len--;
+	}
+	return hash;
+}
#endif /* ZEND_STRING_H */