summary | refs | log | tree | commit | diff
path: root/ext/standard/html.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/standard/html.c')
-rw-r--r-- ext/standard/html.c | 99
1 files changed, 47 insertions, 52 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c
index fd210c8086..7564fb8217 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -936,8 +936,7 @@ static inline size_t write_octet_sequence(unsigned char *buf, enum entity_charse
static void traverse_for_entities(
const char *old,
size_t oldlen,
- char *ret, /* should have allocated TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(olden) */
- size_t *retlen,
+ zend_string *ret, /* should have allocated TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(olden) */
int all,
int flags,
const entity_ht *inv_map,
@@ -951,7 +950,7 @@ static void traverse_for_entities(
lim = old + oldlen; /* terminator address */
assert(*lim == '\0');
- for (p = old, q = ret; p < lim;) {
+ for (p = old, q = ret->val; p < lim;) {
unsigned code, code2 = 0;
const char *next = NULL; /* when set, next > p, otherwise possible inf loop */
@@ -1024,9 +1023,9 @@ static void traverse_for_entities(
goto invalid_code; /* not representable in target charset */
}
- q += write_octet_sequence(q, charset, code);
+ q += write_octet_sequence((unsigned char*)q, charset, code);
if (code2) {
- q += write_octet_sequence(q, charset, code2);
+ q += write_octet_sequence((unsigned char*)q, charset, code2);
}
/* jump over the valid entity; may go beyond size of buffer; np */
@@ -1040,7 +1039,7 @@ invalid_code:
}
*q = '\0';
- *retlen = (size_t)(q - ret);
+ ret->len = (size_t)(q - ret->val);
}
/* }}} */
@@ -1095,10 +1094,10 @@ static entity_table_opt determine_entity_table(int all, int doctype)
* only the basic ones, i.e., those in basic_entities_ex + the numeric entities
* that correspond to quotes.
*/
-PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC)
+PHPAPI zend_string *php_unescape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset TSRMLS_DC)
{
size_t retlen;
- char *ret;
+ zend_string *ret;
enum entity_charset charset;
const entity_ht *inverse_map = NULL;
size_t new_size = TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(oldlen);
@@ -1113,12 +1112,13 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_
if (oldlen > new_size) {
/* overflow, refuse to do anything */
- ret = estrndup((char*)old, oldlen);
+ ret = STR_INIT((char*)old, oldlen, 0);
retlen = oldlen;
goto empty_source;
}
- ret = emalloc(new_size);
- *ret = '\0';
+ ret = STR_ALLOC(new_size, 0);
+ ret->val[0] = '\0';
+ ret->len = oldlen;
retlen = oldlen;
if (retlen == 0) {
goto empty_source;
@@ -1127,17 +1127,16 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_
inverse_map = unescape_inverse_map(all, flags);
/* replace numeric entities */
- traverse_for_entities(old, oldlen, ret, &retlen, all, flags, inverse_map, charset);
+ traverse_for_entities((char*)old, oldlen, ret, all, flags, inverse_map, charset);
empty_source:
- *newlen = retlen;
return ret;
}
/* }}} */
-PHPAPI char *php_escape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC)
+PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset TSRMLS_DC)
{
- return php_escape_html_entities_ex(old, oldlen, newlen, all, flags, hint_charset, 1 TSRMLS_CC);
+ return php_escape_html_entities_ex(old, oldlen, all, flags, hint_charset, 1 TSRMLS_CC);
}
/* {{{ find_entity_for_char */
@@ -1223,10 +1222,10 @@ static inline void find_entity_for_char_basic(
/* {{{ php_escape_html_entities
*/
-PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset, zend_bool double_encode TSRMLS_DC)
+PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode TSRMLS_DC)
{
size_t cursor, maxlen, len;
- char *replaced;
+ zend_string *replaced;
enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
int doctype = flags & ENT_HTML_DOC_TYPE_MASK;
entity_table_opt entity_table;
@@ -1276,7 +1275,7 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size
}
}
- replaced = emalloc(maxlen + 1); /* adding 1 is safe: maxlen is even */
+ replaced = STR_ALLOC(maxlen, 0);
len = 0;
cursor = 0;
while (cursor < oldlen) {
@@ -1289,7 +1288,7 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size
/* guarantee we have at least 40 bytes to write.
* In HTML5, entities may take up to 33 bytes */
if (len > maxlen - 40) { /* maxlen can never be smaller than 128 */
- replaced = safe_erealloc(replaced, maxlen , 1, 128 + 1);
+ replaced = STR_SAFE_REALLOC(replaced, maxlen, 1, 128, 0);
maxlen += 128;
}
@@ -1298,12 +1297,11 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size
if (flags & ENT_HTML_IGNORE_ERRORS) {
continue;
} else if (flags & ENT_HTML_SUBSTITUTE_ERRORS) {
- memcpy(&replaced[len], replacement, replacement_len);
+ memcpy(&replaced->val[len], replacement, replacement_len);
len += replacement_len;
continue;
} else {
- efree(replaced);
- *newlen = 0;
+ STR_FREE(replaced);
return STR_EMPTY_ALLOC();
}
} else { /* SUCCESS */
@@ -1336,10 +1334,10 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size
}
if (rep != NULL) {
- replaced[len++] = '&';
- memcpy(&replaced[len], rep, rep_len);
+ replaced->val[len++] = '&';
+ memcpy(&replaced->val[len], rep, rep_len);
len += rep_len;
- replaced[len++] = ';';
+ replaced->val[len++] = ';';
} else {
/* we did not find an entity for this char.
* check for its validity, if its valid pass it unchanged */
@@ -1373,16 +1371,16 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size
}
pass_char_through:
if (mbseqlen > 1) {
- memcpy(replaced + len, mbsequence, mbseqlen);
+ memcpy(replaced->val + len, mbsequence, mbseqlen);
len += mbseqlen;
} else {
- replaced[len++] = mbsequence[0];
+ replaced->val[len++] = mbsequence[0];
}
}
} else { /* this_char == '&' */
if (double_encode) {
encode_amp:
- memcpy(&replaced[len], "&amp;", sizeof("&amp;") - 1);
+ memcpy(&replaced->val[len], "&amp;", sizeof("&amp;") - 1);
len += sizeof("&amp;") - 1;
} else { /* no double encode */
/* check if entity is valid */
@@ -1422,19 +1420,19 @@ encode_amp:
/* at this point maxlen - len >= 40 */
if (maxlen - len < ent_len + 2 /* & and ; */) {
/* ent_len < oldlen, which is certainly <= SIZE_MAX/2 */
- replaced = safe_erealloc(replaced, maxlen, 1, ent_len + 128 + 1);
+ replaced = STR_SAFE_REALLOC(replaced, maxlen, 1, ent_len + 128, 0);
maxlen += ent_len + 128;
}
- replaced[len++] = '&';
- memcpy(&replaced[len], &old[cursor], ent_len);
+ replaced->val[len++] = '&';
+ memcpy(&replaced->val[len], &old[cursor], ent_len);
len += ent_len;
- replaced[len++] = ';';
+ replaced->val[len++] = ';';
cursor += ent_len + 1;
}
}
}
- replaced[len] = '\0';
- *newlen = len;
+ replaced->val[len] = '\0';
+ replaced->len = len;
return replaced;
}
@@ -1446,9 +1444,8 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
{
char *str, *hint_charset = NULL;
int str_len, hint_charset_len = 0;
- size_t new_len;
long flags = ENT_COMPAT;
- char *replaced;
+ zend_string *replaced;
zend_bool double_encode = 1;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls!b", &str, &str_len, &flags, &hint_charset, &hint_charset_len, &double_encode) == FAILURE) {
@@ -1458,9 +1455,8 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
if (!hint_charset) {
hint_charset = get_default_charset(TSRMLS_C);
}
- replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
-
- RETVAL_STRINGL(replaced, (int)new_len, 0);
+ replaced = php_escape_html_entities_ex((unsigned char*)str, str_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+ RETVAL_STR(replaced);
}
/* }}} */
@@ -1500,17 +1496,16 @@ PHP_FUNCTION(htmlspecialchars_decode)
{
char *str;
int str_len;
- size_t new_len = 0;
long quote_style = ENT_COMPAT;
- char *replaced;
+ zend_string *replaced;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &str_len, &quote_style) == FAILURE) {
return;
}
- replaced = php_unescape_html_entities(str, str_len, &new_len, 0 /*!all*/, quote_style, NULL TSRMLS_CC);
+ replaced = php_unescape_html_entities((unsigned char*)str, str_len, 0 /*!all*/, quote_style, NULL TSRMLS_CC);
if (replaced) {
- RETURN_STRINGL(replaced, (int)new_len, 0);
+ RETURN_STR(replaced);
}
RETURN_FALSE;
}
@@ -1521,10 +1516,10 @@ PHP_FUNCTION(htmlspecialchars_decode)
PHP_FUNCTION(html_entity_decode)
{
char *str, *hint_charset = NULL;
- int str_len, hint_charset_len;
+ int str_len, hint_charset_len = 0;
size_t new_len = 0;
long quote_style = ENT_COMPAT;
- char *replaced;
+ zend_string *replaced;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls", &str, &str_len,
&quote_style, &hint_charset, &hint_charset_len) == FAILURE) {
@@ -1534,10 +1529,10 @@ PHP_FUNCTION(html_entity_decode)
if (!hint_charset) {
hint_charset = get_default_charset(TSRMLS_C);
}
- replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+ replaced = php_unescape_html_entities((unsigned char*)str, str_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
if (replaced) {
- RETURN_STRINGL(replaced, (int)new_len, 0);
+ RETURN_STR(replaced);
}
RETURN_FALSE;
}
@@ -1563,13 +1558,13 @@ static inline void write_s3row_data(
char entity[LONGEST_ENTITY_LENGTH + 2] = {'&'};
size_t written_k1;
- written_k1 = write_octet_sequence(key, charset, orig_cp);
+ written_k1 = write_octet_sequence((unsigned char*)key, charset, orig_cp);
if (!r->ambiguous) {
size_t l = r->data.ent.entity_len;
memcpy(&entity[1], r->data.ent.entity, l);
entity[l + 1] = ';';
- add_assoc_stringl_ex(arr, key, written_k1 + 1, entity, l + 2, 1);
+ add_assoc_stringl_ex(arr, key, written_k1, entity, l + 2);
} else {
unsigned i,
num_entries;
@@ -1579,7 +1574,7 @@ static inline void write_s3row_data(
size_t l = mcpr[0].leading_entry.default_entity_len;
memcpy(&entity[1], mcpr[0].leading_entry.default_entity, l);
entity[l + 1] = ';';
- add_assoc_stringl_ex(arr, key, written_k1 + 1, entity, l + 2, 1);
+ add_assoc_stringl_ex(arr, key, written_k1, entity, l + 2);
}
num_entries = mcpr[0].leading_entry.size;
for (i = 1; i <= num_entries; i++) {
@@ -1598,11 +1593,11 @@ static inline void write_s3row_data(
spe_cp = uni_cp;
}
- written_k2 = write_octet_sequence(&key[written_k1], charset, spe_cp);
+ written_k2 = write_octet_sequence((unsigned char*)&key[written_k1], charset, spe_cp);
memcpy(&entity[1], mcpr[i].normal_entry.entity, l);
entity[l + 1] = ';';
entity[l + 1] = '\0';
- add_assoc_stringl_ex(arr, key, written_k1 + written_k2 + 1, entity, l + 1, 1);
+ add_assoc_stringl_ex(arr, key, written_k1 + written_k2, entity, l + 1);
}
}
}