summaryrefslogtreecommitdiff
path: root/main/streams/streams.c
diff options
context:
space:
mode:
Diffstat (limited to 'main/streams/streams.c')
-rwxr-xr-xmain/streams/streams.c996
1 files changed, 827 insertions, 169 deletions
diff --git a/main/streams/streams.c b/main/streams/streams.c
index d3ede8fc44..fd69a4b36f 100755
--- a/main/streams/streams.c
+++ b/main/streams/streams.c
@@ -368,9 +368,11 @@ fprintf(stderr, "stream_free: %s:%p[%s] preserve_handle=%d release_cast=%d remov
stream->wrapperdata = NULL;
}
- if (stream->readbuf) {
- pefree(stream->readbuf, stream->is_persistent);
- stream->readbuf = NULL;
+ while (stream->readbuf.head) {
+ php_stream_bucket *bucket = stream->readbuf.head;
+
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
}
if (stream->is_persistent && (close_options & PHP_STREAM_FREE_PERSISTENT)) {
@@ -422,8 +424,6 @@ fprintf(stderr, "stream_free: %s:%p[%s] preserve_handle=%d release_cast=%d remov
static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_DC)
{
- /* allocate/fill the buffer */
-
if (stream->readfilters.head) {
char *chunk_buf;
int err_flag = 0;
@@ -433,7 +433,7 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D
/* allocate a buffer for reading chunks */
chunk_buf = emalloc(stream->chunk_size);
- while (!err_flag && (stream->writepos - stream->readpos < (off_t)size)) {
+ while (!err_flag && (stream->readbuf_avail < (off_t)size)) {
size_t justread = 0;
int flags;
php_stream_bucket *bucket;
@@ -475,22 +475,38 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D
/* we get here when the last filter in the chain has data to pass on.
* in this situation, we are passing the brig_in brigade into the
* stream read buffer */
- while (brig_inp->head) {
- bucket = brig_inp->head;
- /* grow buffer to hold this bucket
- * TODO: this can fail for persistent streams */
- if (stream->readbuflen - stream->writepos < bucket->buflen) {
- stream->readbuflen += bucket->buflen;
- stream->readbuf = perealloc(stream->readbuf, stream->readbuflen,
- stream->is_persistent);
- }
- memcpy(stream->readbuf + stream->writepos, bucket->buf, bucket->buflen);
- stream->writepos += bucket->buflen;
-
+ while ((bucket = brig_inp->head)) {
+ php_stream_bucket *tail = stream->readbuf.tail;
php_stream_bucket_unlink(bucket TSRMLS_CC);
- php_stream_bucket_delref(bucket TSRMLS_CC);
+ if (bucket->is_unicode &&
+ U16_IS_SURROGATE(*bucket->buf.ustr.val) &&
+ !U16_IS_SURROGATE_LEAD(*bucket->buf.ustr.val) &&
+ tail && tail->is_unicode &&
+ U16_IS_SURROGATE(tail->buf.ustr.val[tail->buf.ustr.len - 1]) &&
+ U16_IS_SURROGATE_LEAD(tail->buf.ustr.val[tail->buf.ustr.len - 1])) {
+ /* Surrogate pair got split between buckets -- Unlikely */
+ UChar *tmp;
+
+ tmp = peumalloc(bucket->buf.ustr.len + 1, bucket->is_persistent);
+ *tmp = stream->readbuf.tail->buf.ustr.val[--tail->buf.ustr.len];
+ memmove(tmp + UBYTES(1), bucket->buf.ustr.val, UBYTES(bucket->buf.ustr.len));
+ pefree(bucket->buf.ustr.val, bucket->is_persistent);
+ bucket->buf.ustr.val = tmp;
+
+ if (tail->buf.ustr.len <= 0) {
+ /* Tail was only a one UChar bucket */
+ php_stream_bucket_unlink(tail TSRMLS_CC);
+ php_stream_bucket_delref(tail TSRMLS_CC);
+ } else if (tail == stream->readbuf.head && (tail->buf.ustr.len <= stream->readbuf_ofs)) {
+ /* Tail was head and last char was only unused portion */
+ php_stream_bucket_unlink(tail TSRMLS_CC);
+ php_stream_bucket_delref(tail TSRMLS_CC);
+ stream->readbuf_ofs = 0;
+ }
+ }
+ php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC);
+ stream->readbuf_avail += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len;
}
-
break;
case PSFS_FEED_ME:
@@ -520,30 +536,22 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D
} else {
/* is there enough data in the buffer ? */
- if (stream->writepos - stream->readpos < (off_t)size) {
+ if (stream->readbuf_avail < (off_t)size) {
+ char *chunk_buf;
size_t justread = 0;
+ int is_persistent = php_stream_is_persistent(stream);
- /* reduce buffer memory consumption if possible, to avoid a realloc */
- if (stream->readbuf && stream->readbuflen - stream->writepos < stream->chunk_size) {
- memmove(stream->readbuf, stream->readbuf + stream->readpos, stream->readbuflen - stream->readpos);
- stream->writepos -= stream->readpos;
- stream->readpos = 0;
- }
-
- /* grow the buffer if required
- * TODO: this can fail for persistent streams */
- if (stream->readbuflen - stream->writepos < stream->chunk_size) {
- stream->readbuflen += stream->chunk_size;
- stream->readbuf = perealloc(stream->readbuf, stream->readbuflen,
- stream->is_persistent);
- }
+ chunk_buf = pemalloc(stream->chunk_size, is_persistent);
+ justread = stream->ops->read(stream, chunk_buf, stream->chunk_size TSRMLS_CC);
- justread = stream->ops->read(stream, stream->readbuf + stream->writepos,
- stream->readbuflen - stream->writepos
- TSRMLS_CC);
+ if (justread == (size_t)-1 || justread == 0) {
+ pefree(chunk_buf, is_persistent);
+ } else {
+ php_stream_bucket *bucket;
- if (justread != (size_t)-1) {
- stream->writepos += justread;
+ bucket = php_stream_bucket_new(stream, chunk_buf, justread, 1, is_persistent TSRMLS_CC);
+ php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC);
+ stream->readbuf_avail += justread;
}
}
}
@@ -551,23 +559,32 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D
PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS_DC)
{
+ php_stream_bucket *bucket;
size_t toread = 0, didread = 0;
while (size > 0) {
-
/* take from the read buffer first.
* It is possible that a buffered stream was switched to non-buffered, so we
* drain the remainder of the buffer before using the "raw" read mode for
* the excess */
- if (stream->writepos > stream->readpos) {
- toread = stream->writepos - stream->readpos;
+ while (size > 0 && (bucket = stream->readbuf.head)) {
+ if (bucket->is_unicode) {
+ /* This is an string read func, convert to string first */
+ php_stream_bucket_tostring(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC);
+ }
+ toread = bucket->buf.str.len - stream->readbuf_ofs;
if (toread > size) {
toread = size;
}
-
- memcpy(buf, stream->readbuf + stream->readpos, toread);
- stream->readpos += toread;
+ memcpy(buf, bucket->buf.str.val + stream->readbuf_ofs, toread);
+ stream->readbuf_ofs += toread;
+ stream->readbuf_avail -= toread;
+ if (stream->readbuf_ofs >= bucket->buf.str.len) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
+ }
size -= toread;
buf += toread;
didread += toread;
@@ -578,32 +595,90 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS
break;
}
+ /* just break anyway, to avoid greedy read */
+ if (didread > 0 && (stream->wrapper != &php_plain_files_wrapper)) {
+ break;
+ }
+
if (!stream->readfilters.head && (stream->flags & PHP_STREAM_FLAG_NO_BUFFER || stream->chunk_size == 1)) {
toread = stream->ops->read(stream, buf, size TSRMLS_CC);
- } else {
- php_stream_fill_read_buffer(stream, size TSRMLS_CC);
+ if (toread <= 0) {
+ break;
+ }
+ buf += toread;
+ size -= toread;
+ didread += toread;
+ continue;
+ }
+
+ php_stream_fill_read_buffer(stream, size TSRMLS_CC);
+ if (stream->readbuf_avail <= 0) {
+ /* EOF, or temporary end of data (for non-blocking mode). */
+ break;
+ }
+ }
+
+ if (didread > 0) {
+ stream->position += didread;
+ }
+ return didread;
+}
- toread = stream->writepos - stream->readpos;
+PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int32_t size TSRMLS_DC)
+{
+ php_stream_bucket *bucket;
+ size_t toread = 0, didread = 0;
+
+ while (size > 0) {
+ /* take from the read buffer first.
+ * It is possible that a buffered stream was switched to non-buffered, so we
+ * drain the remainder of the buffer before using the "raw" read mode for
+ * the excess */
+
+ while (size > 0 && (bucket = stream->readbuf.head)) {
+ UChar lastchar = 0;
+
+ if (!bucket->is_unicode) {
+ /* This is a unicode read func, convert to unicode first */
+ php_stream_bucket_tounicode(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC);
+ }
+ toread = bucket->buf.ustr.len - stream->readbuf_ofs;
if (toread > size) {
toread = size;
}
-
- if (toread > 0) {
- memcpy(buf, stream->readbuf + stream->readpos, toread);
- stream->readpos += toread;
+ lastchar = *(bucket->buf.ustr.val + stream->readbuf_ofs + toread - 1);
+ if (U16_IS_SURROGATE(lastchar) && U16_IS_SURROGATE_LEAD(lastchar)) {
+ toread--;
+ /* The only time we should encounter a split surrogate is when the buffer size is truncating the data
+ In this case, reduce size along with toread to avoid getting stuck */
+ size--;
+ }
+ memcpy(buf, bucket->buf.ustr.val + stream->readbuf_ofs, toread * sizeof(UChar));
+ stream->readbuf_ofs += toread;
+ stream->readbuf_avail -= toread;
+ if (stream->readbuf_ofs >= bucket->buf.ustr.len) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
}
- }
- if (toread > 0) {
- didread += toread;
- buf += toread;
size -= toread;
- } else {
- /* EOF, or temporary end of data (for non-blocking mode). */
+ buf += toread;
+ didread += toread;
+ }
+
+ /* ignore eof here; the underlying state might have changed */
+ if (size == 0) {
break;
}
/* just break anyway, to avoid greedy read */
- if (stream->wrapper != &php_plain_files_wrapper) {
+ if (didread > 0 && (stream->wrapper != &php_plain_files_wrapper)) {
+ break;
+ }
+
+ php_stream_fill_read_buffer(stream, size * sizeof(UChar) TSRMLS_CC);
+ if (stream->readbuf_avail <= 0) {
+ /* EOF, or temporary end of data (for non-blocking mode). */
break;
}
}
@@ -615,10 +690,182 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS
return didread;
}
+/* buf mabe NULL (in which case it will be allocated)
+ num_bytes and num_chars must be initialized upon entry to maximum for each (-1 for no maximum)
+ num_bytes/num_chars will be set on exit to actual contents of buf
+ Will return unicode/string type dependent on the first character unit in the read buf
+ Will return as many characters as possible (and permitted by max lengths) without changing unicode/string type
+ Will not split surrogate pairs */
+PHPAPI void *_php_stream_u_read(php_stream *stream, void *buf, int32_t *pnum_bytes, int32_t *pnum_chars, int *pis_unicode TSRMLS_DC)
+{
+ int grow_mode = 0;
+ int32_t num_bytes = 0, num_chars = 0;
+ int32_t max_bytes = *pnum_bytes, max_chars = *pnum_chars;
+ int32_t buflen = buf ? max_bytes : 2048;
+ int32_t bufpos = 0;
+ int is_unicode;
+ php_stream_bucket *bucket;
+
+ /* It's possible that we have a readbuf, but that it's only half of a surrogate pair */
+ if (!stream->readbuf.head ||
+ (stream->readbuf.head == stream->readbuf.tail && stream->readbuf.head->is_unicode &&
+ (stream->readbuf.head->buf.ustr.len - stream->readbuf_ofs) == 1 &&
+ U16_IS_SURROGATE(stream->readbuf.head->buf.ustr.val[stream->readbuf.head->buf.ustr.len-1]))) {
+ php_stream_fill_read_buffer(stream, max_bytes ? max_bytes : (max_chars ? max_chars : stream->chunk_size) TSRMLS_CC);
+ }
+
+
+ if (!stream->readbuf.head ||
+ (stream->readbuf.head == stream->readbuf.tail && stream->readbuf.head->is_unicode &&
+ (stream->readbuf.head->buf.ustr.len - stream->readbuf_ofs) == 1 &&
+ U16_IS_SURROGATE(stream->readbuf.head->buf.ustr.val[stream->readbuf.head->buf.ustr.len-1]))) {
+ /* Nothing to return */
+ *pnum_bytes = 0;
+ *pnum_chars = 0;
+ *pis_unicode = 0;
+ return NULL;
+ }
+
+
+ if (!buf) {
+ grow_mode = 1;
+ buf = emalloc(buflen);
+ }
+
+ is_unicode = stream->readbuf.head->is_unicode;
+ if (is_unicode) {
+ /* normalize byte boundary */
+ if (max_bytes >= 0 && (max_bytes % sizeof(UChar))) {
+ max_bytes -= (max_bytes % sizeof(UChar));
+ }
+ if (max_bytes >= 0 && max_bytes < UBYTES(max_chars)) {
+ /* max_bytes needs to be at least twice max_chars when both are provided */
+ max_chars = (max_bytes / sizeof(UChar));
+ }
+ } else {
+ if (max_chars < 0 && max_bytes >= 0) {
+ max_chars = max_bytes;
+ } else if (max_chars >= 0 && grow_mode) {
+ max_bytes = max_chars;
+ }
+ }
+
+ for (;;) {
+ if (buflen - bufpos < 1024 && max_bytes >= 0 && max_bytes > buflen) {
+ buflen += 1024;
+ if (buflen > max_bytes) {
+ buflen = max_bytes;
+ }
+ buf = erealloc(buf, buflen);
+ }
+
+ if ((bucket = stream->readbuf.head)) {
+ if ((bucket->is_unicode && !is_unicode) ||
+ (!bucket->is_unicode && is_unicode)) {
+ /* data type swap, exit now */
+ break;
+ }
+ if (bucket->is_unicode) {
+ UChar *s = bucket->buf.ustr.val + stream->readbuf_ofs, *p;
+ int bytes_in_buf, chars_in_buf;
+ int32_t ofs = 0;
+
+ chars_in_buf = u_countChar32(s, bucket->buf.ustr.len - stream->readbuf_ofs);
+
+ if (chars_in_buf > max_chars && max_chars >= 0) {
+ chars_in_buf = max_chars;
+ }
+ /* u_countChar32 tells us that we won't overrun anyway */
+ U16_FWD_N_UNSAFE(s, ofs, chars_in_buf);
+ p = s + ofs;
+ bytes_in_buf = UBYTES(ofs);
+ if (bytes_in_buf > (max_bytes - num_bytes)) {
+ bytes_in_buf = max_bytes - num_bytes;
+ bytes_in_buf -= bytes_in_buf & 1; /* normalize */
+ p = s + (bytes_in_buf >> 1);
+ if (p > s && U16_IS_SURROGATE(p[-1]) && U16_IS_SURROGATE_LEAD(p[-1])) {
+ /* Don't split surrogate pairs */
+ p--;
+ bytes_in_buf -= UBYTES(1);
+ }
+ if (bytes_in_buf <= 0) {
+ /* No room to copy data (surrogate pair) */
+ break;
+ }
+ chars_in_buf = u_countChar32(s, p - s);
+ }
+ memcpy((char *)buf + num_bytes, s, bytes_in_buf);
+ num_bytes += bytes_in_buf;
+ num_chars += chars_in_buf;
+ stream->readbuf_ofs += p - s;
+ stream->readbuf_avail -= p - s;
+ if (stream->readbuf_ofs >= bucket->buf.ustr.len) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
+ } else if (stream->readbuf_ofs == (bucket->buf.ustr.len - 1) &&
+ U16_IS_SURROGATE(bucket->buf.ustr.val[bucket->buf.ustr.len - 1]) &&
+ bucket->next && bucket->next->is_unicode) {
+ /* Only one char left in the bucket, avoid already split surrogates getting "stuck" -- Should never happen thanks to fill_read_buffer */
+ php_stream_bucket *next_bucket = bucket->next;
+
+ bucket->buf.ustr.val = peurealloc(bucket->buf.ustr.val, next_bucket->buf.ustr.len + 1, bucket->is_persistent);
+ bucket->buf.ustr.val[0] = bucket->buf.ustr.val[bucket->buf.ustr.len - 1];
+ memcpy(bucket->buf.ustr.val + 1, next_bucket->buf.ustr.val, UBYTES(next_bucket->buf.ustr.len));
+ php_stream_bucket_unlink(next_bucket TSRMLS_CC);
+ php_stream_bucket_delref(next_bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
+ } else {
+ /* Reached max limits */
+ break;
+ }
+ } else {
+ int want = (max_chars < 0 || max_chars >= buflen) ? (buflen - num_bytes) : (max_chars - num_chars);
+ int avail = bucket->buf.str.len - stream->readbuf_ofs;
+
+ if (max_bytes >= 0 && want > max_bytes) {
+ want = max_bytes;
+ }
+
+ if (want > avail) {
+ want = avail;
+ }
+
+ memcpy((char *)buf + num_bytes, bucket->buf.str.val + stream->readbuf_ofs, want);
+ stream->readbuf_ofs += want;
+ stream->readbuf_avail -= want;
+ num_bytes += want;
+ num_chars += want;
+ if (stream->readbuf_ofs >= bucket->buf.str.len) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ stream->readbuf_ofs = 0;
+ } else {
+ /* Reached max limit */
+ break;
+ }
+ }
+ } else {
+ /* No more data */
+ break;
+ }
+ }
+ /* Successful exit */
+ *pnum_bytes = num_bytes;
+ *pnum_chars = num_chars;
+ *pis_unicode = is_unicode;
+
+ if (num_chars == 0 && grow_mode) {
+ efree(buf);
+ buf = NULL;
+ }
+ return buf;
+}
+
PHPAPI int _php_stream_eof(php_stream *stream TSRMLS_DC)
{
/* if there is data in the buffer, it's not EOF */
- if (stream->writepos - stream->readpos > 0) {
+ if (stream->readbuf_avail > 0) {
return 0;
}
@@ -684,6 +931,8 @@ PHPAPI int _php_stream_stat(php_stream *stream, php_stream_statbuf *ssb TSRMLS_D
return (stream->ops->stat)(stream, ssb TSRMLS_CC);
}
+/* buf != NULL Still used by file() in ext/standard/file.c
+ buf == NULL semantics no longer supported */
PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len TSRMLS_DC)
{
size_t avail;
@@ -691,8 +940,7 @@ PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len
char *readptr;
if (!buf) {
- readptr = stream->readbuf + stream->readpos;
- avail = stream->writepos - stream->readpos;
+ return NULL;
} else {
readptr = buf;
avail = buf_len;
@@ -725,123 +973,366 @@ PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len
/* If buf == NULL, the buffer will be allocated automatically and will be of an
* appropriate length to hold the line, regardless of the line length, memory
- * permitting */
+ * permitting -- returned string will be up to (maxlen-1), last byte holding terminating NULL */
PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
size_t *returned_len TSRMLS_DC)
{
- size_t avail = 0;
- size_t current_buf_size = 0;
+ php_stream_bucket *bucket;
size_t total_copied = 0;
- int grow_mode = 0;
- char *bufstart = buf;
+ int growmode = 0;
- if (buf == NULL) {
- grow_mode = 1;
- } else if (maxlen == 0) {
- return NULL;
+ if (!buf) {
+ maxlen = stream->chunk_size + 1;
+ buf = emalloc(maxlen);
+ growmode = 1;
}
- /*
- * If the underlying stream operations block when no new data is readable,
- * we need to take extra precautions.
- *
- * If there is buffered data available, we check for a EOL. If it exists,
- * we pass the data immediately back to the caller. This saves a call
- * to the read implementation and will not block where blocking
- * is not necessary at all.
- *
- * If the stream buffer contains more data than the caller requested,
- * we can also avoid that costly step and simply return that data.
- */
+ /* Leave room for NULL */
+ maxlen--;
- for (;;) {
- avail = stream->writepos - stream->readpos;
+ for(;;) {
+ /* Fill buf with buffered data
+ until no space is left in the buffer
+ or EOL is found */
+ char lastchar = 0;
- if (avail > 0) {
- size_t cpysz = 0;
- char *readptr;
+ /* consumed readbuf if possible */
+ while ((bucket = stream->readbuf.head)) {
char *eol;
- int done = 0;
+ size_t tocopy;
+ size_t wanted = maxlen - total_copied;
+ int bucket_consumed = 0;
- readptr = stream->readbuf + stream->readpos;
- eol = php_stream_locate_eol(stream, NULL, 0 TSRMLS_CC);
+ if (bucket->is_unicode) {
+ /* This is a string read func, convert to string first */
+ php_stream_bucket_tostring(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC);
+ }
- if (eol) {
- cpysz = eol - readptr + 1;
- done = 1;
+ if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && lastchar == '\r') {
+ /* Line ending was actually found in the last char of the last bucket
+ Since it was \r it could have been MAC or DOS */
+ stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL;
+ if (bucket->buf.str.val[stream->readbuf_ofs] == '\n') {
+ /* First byte here is a \n, put them together and you get DOS line endings */
+ stream->readbuf_ofs++;
+ stream->readbuf_avail--;
+ buf[total_copied++] = '\n';
+ /* unlikely -- It'd mean a one byte bucket -- possible though */
+ if (stream->readbuf_ofs >= bucket->buf.str.len) {
+ stream->readbuf_ofs = 0;
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ }
+ } else {
+ /* Seeing no \n in the first char of this bucket, we know it was MAC */
+ stream->flags |= PHP_STREAM_FLAG_EOL_MAC;
+ }
+ goto exit_getline;
+ } else if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) {
+ char *cr, *lf;
+ lf = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\n', bucket->buf.str.len - stream->readbuf_ofs);
+ cr = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\r', bucket->buf.str.len - stream->readbuf_ofs);
+ eol = (cr && (!lf || cr < (lf - 1))) ? cr : lf;
+ } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) {
+ eol = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\r', bucket->buf.str.len - stream->readbuf_ofs);
} else {
- cpysz = avail;
+ eol = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\n', bucket->buf.str.len - stream->readbuf_ofs);
}
- if (grow_mode) {
- /* allow room for a NUL. If this realloc is really a realloc
- * (ie: second time around), we get an extra byte. In most
- * cases, with the default chunk size of 8K, we will only
- * incur that overhead once. When people have lines longer
- * than 8K, we waste 1 byte per additional 8K or so.
- * That seems acceptable to me, to avoid making this code
- * hard to follow */
- bufstart = erealloc(bufstart, current_buf_size + cpysz + 1);
- current_buf_size += cpysz + 1;
- buf = bufstart + total_copied;
- } else {
- if (cpysz >= maxlen - 1) {
- cpysz = maxlen - 1;
- done = 1;
+ /* No \r or \n found in bucket -- grab it all */
+ if (!eol) {
+ eol = bucket->buf.str.val + bucket->buf.str.len - 1;
+ }
+ tocopy = eol - (bucket->buf.str.val + stream->readbuf_ofs) + 1;
+
+ /* maxlen exceeded */
+ if (tocopy > wanted && growmode) {
+ if (tocopy - wanted > stream->chunk_size) {
+ maxlen += tocopy - wanted;
+ } else {
+ maxlen += stream->chunk_size;
}
+ buf = erealloc(buf, maxlen + 1);
+ wanted = maxlen - total_copied;
}
- memcpy(buf, readptr, cpysz);
+ if (tocopy > wanted) {
+ tocopy = wanted;
+ }
- stream->position += cpysz;
- stream->readpos += cpysz;
- buf += cpysz;
- maxlen -= cpysz;
- total_copied += cpysz;
+ memcpy(buf + total_copied, bucket->buf.str.val + stream->readbuf_ofs, tocopy);
+ total_copied += tocopy;
+ stream->readbuf_ofs += tocopy;
+ stream->readbuf_avail -= tocopy;
+ lastchar = buf[total_copied-1];
- if (done) {
- break;
+ if (stream->readbuf_ofs >= bucket->buf.str.len) {
+ stream->readbuf_ofs = 0;
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ bucket_consumed = 1;
}
- } else if (stream->eof) {
- break;
- } else {
- /* XXX: Should be fine to always read chunk_size */
- size_t toread;
-
- if (grow_mode) {
- toread = stream->chunk_size;
- } else {
- toread = maxlen - 1;
- if (toread > stream->chunk_size) {
- toread = stream->chunk_size;
- }
+
+ if (total_copied >= maxlen) {
+ goto exit_getline;
}
- php_stream_fill_read_buffer(stream, toread TSRMLS_CC);
+ if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL &&
+ bucket_consumed && lastchar == '\r') {
+ /* Could be MAC, could be DOS...
+ Need to check the first char of the next bucket to be sure */
+ continue;
+ }
- if (stream->writepos - stream->readpos == 0) {
- break;
+ if (lastchar == '\r' || lastchar == '\n') {
+ stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL;
+ if (lastchar == '\r') {
+ /* if there were a \n in this bucket after the \r, we would be looking at it */
+ stream->flags |= PHP_STREAM_FLAG_EOL_MAC;
+ }
+ goto exit_getline;
}
}
- }
- if (total_copied == 0) {
- if (grow_mode) {
- assert(bufstart == NULL);
+ if (stream->eof) {
+ if (total_copied == 0) {
+ if (growmode) {
+ efree(buf);
+ }
+ return NULL;
+ }
+ goto exit_getline;
}
- return NULL;
+
+ if (maxlen - total_copied) {
+ size_t bufneeded = maxlen - total_copied;
+
+ if (growmode) {
+ bufneeded = stream->chunk_size;
+ }
+ php_stream_fill_read_buffer(stream, bufneeded TSRMLS_CC);
+ }
+
}
- buf[0] = '\0';
+ exit_getline:
+
if (returned_len) {
*returned_len = total_copied;
}
+ buf[total_copied] = 0;
+ stream->position += total_copied;
+
+ return buf;
+}
+
+/* If buf == NULL, the buffer will be allocated automatically and will be of an
+ * appropriate length to hold the line, regardless of the line length, memory
+ * permitting -- returned string will be up to (maxlen-1), last byte holding terminating NULL */
+PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC)
+{
+ php_stream_bucket *bucket;
+ int32_t num_bytes = 0, num_chars = 0;
+ int32_t max_bytes = *pmax_bytes, max_chars = *pmax_chars;
+ int growmode = 0, is_unicode;
+
+ while (!stream->readbuf.head) {
+ /* Nothing buffered, get an idea of the data type by polling */
+ int32_t fillsize = (max_chars > 0) ? max_chars : ((max_bytes > 0) ? max_bytes : stream->chunk_size);
+
+ php_stream_fill_read_buffer(stream, fillsize TSRMLS_CC);
+ if (!stream->readbuf.head) {
+ *pmax_bytes = 0;
+ *pmax_chars = 0;
+ *pis_unicode = 0;
+ return NULL;
+ }
+ }
+
+ *pis_unicode = is_unicode = stream->readbuf.head->is_unicode;
+
+ if (!is_unicode) {
+ /* Wrap normal get_line() */
+ int returned_len;
+ char *retbuf = php_stream_get_line(stream, (char*)buf, max_chars, &returned_len);
+
+ *pmax_chars = returned_len;
+ *pmax_bytes = returned_len;
+ return (UChar*)retbuf;
+ }
+
+ /* Now act like php_stream_u_read(), but stopping at 000A, 000D, or 000D 000A */
+
+ if (!buf) {
+ max_bytes = UBYTES(257);
+ buf = emalloc(max_bytes);
+ growmode = 1;
+ }
+
+ /* Leave room for NULL */
+ max_bytes -= UBYTES(1);
+
+ for(;;) {
+ /* Fill buf with buffered data
+ until no space is left in the buffer
+ or EOL is found */
+ UChar lastchar = 0;
+
+ /* consumed readbuf if possible */
+ while ((bucket = stream->readbuf.head)) {
+ UChar *eol, *s;
+ int32_t want_chars = max_chars - num_chars;
+ int32_t want_bytes = max_bytes - num_bytes;
+ int32_t count_chars;
+ int32_t count_bytes;
+ int bucket_consumed = 0;
+
+ if (!bucket->is_unicode) {
+ /* Done with unicode data, bail as though EOL was reached (even though it wasn't) */
+ goto exit_ugetline;
+ }
+
+ if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && lastchar == '\r') {
+ /* Line ending was actually found in the last char of the last bucket
+ Since it was \r it could have been MAC or DOS */
+ stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL;
+ if (bucket->buf.ustr.val[stream->readbuf_ofs] == '\n') {
+ /* First byte here is a \n, put them together and you get DOS line endings */
+ stream->readbuf_ofs++;
+ stream->readbuf_avail--;
+ buf[num_bytes >> 1] = '\n'; /* Can't use num_chars here, surrogate pairs will foul it up */
+ num_bytes += UBYTES(1);
+ num_chars++;
+ /* unlikely -- It'd mean a one UChar bucket -- possible though */
+ if (stream->readbuf_ofs >= bucket->buf.ustr.len) {
+ stream->readbuf_ofs = 0;
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ }
+ } else {
+ /* Seeing no \n in the first char of this bucket, we know it was MAC */
+ stream->flags |= PHP_STREAM_FLAG_EOL_MAC;
+ }
+ goto exit_ugetline;
+ } else if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) {
+ UChar *cr, *lf;
+ lf = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\n', bucket->buf.ustr.len - stream->readbuf_ofs);
+ cr = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\r', bucket->buf.ustr.len - stream->readbuf_ofs);
+ eol = (cr && (!lf || cr < (lf - 1))) ? cr : lf;
+ } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) {
+ eol = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\r', bucket->buf.ustr.len - stream->readbuf_ofs);
+ } else {
+ eol = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\n', bucket->buf.ustr.len - stream->readbuf_ofs);
+ }
+
+ /* No \r or \n found in bucket -- grab it all */
+ if (!eol) {
+ eol = bucket->buf.ustr.val + bucket->buf.ustr.len - 1;
+ }
+ s = bucket->buf.ustr.val + stream->readbuf_ofs;
+
+ count_bytes = UBYTES(eol - s + 1);
+ if (count_bytes > want_bytes && growmode) {
+ max_bytes = num_bytes + count_bytes + UBYTES(256);
+ want_bytes = max_bytes - num_bytes;
+ buf = erealloc(buf, max_bytes + UBYTES(1));
+ } else if (count_bytes > want_bytes) {
+ count_bytes = want_bytes;
+ }
+ if (U16_IS_SURROGATE(s[(count_bytes >> 1) - 1]) &&
+ U16_IS_SURROGATE_LEAD(s[(count_bytes >> 1) - 1])) {
+ count_bytes -= UBYTES(1);
+ }
+ if (count_bytes <= 0) {
+ /* Not enough space in buffer, just break out */
+ goto exit_ugetline;
+ }
+ count_chars = u_countChar32(s, count_bytes >> 1);
+
+ if (max_chars >= 0 && count_chars > want_chars) {
+ count_chars = want_chars;
+ count_bytes = 0;
+ U16_FWD_N_UNSAFE(s, count_bytes, count_chars);
+ count_bytes <<= 1; /* translate U16 to bytes */
+ }
+
+ memcpy(buf + num_bytes, s, count_bytes);
+ num_bytes += count_bytes;
+ num_chars += count_chars;
+ stream->readbuf_ofs += count_bytes >> 1;
+ stream->readbuf_avail -= count_bytes >> 1;
+
+ lastchar = buf[(num_bytes >> 1) - 1];
+
+ if (stream->readbuf_ofs >= bucket->buf.ustr.len) {
+ stream->readbuf_ofs = 0;
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ bucket_consumed = 1;
+ }
+
+ if ((max_bytes >= 0 && num_bytes >= max_bytes) ||
+ (max_chars >= 0 && num_chars >= max_chars)) {
+ goto exit_ugetline;
+ }
+
+ if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL &&
+ bucket_consumed && lastchar == '\r') {
+ /* Could be MAC, could be DOS...
+ Need to check the first char of the next bucket to be sure */
+ continue;
+ }
+
+ if (lastchar == '\r' || lastchar == '\n') {
+ stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL;
+ if (lastchar == '\r') {
+ /* if there were a \n in this bucket after the \r, we would be looking at it */
+ stream->flags |= PHP_STREAM_FLAG_EOL_MAC;
+ }
+ goto exit_ugetline;
+ }
+ }
+
+ if (stream->eof) {
+ if (num_bytes == 0) {
+ if (growmode) {
+ efree(buf);
+ }
+ buf = NULL;
+ }
+ goto exit_ugetline;
+ }
+
+ if (max_bytes - num_bytes) {
+ int32_t want_bytes = max_bytes - num_bytes;
+
+ if (growmode) {
+ want_bytes = stream->chunk_size;
+ }
+ php_stream_fill_read_buffer(stream, want_bytes TSRMLS_CC);
+ }
+
+ }
+
+ exit_ugetline:
- return bufstart;
+ *pmax_chars = num_chars;
+ *pmax_bytes = num_bytes;
+ *pis_unicode = is_unicode;
+ if (buf) {
+ buf[num_bytes >> 1] = 0;
+ }
+ stream->position += num_bytes;
+
+ return buf;
}
PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *returned_len, char *delim, size_t delim_len TSRMLS_DC)
{
+ /* UTODO: Needs desperate rewriting for unicode conversion */
+ return NULL;
+
+#ifdef SMG_0
char *e, *buf;
size_t toread;
int skip = 0;
@@ -852,15 +1343,15 @@ PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *re
toread = maxlen;
} else {
if (delim_len == 1) {
- e = memchr(stream->readbuf + stream->readpos, *delim, stream->writepos - stream->readpos);
+ e = memchr(stream->readbuf, *delim, stream->readbuf_len);
} else {
- e = php_memnstr(stream->readbuf + stream->readpos, delim, delim_len, (stream->readbuf + stream->writepos));
+ e = php_memnstr(stream->readbuf, delim, delim_len, (stream->readbuf + stream->readbuflen));
}
if (!e) {
toread = maxlen;
} else {
- toread = e - (char *) stream->readbuf - stream->readpos;
+ toread = e - (char *) stream->readbuf;
skip = 1;
}
}
@@ -883,6 +1374,18 @@ PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *re
efree(buf);
return NULL;
}
+#endif
+}
+
+PHPAPI void _php_stream_flush_readbuf(php_stream *stream TSRMLS_DC)
+{
+ php_stream_bucket *bucket;
+
+ while ((bucket = stream->readbuf.head)) {
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ }
+ stream->readbuf_ofs = stream->readbuf_avail = 0;
}
/* Writes a buffer directly to a stream, using multiple of the chunk size */
@@ -893,8 +1396,11 @@ static size_t _php_stream_write_buffer(php_stream *stream, const char *buf, size
/* if we have a seekable stream we need to ensure that data is written at the
* current stream->position. This means invalidating the read buffer and then
* performing a low-level seek */
+/* UTODO: FIX this
if (stream->ops->seek && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0 && stream->readpos != stream->writepos) {
- stream->readpos = stream->writepos = 0;
+*/
+ if (stream->ops->seek && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0) {
+ php_stream_flush_readbuf(stream);
stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position TSRMLS_CC);
}
@@ -931,7 +1437,7 @@ static size_t _php_stream_write_buffer(php_stream *stream, const char *buf, size
* This may trigger a real write to the stream.
* Returns the number of bytes consumed from buf by the first filter in the chain.
* */
-static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, size_t count, int flags TSRMLS_DC)
+static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, size_t count, int flags, int is_unicode TSRMLS_DC)
{
size_t consumed = 0;
php_stream_bucket *bucket;
@@ -941,16 +1447,18 @@ static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, si
php_stream_filter *filter;
if (buf) {
- bucket = php_stream_bucket_new(stream, (char *)buf, count, 0, 0 TSRMLS_CC);
- php_stream_bucket_append(&brig_in, bucket TSRMLS_CC);
+ if (is_unicode) {
+ bucket = php_stream_bucket_new_unicode(stream, (UChar *)buf, count, 0, 0 TSRMLS_CC);
+ } else {
+ bucket = php_stream_bucket_new(stream, (char *)buf, count, 0, 0 TSRMLS_CC);
+ }
+ php_stream_bucket_append(brig_inp, bucket TSRMLS_CC);
}
for (filter = stream->writefilters.head; filter; filter = filter->next) {
/* for our return value, we are interested in the number of bytes consumed from
* the first filter in the chain */
- status = filter->fops->filter(stream, filter, brig_inp, brig_outp,
- filter == stream->writefilters.head ? &consumed : NULL, flags TSRMLS_CC);
-
+ status = filter->fops->filter(stream, filter, brig_inp, brig_outp, (filter == stream->writefilters.head) ? &consumed : NULL, flags TSRMLS_CC);
if (status != PSFS_PASS_ON) {
break;
}
@@ -969,7 +1477,11 @@ static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, si
* underlying stream */
while (brig_inp->head) {
bucket = brig_inp->head;
- _php_stream_write_buffer(stream, bucket->buf, bucket->buflen TSRMLS_CC);
+ if (bucket->is_unicode) {
+ _php_stream_write_buffer(stream, (char *)bucket->buf.ustr.val, UBYTES(bucket->buf.ustr.len) TSRMLS_CC);
+ } else {
+ _php_stream_write_buffer(stream, bucket->buf.str.val, bucket->buf.str.len TSRMLS_CC);
+ }
/* Potential error situation - eg: no space on device. Perhaps we should keep this brigade
* hanging around and try to write it later.
* At the moment, we just drop it on the floor
@@ -992,12 +1504,53 @@ static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, si
return consumed;
}
+PHPAPI int _php_stream_will_read_unicode(php_stream *stream TSRMLS_DC)
+{
+ php_stream_filter *filter;
+ int inverted = 0;
+
+ if (stream->readbuf.head) {
+ /* If there are buckets available, what do they hold */
+ return stream->readbuf.head->is_unicode;
+ }
+
+ if (!stream->readfilters.head) {
+ /* Not filtered == reads as string */
+ return 0;
+ }
+
+ for(filter = stream->readfilters.tail; filter; filter = filter->prev) {
+ if (filter->flags & PSFO_FLAG_OUTPUTS_SAME) {
+ continue;
+ }
+ if (filter->flags & PSFO_FLAG_OUTPUTS_OPPOSITE) {
+ inverted ^= 1;
+ continue;
+ }
+ if (filter->flags & PSFO_FLAG_OUTPUTS_ANY) {
+ /* Indeterminate */
+ return -1;
+ }
+ if (filter->flags & PSFO_FLAG_OUTPUTS_STRING) {
+ /* If an inversion happens, it'll be unicode, otherwise string */
+ return inverted;
+ }
+ if (filter->flags & PSFO_FLAG_OUTPUTS_UNICODE) {
+ /* If an inversion happens, it'll be string, otherwise unicode */
+ return inverted ^ 1;
+ }
+ }
+
+ /* string comes from stream so apply same logic as filter outputting string */
+ return inverted;
+}
+
PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC)
{
int ret = 0;
if (stream->writefilters.head) {
- _php_stream_write_filtered(stream, NULL, 0, closing ? PSFS_FLAG_FLUSH_CLOSE : PSFS_FLAG_FLUSH_INC TSRMLS_CC);
+ _php_stream_write_filtered(stream, NULL, 0, closing ? PSFS_FLAG_FLUSH_CLOSE : PSFS_FLAG_FLUSH_INC, 0 TSRMLS_CC);
}
if (stream->ops->flush) {
@@ -1014,12 +1567,33 @@ PHPAPI size_t _php_stream_write(php_stream *stream, const char *buf, size_t coun
}
if (stream->writefilters.head) {
- return _php_stream_write_filtered(stream, buf, count, PSFS_FLAG_NORMAL TSRMLS_CC);
+ return _php_stream_write_filtered(stream, buf, count, PSFS_FLAG_NORMAL, 0 TSRMLS_CC);
} else {
return _php_stream_write_buffer(stream, buf, count TSRMLS_CC);
}
}
+PHPAPI size_t _php_stream_u_write(php_stream *stream, const UChar *buf, int32_t count TSRMLS_DC)
+{
+ if (buf == NULL || count == 0 || stream->ops->write == NULL) {
+ return 0;
+ }
+
+ if (stream->writefilters.head) {
+ return _php_stream_write_filtered(stream, (const char*)buf, count, PSFS_FLAG_NORMAL, 1 TSRMLS_CC);
+ } else {
+ int32_t ret;
+
+ ret = _php_stream_write_buffer(stream, (const char*)buf, UBYTES(count) TSRMLS_CC);
+
+ /* Return data points, not bytes */
+ if (ret > 0) {
+ ret >>= 1;
+ }
+ return ret;
+ }
+}
+
PHPAPI size_t _php_stream_printf(php_stream *stream TSRMLS_DC, const char *fmt, ...)
{
size_t count;
@@ -1050,19 +1624,44 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_
/* handle the case where we are in the buffer */
if ((stream->flags & PHP_STREAM_FLAG_NO_BUFFER) == 0) {
switch(whence) {
+ case SEEK_SET:
+ if (offset < stream->position ||
+ offset > stream->position + stream->readbuf_avail) {
+ break;
+ }
+ /* act like SEEK_CUR */
+ whence = SEEK_CUR;
+ offset -= stream->position;
+ /* fall through */
case SEEK_CUR:
- if (offset > 0 && offset < stream->writepos - stream->readpos) {
- stream->readpos += offset;
- stream->position += offset;
- stream->eof = 0;
+ if (offset == 0) {
+ /* nothing to do */
return 0;
}
- break;
- case SEEK_SET:
- if (offset > stream->position &&
- offset < stream->position + stream->writepos - stream->readpos) {
- stream->readpos += offset - stream->position;
- stream->position = offset;
+
+ if (offset > 0 && offset <= stream->readbuf_avail) {
+ php_stream_bucket *bucket;
+
+ while (offset && (bucket = stream->readbuf.head)) {
+ int consume = bucket->buf.str.len - stream->readbuf_ofs;
+
+ if (consume > offset) {
+ /* seeking within this bucket */
+ stream->readbuf_ofs += offset;
+ stream->readbuf_avail -= offset;
+ stream->position += offset;
+ break;
+ }
+
+ /* consume the remaining bucket */
+ stream->position += consume;
+ stream->readbuf_ofs = 0;
+ stream->readbuf_avail -= consume;
+ offset -= consume;
+
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ }
stream->eof = 0;
return 0;
}
@@ -1077,7 +1676,7 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_
if (stream->writefilters.head) {
_php_stream_flush(stream, 0 TSRMLS_CC);
}
-
+
switch(whence) {
case SEEK_CUR:
offset = stream->position + offset;
@@ -1092,7 +1691,7 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_
}
/* invalidate the buffer contents */
- stream->readpos = stream->writepos = 0;
+ php_stream_flush_readbuf(stream);
return ret;
}
@@ -1748,7 +2347,8 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio
char *path_to_open;
int persistent = options & STREAM_OPEN_PERSISTENT;
char *copy_of_path = NULL;
-
+ int implicit_mode[16];
+ int modelen = strlen(mode);
if (opened_path) {
*opened_path = NULL;
@@ -1766,10 +2366,20 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio
return NULL;
}
+ memcpy(implicit_mode, mode, modelen);
+ if (context && context->default_mode && modelen < 15 && !strchr(mode, 't') && !strchr(mode, 'b')) {
+ if (context->default_mode & PHP_FILE_BINARY) {
+ implicit_mode[modelen++] = 'b';
+ } else if (context->default_mode & PHP_FILE_TEXT) {
+ implicit_mode[modelen++] = 't';
+ }
+ implicit_mode[modelen] = 0;
+ }
+
if (wrapper) {
stream = wrapper->wops->stream_opener(wrapper,
- path_to_open, mode, options ^ REPORT_ERRORS,
+ path_to_open, implicit_mode, options ^ REPORT_ERRORS,
opened_path, context STREAMS_REL_CC TSRMLS_CC);
/* if the caller asked for a persistent stream but the wrapper did not
@@ -1783,6 +2393,7 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio
if (stream) {
stream->wrapper = wrapper;
+ memcpy(stream->mode, implicit_mode, modelen + 1);
}
}
@@ -1829,6 +2440,45 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio
}
}
+ /* Output encoding on text mode streams defaults to utf8 unless specified in context parameter */
+ if (stream && strchr(implicit_mode, 't') && (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+'))) {
+ php_stream_filter *filter;
+ char *encoding = (context && context->output_encoding) ? context->output_encoding : "utf8";
+ char *filtername;
+ int encoding_len = strlen(encoding);
+
+ filtername = emalloc(encoding_len + sizeof("unicode.to."));
+ memcpy(filtername, "unicode.to.", sizeof("unicode.to.") - 1);
+ memcpy(filtername + sizeof("unicode.to.") - 1, encoding, encoding_len + 1);
+
+ filter = php_stream_filter_create(filtername, NULL, persistent TSRMLS_CC);
+ if (!filter) {
+ php_stream_wrapper_log_error(wrapper, options TSRMLS_CC, "Failed applying output encoding");
+ } else {
+ php_stream_filter_append(&stream->writefilters, filter);
+ }
+ efree(filtername);
+ }
+
+ if (stream && strchr(implicit_mode, 't') && (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+'))) {
+ php_stream_filter *filter;
+ char *filtername;
+ char *encoding = (context && context->input_encoding) ? context->input_encoding : "utf8";
+ int input_encoding_len = strlen(encoding);
+
+ filtername = emalloc(input_encoding_len + sizeof("unicode.from."));
+ memcpy(filtername, "unicode.from.", sizeof("unicode.from.") - 1);
+ memcpy(filtername + sizeof("unicode.from.") - 1, encoding, input_encoding_len + 1);
+
+ filter = php_stream_filter_create(filtername, NULL, persistent TSRMLS_CC);
+ if (!filter) {
+ php_stream_wrapper_log_error(wrapper, options TSRMLS_CC, "Failed applying input encoding");
+ } else {
+ php_stream_filter_append(&stream->readfilters, filter);
+ }
+ efree(filtername);
+ }
+
if (stream == NULL && (options & REPORT_ERRORS)) {
php_stream_display_wrapper_errors(wrapper, path, "failed to open stream" TSRMLS_CC);
}
@@ -1838,6 +2488,8 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio
pefree(copy_of_path, persistent);
}
#endif
+
+
return stream;
}
/* }}} */
@@ -1867,6 +2519,12 @@ PHPAPI void php_stream_context_free(php_stream_context *context)
php_stream_notification_free(context->notifier);
context->notifier = NULL;
}
+ if (context->input_encoding) {
+ efree(context->input_encoding);
+ }
+ if (context->output_encoding) {
+ efree(context->output_encoding);
+ }
if (context->links) {
zval_ptr_dtor(&context->links);
context->links = NULL;