diff options
Diffstat (limited to 'main/streams/streams.c')
| -rwxr-xr-x | main/streams/streams.c | 996 |
1 files changed, 827 insertions, 169 deletions
diff --git a/main/streams/streams.c b/main/streams/streams.c index d3ede8fc44..fd69a4b36f 100755 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -368,9 +368,11 @@ fprintf(stderr, "stream_free: %s:%p[%s] preserve_handle=%d release_cast=%d remov stream->wrapperdata = NULL; } - if (stream->readbuf) { - pefree(stream->readbuf, stream->is_persistent); - stream->readbuf = NULL; + while (stream->readbuf.head) { + php_stream_bucket *bucket = stream->readbuf.head; + + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); } if (stream->is_persistent && (close_options & PHP_STREAM_FREE_PERSISTENT)) { @@ -422,8 +424,6 @@ fprintf(stderr, "stream_free: %s:%p[%s] preserve_handle=%d release_cast=%d remov static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_DC) { - /* allocate/fill the buffer */ - if (stream->readfilters.head) { char *chunk_buf; int err_flag = 0; @@ -433,7 +433,7 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D /* allocate a buffer for reading chunks */ chunk_buf = emalloc(stream->chunk_size); - while (!err_flag && (stream->writepos - stream->readpos < (off_t)size)) { + while (!err_flag && (stream->readbuf_avail < (off_t)size)) { size_t justread = 0; int flags; php_stream_bucket *bucket; @@ -475,22 +475,38 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D /* we get here when the last filter in the chain has data to pass on. * in this situation, we are passing the brig_in brigade into the * stream read buffer */ - while (brig_inp->head) { - bucket = brig_inp->head; - /* grow buffer to hold this bucket - * TODO: this can fail for persistent streams */ - if (stream->readbuflen - stream->writepos < bucket->buflen) { - stream->readbuflen += bucket->buflen; - stream->readbuf = perealloc(stream->readbuf, stream->readbuflen, - stream->is_persistent); - } - memcpy(stream->readbuf + stream->writepos, bucket->buf, bucket->buflen); - stream->writepos += bucket->buflen; - + while ((bucket = brig_inp->head)) { + php_stream_bucket *tail = stream->readbuf.tail; php_stream_bucket_unlink(bucket TSRMLS_CC); - php_stream_bucket_delref(bucket TSRMLS_CC); + if (bucket->is_unicode && + U16_IS_SURROGATE(*bucket->buf.ustr.val) && + !U16_IS_SURROGATE_LEAD(*bucket->buf.ustr.val) && + tail && tail->is_unicode && + U16_IS_SURROGATE(tail->buf.ustr.val[tail->buf.ustr.len - 1]) && + U16_IS_SURROGATE_LEAD(tail->buf.ustr.val[tail->buf.ustr.len - 1])) { + /* Surrogate pair got split between buckets -- Unlikely */ + UChar *tmp; + + tmp = peumalloc(bucket->buf.ustr.len + 1, bucket->is_persistent); + *tmp = stream->readbuf.tail->buf.ustr.val[--tail->buf.ustr.len]; + memmove(tmp + UBYTES(1), bucket->buf.ustr.val, UBYTES(bucket->buf.ustr.len)); + pefree(bucket->buf.ustr.val, bucket->is_persistent); + bucket->buf.ustr.val = tmp; + + if (tail->buf.ustr.len <= 0) { + /* Tail was only a one UChar bucket */ + php_stream_bucket_unlink(tail TSRMLS_CC); + php_stream_bucket_delref(tail TSRMLS_CC); + } else if (tail == stream->readbuf.head && (tail->buf.ustr.len <= stream->readbuf_ofs)) { + /* Tail was head and last char was only unused portion */ + php_stream_bucket_unlink(tail TSRMLS_CC); + php_stream_bucket_delref(tail TSRMLS_CC); + stream->readbuf_ofs = 0; + } + } + php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC); + stream->readbuf_avail += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len; } - break; case PSFS_FEED_ME: @@ -520,30 +536,22 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D } else { /* is there enough data in the buffer ? */ - if (stream->writepos - stream->readpos < (off_t)size) { + if (stream->readbuf_avail < (off_t)size) { + char *chunk_buf; size_t justread = 0; + int is_persistent = php_stream_is_persistent(stream); - /* reduce buffer memory consumption if possible, to avoid a realloc */ - if (stream->readbuf && stream->readbuflen - stream->writepos < stream->chunk_size) { - memmove(stream->readbuf, stream->readbuf + stream->readpos, stream->readbuflen - stream->readpos); - stream->writepos -= stream->readpos; - stream->readpos = 0; - } - - /* grow the buffer if required - * TODO: this can fail for persistent streams */ - if (stream->readbuflen - stream->writepos < stream->chunk_size) { - stream->readbuflen += stream->chunk_size; - stream->readbuf = perealloc(stream->readbuf, stream->readbuflen, - stream->is_persistent); - } + chunk_buf = pemalloc(stream->chunk_size, is_persistent); + justread = stream->ops->read(stream, chunk_buf, stream->chunk_size TSRMLS_CC); - justread = stream->ops->read(stream, stream->readbuf + stream->writepos, - stream->readbuflen - stream->writepos - TSRMLS_CC); + if (justread == (size_t)-1 || justread == 0) { + pefree(chunk_buf, is_persistent); + } else { + php_stream_bucket *bucket; - if (justread != (size_t)-1) { - stream->writepos += justread; + bucket = php_stream_bucket_new(stream, chunk_buf, justread, 1, is_persistent TSRMLS_CC); + php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC); + stream->readbuf_avail += justread; } } } @@ -551,23 +559,32 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS_DC) { + php_stream_bucket *bucket; size_t toread = 0, didread = 0; while (size > 0) { - /* take from the read buffer first. * It is possible that a buffered stream was switched to non-buffered, so we * drain the remainder of the buffer before using the "raw" read mode for * the excess */ - if (stream->writepos > stream->readpos) { - toread = stream->writepos - stream->readpos; + while (size > 0 && (bucket = stream->readbuf.head)) { + if (bucket->is_unicode) { + /* This is an string read func, convert to string first */ + php_stream_bucket_tostring(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC); + } + toread = bucket->buf.str.len - stream->readbuf_ofs; if (toread > size) { toread = size; } - - memcpy(buf, stream->readbuf + stream->readpos, toread); - stream->readpos += toread; + memcpy(buf, bucket->buf.str.val + stream->readbuf_ofs, toread); + stream->readbuf_ofs += toread; + stream->readbuf_avail -= toread; + if (stream->readbuf_ofs >= bucket->buf.str.len) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + stream->readbuf_ofs = 0; + } size -= toread; buf += toread; didread += toread; @@ -578,32 +595,90 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS break; } + /* just break anyway, to avoid greedy read */ + if (didread > 0 && (stream->wrapper != &php_plain_files_wrapper)) { + break; + } + if (!stream->readfilters.head && (stream->flags & PHP_STREAM_FLAG_NO_BUFFER || stream->chunk_size == 1)) { toread = stream->ops->read(stream, buf, size TSRMLS_CC); - } else { - php_stream_fill_read_buffer(stream, size TSRMLS_CC); + if (toread <= 0) { + break; + } + buf += toread; + size -= toread; + didread += toread; + continue; + } + + php_stream_fill_read_buffer(stream, size TSRMLS_CC); + if (stream->readbuf_avail <= 0) { + /* EOF, or temporary end of data (for non-blocking mode). */ + break; + } + } + + if (didread > 0) { + stream->position += didread; + } + return didread; +} - toread = stream->writepos - stream->readpos; +PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int32_t size TSRMLS_DC) +{ + php_stream_bucket *bucket; + size_t toread = 0, didread = 0; + + while (size > 0) { + /* take from the read buffer first. + * It is possible that a buffered stream was switched to non-buffered, so we + * drain the remainder of the buffer before using the "raw" read mode for + * the excess */ + + while (size > 0 && (bucket = stream->readbuf.head)) { + UChar lastchar = 0; + + if (!bucket->is_unicode) { + /* This is a unicode read func, convert to unicode first */ + php_stream_bucket_tounicode(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC); + } + toread = bucket->buf.ustr.len - stream->readbuf_ofs; if (toread > size) { toread = size; } - - if (toread > 0) { - memcpy(buf, stream->readbuf + stream->readpos, toread); - stream->readpos += toread; + lastchar = *(bucket->buf.ustr.val + stream->readbuf_ofs + toread - 1); + if (U16_IS_SURROGATE(lastchar) && U16_IS_SURROGATE_LEAD(lastchar)) { + toread--; + /* The only time we should encounter a split surrogate is when the buffer size is truncating the data + In this case, reduce size along with toread to avoid getting stuck */ + size--; + } + memcpy(buf, bucket->buf.ustr.val + stream->readbuf_ofs, toread * sizeof(UChar)); + stream->readbuf_ofs += toread; + stream->readbuf_avail -= toread; + if (stream->readbuf_ofs >= bucket->buf.ustr.len) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + stream->readbuf_ofs = 0; } - } - if (toread > 0) { - didread += toread; - buf += toread; size -= toread; - } else { - /* EOF, or temporary end of data (for non-blocking mode). */ + buf += toread; + didread += toread; + } + + /* ignore eof here; the underlying state might have changed */ + if (size == 0) { break; } /* just break anyway, to avoid greedy read */ - if (stream->wrapper != &php_plain_files_wrapper) { + if (didread > 0 && (stream->wrapper != &php_plain_files_wrapper)) { + break; + } + + php_stream_fill_read_buffer(stream, size * sizeof(UChar) TSRMLS_CC); + if (stream->readbuf_avail <= 0) { + /* EOF, or temporary end of data (for non-blocking mode). */ break; } } @@ -615,10 +690,182 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS return didread; } +/* buf mabe NULL (in which case it will be allocated) + num_bytes and num_chars must be initialized upon entry to maximum for each (-1 for no maximum) + num_bytes/num_chars will be set on exit to actual contents of buf + Will return unicode/string type dependent on the first character unit in the read buf + Will return as many characters as possible (and permitted by max lengths) without changing unicode/string type + Will not split surrogate pairs */ +PHPAPI void *_php_stream_u_read(php_stream *stream, void *buf, int32_t *pnum_bytes, int32_t *pnum_chars, int *pis_unicode TSRMLS_DC) +{ + int grow_mode = 0; + int32_t num_bytes = 0, num_chars = 0; + int32_t max_bytes = *pnum_bytes, max_chars = *pnum_chars; + int32_t buflen = buf ? max_bytes : 2048; + int32_t bufpos = 0; + int is_unicode; + php_stream_bucket *bucket; + + /* It's possible that we have a readbuf, but that it's only half of a surrogate pair */ + if (!stream->readbuf.head || + (stream->readbuf.head == stream->readbuf.tail && stream->readbuf.head->is_unicode && + (stream->readbuf.head->buf.ustr.len - stream->readbuf_ofs) == 1 && + U16_IS_SURROGATE(stream->readbuf.head->buf.ustr.val[stream->readbuf.head->buf.ustr.len-1]))) { + php_stream_fill_read_buffer(stream, max_bytes ? max_bytes : (max_chars ? max_chars : stream->chunk_size) TSRMLS_CC); + } + + + if (!stream->readbuf.head || + (stream->readbuf.head == stream->readbuf.tail && stream->readbuf.head->is_unicode && + (stream->readbuf.head->buf.ustr.len - stream->readbuf_ofs) == 1 && + U16_IS_SURROGATE(stream->readbuf.head->buf.ustr.val[stream->readbuf.head->buf.ustr.len-1]))) { + /* Nothing to return */ + *pnum_bytes = 0; + *pnum_chars = 0; + *pis_unicode = 0; + return NULL; + } + + + if (!buf) { + grow_mode = 1; + buf = emalloc(buflen); + } + + is_unicode = stream->readbuf.head->is_unicode; + if (is_unicode) { + /* normalize byte boundary */ + if (max_bytes >= 0 && (max_bytes % sizeof(UChar))) { + max_bytes -= (max_bytes % sizeof(UChar)); + } + if (max_bytes >= 0 && max_bytes < UBYTES(max_chars)) { + /* max_bytes needs to be at least twice max_chars when both are provided */ + max_chars = (max_bytes / sizeof(UChar)); + } + } else { + if (max_chars < 0 && max_bytes >= 0) { + max_chars = max_bytes; + } else if (max_chars >= 0 && grow_mode) { + max_bytes = max_chars; + } + } + + for (;;) { + if (buflen - bufpos < 1024 && max_bytes >= 0 && max_bytes > buflen) { + buflen += 1024; + if (buflen > max_bytes) { + buflen = max_bytes; + } + buf = erealloc(buf, buflen); + } + + if ((bucket = stream->readbuf.head)) { + if ((bucket->is_unicode && !is_unicode) || + (!bucket->is_unicode && is_unicode)) { + /* data type swap, exit now */ + break; + } + if (bucket->is_unicode) { + UChar *s = bucket->buf.ustr.val + stream->readbuf_ofs, *p; + int bytes_in_buf, chars_in_buf; + int32_t ofs = 0; + + chars_in_buf = u_countChar32(s, bucket->buf.ustr.len - stream->readbuf_ofs); + + if (chars_in_buf > max_chars && max_chars >= 0) { + chars_in_buf = max_chars; + } + /* u_countChar32 tells us that we won't overrun anyway */ + U16_FWD_N_UNSAFE(s, ofs, chars_in_buf); + p = s + ofs; + bytes_in_buf = UBYTES(ofs); + if (bytes_in_buf > (max_bytes - num_bytes)) { + bytes_in_buf = max_bytes - num_bytes; + bytes_in_buf -= bytes_in_buf & 1; /* normalize */ + p = s + (bytes_in_buf >> 1); + if (p > s && U16_IS_SURROGATE(p[-1]) && U16_IS_SURROGATE_LEAD(p[-1])) { + /* Don't split surrogate pairs */ + p--; + bytes_in_buf -= UBYTES(1); + } + if (bytes_in_buf <= 0) { + /* No room to copy data (surrogate pair) */ + break; + } + chars_in_buf = u_countChar32(s, p - s); + } + memcpy((char *)buf + num_bytes, s, bytes_in_buf); + num_bytes += bytes_in_buf; + num_chars += chars_in_buf; + stream->readbuf_ofs += p - s; + stream->readbuf_avail -= p - s; + if (stream->readbuf_ofs >= bucket->buf.ustr.len) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + stream->readbuf_ofs = 0; + } else if (stream->readbuf_ofs == (bucket->buf.ustr.len - 1) && + U16_IS_SURROGATE(bucket->buf.ustr.val[bucket->buf.ustr.len - 1]) && + bucket->next && bucket->next->is_unicode) { + /* Only one char left in the bucket, avoid already split surrogates getting "stuck" -- Should never happen thanks to fill_read_buffer */ + php_stream_bucket *next_bucket = bucket->next; + + bucket->buf.ustr.val = peurealloc(bucket->buf.ustr.val, next_bucket->buf.ustr.len + 1, bucket->is_persistent); + bucket->buf.ustr.val[0] = bucket->buf.ustr.val[bucket->buf.ustr.len - 1]; + memcpy(bucket->buf.ustr.val + 1, next_bucket->buf.ustr.val, UBYTES(next_bucket->buf.ustr.len)); + php_stream_bucket_unlink(next_bucket TSRMLS_CC); + php_stream_bucket_delref(next_bucket TSRMLS_CC); + stream->readbuf_ofs = 0; + } else { + /* Reached max limits */ + break; + } + } else { + int want = (max_chars < 0 || max_chars >= buflen) ? (buflen - num_bytes) : (max_chars - num_chars); + int avail = bucket->buf.str.len - stream->readbuf_ofs; + + if (max_bytes >= 0 && want > max_bytes) { + want = max_bytes; + } + + if (want > avail) { + want = avail; + } + + memcpy((char *)buf + num_bytes, bucket->buf.str.val + stream->readbuf_ofs, want); + stream->readbuf_ofs += want; + stream->readbuf_avail -= want; + num_bytes += want; + num_chars += want; + if (stream->readbuf_ofs >= bucket->buf.str.len) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + stream->readbuf_ofs = 0; + } else { + /* Reached max limit */ + break; + } + } + } else { + /* No more data */ + break; + } + } + /* Successful exit */ + *pnum_bytes = num_bytes; + *pnum_chars = num_chars; + *pis_unicode = is_unicode; + + if (num_chars == 0 && grow_mode) { + efree(buf); + buf = NULL; + } + return buf; +} + PHPAPI int _php_stream_eof(php_stream *stream TSRMLS_DC) { /* if there is data in the buffer, it's not EOF */ - if (stream->writepos - stream->readpos > 0) { + if (stream->readbuf_avail > 0) { return 0; } @@ -684,6 +931,8 @@ PHPAPI int _php_stream_stat(php_stream *stream, php_stream_statbuf *ssb TSRMLS_D return (stream->ops->stat)(stream, ssb TSRMLS_CC); } +/* buf != NULL Still used by file() in ext/standard/file.c + buf == NULL semantics no longer supported */ PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len TSRMLS_DC) { size_t avail; @@ -691,8 +940,7 @@ PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len char *readptr; if (!buf) { - readptr = stream->readbuf + stream->readpos; - avail = stream->writepos - stream->readpos; + return NULL; } else { readptr = buf; avail = buf_len; @@ -725,123 +973,366 @@ PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len /* If buf == NULL, the buffer will be allocated automatically and will be of an * appropriate length to hold the line, regardless of the line length, memory - * permitting */ + * permitting -- returned string will be up to (maxlen-1), last byte holding terminating NULL */ PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen, size_t *returned_len TSRMLS_DC) { - size_t avail = 0; - size_t current_buf_size = 0; + php_stream_bucket *bucket; size_t total_copied = 0; - int grow_mode = 0; - char *bufstart = buf; + int growmode = 0; - if (buf == NULL) { - grow_mode = 1; - } else if (maxlen == 0) { - return NULL; + if (!buf) { + maxlen = stream->chunk_size + 1; + buf = emalloc(maxlen); + growmode = 1; } - /* - * If the underlying stream operations block when no new data is readable, - * we need to take extra precautions. - * - * If there is buffered data available, we check for a EOL. If it exists, - * we pass the data immediately back to the caller. This saves a call - * to the read implementation and will not block where blocking - * is not necessary at all. - * - * If the stream buffer contains more data than the caller requested, - * we can also avoid that costly step and simply return that data. - */ + /* Leave room for NULL */ + maxlen--; - for (;;) { - avail = stream->writepos - stream->readpos; + for(;;) { + /* Fill buf with buffered data + until no space is left in the buffer + or EOL is found */ + char lastchar = 0; - if (avail > 0) { - size_t cpysz = 0; - char *readptr; + /* consumed readbuf if possible */ + while ((bucket = stream->readbuf.head)) { char *eol; - int done = 0; + size_t tocopy; + size_t wanted = maxlen - total_copied; + int bucket_consumed = 0; - readptr = stream->readbuf + stream->readpos; - eol = php_stream_locate_eol(stream, NULL, 0 TSRMLS_CC); + if (bucket->is_unicode) { + /* This is a string read func, convert to string first */ + php_stream_bucket_tostring(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC); + } - if (eol) { - cpysz = eol - readptr + 1; - done = 1; + if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && lastchar == '\r') { + /* Line ending was actually found in the last char of the last bucket + Since it was \r it could have been MAC or DOS */ + stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL; + if (bucket->buf.str.val[stream->readbuf_ofs] == '\n') { + /* First byte here is a \n, put them together and you get DOS line endings */ + stream->readbuf_ofs++; + stream->readbuf_avail--; + buf[total_copied++] = '\n'; + /* unlikely -- It'd mean a one byte bucket -- possible though */ + if (stream->readbuf_ofs >= bucket->buf.str.len) { + stream->readbuf_ofs = 0; + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + } + } else { + /* Seeing no \n in the first char of this bucket, we know it was MAC */ + stream->flags |= PHP_STREAM_FLAG_EOL_MAC; + } + goto exit_getline; + } else if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) { + char *cr, *lf; + lf = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\n', bucket->buf.str.len - stream->readbuf_ofs); + cr = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\r', bucket->buf.str.len - stream->readbuf_ofs); + eol = (cr && (!lf || cr < (lf - 1))) ? cr : lf; + } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) { + eol = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\r', bucket->buf.str.len - stream->readbuf_ofs); } else { - cpysz = avail; + eol = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\n', bucket->buf.str.len - stream->readbuf_ofs); } - if (grow_mode) { - /* allow room for a NUL. If this realloc is really a realloc - * (ie: second time around), we get an extra byte. In most - * cases, with the default chunk size of 8K, we will only - * incur that overhead once. When people have lines longer - * than 8K, we waste 1 byte per additional 8K or so. - * That seems acceptable to me, to avoid making this code - * hard to follow */ - bufstart = erealloc(bufstart, current_buf_size + cpysz + 1); - current_buf_size += cpysz + 1; - buf = bufstart + total_copied; - } else { - if (cpysz >= maxlen - 1) { - cpysz = maxlen - 1; - done = 1; + /* No \r or \n found in bucket -- grab it all */ + if (!eol) { + eol = bucket->buf.str.val + bucket->buf.str.len - 1; + } + tocopy = eol - (bucket->buf.str.val + stream->readbuf_ofs) + 1; + + /* maxlen exceeded */ + if (tocopy > wanted && growmode) { + if (tocopy - wanted > stream->chunk_size) { + maxlen += tocopy - wanted; + } else { + maxlen += stream->chunk_size; } + buf = erealloc(buf, maxlen + 1); + wanted = maxlen - total_copied; } - memcpy(buf, readptr, cpysz); + if (tocopy > wanted) { + tocopy = wanted; + } - stream->position += cpysz; - stream->readpos += cpysz; - buf += cpysz; - maxlen -= cpysz; - total_copied += cpysz; + memcpy(buf + total_copied, bucket->buf.str.val + stream->readbuf_ofs, tocopy); + total_copied += tocopy; + stream->readbuf_ofs += tocopy; + stream->readbuf_avail -= tocopy; + lastchar = buf[total_copied-1]; - if (done) { - break; + if (stream->readbuf_ofs >= bucket->buf.str.len) { + stream->readbuf_ofs = 0; + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + bucket_consumed = 1; } - } else if (stream->eof) { - break; - } else { - /* XXX: Should be fine to always read chunk_size */ - size_t toread; - - if (grow_mode) { - toread = stream->chunk_size; - } else { - toread = maxlen - 1; - if (toread > stream->chunk_size) { - toread = stream->chunk_size; - } + + if (total_copied >= maxlen) { + goto exit_getline; } - php_stream_fill_read_buffer(stream, toread TSRMLS_CC); + if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && + bucket_consumed && lastchar == '\r') { + /* Could be MAC, could be DOS... + Need to check the first char of the next bucket to be sure */ + continue; + } - if (stream->writepos - stream->readpos == 0) { - break; + if (lastchar == '\r' || lastchar == '\n') { + stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL; + if (lastchar == '\r') { + /* if there were a \n in this bucket after the \r, we would be looking at it */ + stream->flags |= PHP_STREAM_FLAG_EOL_MAC; + } + goto exit_getline; } } - } - if (total_copied == 0) { - if (grow_mode) { - assert(bufstart == NULL); + if (stream->eof) { + if (total_copied == 0) { + if (growmode) { + efree(buf); + } + return NULL; + } + goto exit_getline; } - return NULL; + + if (maxlen - total_copied) { + size_t bufneeded = maxlen - total_copied; + + if (growmode) { + bufneeded = stream->chunk_size; + } + php_stream_fill_read_buffer(stream, bufneeded TSRMLS_CC); + } + } - buf[0] = '\0'; + exit_getline: + if (returned_len) { *returned_len = total_copied; } + buf[total_copied] = 0; + stream->position += total_copied; + + return buf; +} + +/* If buf == NULL, the buffer will be allocated automatically and will be of an + * appropriate length to hold the line, regardless of the line length, memory + * permitting -- returned string will be up to (maxlen-1), last byte holding terminating NULL */ +PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC) +{ + php_stream_bucket *bucket; + int32_t num_bytes = 0, num_chars = 0; + int32_t max_bytes = *pmax_bytes, max_chars = *pmax_chars; + int growmode = 0, is_unicode; + + while (!stream->readbuf.head) { + /* Nothing buffered, get an idea of the data type by polling */ + int32_t fillsize = (max_chars > 0) ? max_chars : ((max_bytes > 0) ? max_bytes : stream->chunk_size); + + php_stream_fill_read_buffer(stream, fillsize TSRMLS_CC); + if (!stream->readbuf.head) { + *pmax_bytes = 0; + *pmax_chars = 0; + *pis_unicode = 0; + return NULL; + } + } + + *pis_unicode = is_unicode = stream->readbuf.head->is_unicode; + + if (!is_unicode) { + /* Wrap normal get_line() */ + int returned_len; + char *retbuf = php_stream_get_line(stream, (char*)buf, max_chars, &returned_len); + + *pmax_chars = returned_len; + *pmax_bytes = returned_len; + return (UChar*)retbuf; + } + + /* Now act like php_stream_u_read(), but stopping at 000A, 000D, or 000D 000A */ + + if (!buf) { + max_bytes = UBYTES(257); + buf = emalloc(max_bytes); + growmode = 1; + } + + /* Leave room for NULL */ + max_bytes -= UBYTES(1); + + for(;;) { + /* Fill buf with buffered data + until no space is left in the buffer + or EOL is found */ + UChar lastchar = 0; + + /* consumed readbuf if possible */ + while ((bucket = stream->readbuf.head)) { + UChar *eol, *s; + int32_t want_chars = max_chars - num_chars; + int32_t want_bytes = max_bytes - num_bytes; + int32_t count_chars; + int32_t count_bytes; + int bucket_consumed = 0; + + if (!bucket->is_unicode) { + /* Done with unicode data, bail as though EOL was reached (even though it wasn't) */ + goto exit_ugetline; + } + + if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && lastchar == '\r') { + /* Line ending was actually found in the last char of the last bucket + Since it was \r it could have been MAC or DOS */ + stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL; + if (bucket->buf.ustr.val[stream->readbuf_ofs] == '\n') { + /* First byte here is a \n, put them together and you get DOS line endings */ + stream->readbuf_ofs++; + stream->readbuf_avail--; + buf[num_bytes >> 1] = '\n'; /* Can't use num_chars here, surrogate pairs will foul it up */ + num_bytes += UBYTES(1); + num_chars++; + /* unlikely -- It'd mean a one UChar bucket -- possible though */ + if (stream->readbuf_ofs >= bucket->buf.ustr.len) { + stream->readbuf_ofs = 0; + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + } + } else { + /* Seeing no \n in the first char of this bucket, we know it was MAC */ + stream->flags |= PHP_STREAM_FLAG_EOL_MAC; + } + goto exit_ugetline; + } else if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) { + UChar *cr, *lf; + lf = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\n', bucket->buf.ustr.len - stream->readbuf_ofs); + cr = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\r', bucket->buf.ustr.len - stream->readbuf_ofs); + eol = (cr && (!lf || cr < (lf - 1))) ? cr : lf; + } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) { + eol = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\r', bucket->buf.ustr.len - stream->readbuf_ofs); + } else { + eol = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\n', bucket->buf.ustr.len - stream->readbuf_ofs); + } + + /* No \r or \n found in bucket -- grab it all */ + if (!eol) { + eol = bucket->buf.ustr.val + bucket->buf.ustr.len - 1; + } + s = bucket->buf.ustr.val + stream->readbuf_ofs; + + count_bytes = UBYTES(eol - s + 1); + if (count_bytes > want_bytes && growmode) { + max_bytes = num_bytes + count_bytes + UBYTES(256); + want_bytes = max_bytes - num_bytes; + buf = erealloc(buf, max_bytes + UBYTES(1)); + } else if (count_bytes > want_bytes) { + count_bytes = want_bytes; + } + if (U16_IS_SURROGATE(s[(count_bytes >> 1) - 1]) && + U16_IS_SURROGATE_LEAD(s[(count_bytes >> 1) - 1])) { + count_bytes -= UBYTES(1); + } + if (count_bytes <= 0) { + /* Not enough space in buffer, just break out */ + goto exit_ugetline; + } + count_chars = u_countChar32(s, count_bytes >> 1); + + if (max_chars >= 0 && count_chars > want_chars) { + count_chars = want_chars; + count_bytes = 0; + U16_FWD_N_UNSAFE(s, count_bytes, count_chars); + count_bytes <<= 1; /* translate U16 to bytes */ + } + + memcpy(buf + num_bytes, s, count_bytes); + num_bytes += count_bytes; + num_chars += count_chars; + stream->readbuf_ofs += count_bytes >> 1; + stream->readbuf_avail -= count_bytes >> 1; + + lastchar = buf[(num_bytes >> 1) - 1]; + + if (stream->readbuf_ofs >= bucket->buf.ustr.len) { + stream->readbuf_ofs = 0; + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + bucket_consumed = 1; + } + + if ((max_bytes >= 0 && num_bytes >= max_bytes) || + (max_chars >= 0 && num_chars >= max_chars)) { + goto exit_ugetline; + } + + if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && + bucket_consumed && lastchar == '\r') { + /* Could be MAC, could be DOS... + Need to check the first char of the next bucket to be sure */ + continue; + } + + if (lastchar == '\r' || lastchar == '\n') { + stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL; + if (lastchar == '\r') { + /* if there were a \n in this bucket after the \r, we would be looking at it */ + stream->flags |= PHP_STREAM_FLAG_EOL_MAC; + } + goto exit_ugetline; + } + } + + if (stream->eof) { + if (num_bytes == 0) { + if (growmode) { + efree(buf); + } + buf = NULL; + } + goto exit_ugetline; + } + + if (max_bytes - num_bytes) { + int32_t want_bytes = max_bytes - num_bytes; + + if (growmode) { + want_bytes = stream->chunk_size; + } + php_stream_fill_read_buffer(stream, want_bytes TSRMLS_CC); + } + + } + + exit_ugetline: - return bufstart; + *pmax_chars = num_chars; + *pmax_bytes = num_bytes; + *pis_unicode = is_unicode; + if (buf) { + buf[num_bytes >> 1] = 0; + } + stream->position += num_bytes; + + return buf; } PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *returned_len, char *delim, size_t delim_len TSRMLS_DC) { + /* UTODO: Needs desperate rewriting for unicode conversion */ + return NULL; + +#ifdef SMG_0 char *e, *buf; size_t toread; int skip = 0; @@ -852,15 +1343,15 @@ PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *re toread = maxlen; } else { if (delim_len == 1) { - e = memchr(stream->readbuf + stream->readpos, *delim, stream->writepos - stream->readpos); + e = memchr(stream->readbuf, *delim, stream->readbuf_len); } else { - e = php_memnstr(stream->readbuf + stream->readpos, delim, delim_len, (stream->readbuf + stream->writepos)); + e = php_memnstr(stream->readbuf, delim, delim_len, (stream->readbuf + stream->readbuflen)); } if (!e) { toread = maxlen; } else { - toread = e - (char *) stream->readbuf - stream->readpos; + toread = e - (char *) stream->readbuf; skip = 1; } } @@ -883,6 +1374,18 @@ PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *re efree(buf); return NULL; } +#endif +} + +PHPAPI void _php_stream_flush_readbuf(php_stream *stream TSRMLS_DC) +{ + php_stream_bucket *bucket; + + while ((bucket = stream->readbuf.head)) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + } + stream->readbuf_ofs = stream->readbuf_avail = 0; } /* Writes a buffer directly to a stream, using multiple of the chunk size */ @@ -893,8 +1396,11 @@ static size_t _php_stream_write_buffer(php_stream *stream, const char *buf, size /* if we have a seekable stream we need to ensure that data is written at the * current stream->position. This means invalidating the read buffer and then * performing a low-level seek */ +/* UTODO: FIX this if (stream->ops->seek && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0 && stream->readpos != stream->writepos) { - stream->readpos = stream->writepos = 0; +*/ + if (stream->ops->seek && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0) { + php_stream_flush_readbuf(stream); stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position TSRMLS_CC); } @@ -931,7 +1437,7 @@ static size_t _php_stream_write_buffer(php_stream *stream, const char *buf, size * This may trigger a real write to the stream. * Returns the number of bytes consumed from buf by the first filter in the chain. * */ -static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, size_t count, int flags TSRMLS_DC) +static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, size_t count, int flags, int is_unicode TSRMLS_DC) { size_t consumed = 0; php_stream_bucket *bucket; @@ -941,16 +1447,18 @@ static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, si php_stream_filter *filter; if (buf) { - bucket = php_stream_bucket_new(stream, (char *)buf, count, 0, 0 TSRMLS_CC); - php_stream_bucket_append(&brig_in, bucket TSRMLS_CC); + if (is_unicode) { + bucket = php_stream_bucket_new_unicode(stream, (UChar *)buf, count, 0, 0 TSRMLS_CC); + } else { + bucket = php_stream_bucket_new(stream, (char *)buf, count, 0, 0 TSRMLS_CC); + } + php_stream_bucket_append(brig_inp, bucket TSRMLS_CC); } for (filter = stream->writefilters.head; filter; filter = filter->next) { /* for our return value, we are interested in the number of bytes consumed from * the first filter in the chain */ - status = filter->fops->filter(stream, filter, brig_inp, brig_outp, - filter == stream->writefilters.head ? &consumed : NULL, flags TSRMLS_CC); - + status = filter->fops->filter(stream, filter, brig_inp, brig_outp, (filter == stream->writefilters.head) ? &consumed : NULL, flags TSRMLS_CC); if (status != PSFS_PASS_ON) { break; } @@ -969,7 +1477,11 @@ static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, si * underlying stream */ while (brig_inp->head) { bucket = brig_inp->head; - _php_stream_write_buffer(stream, bucket->buf, bucket->buflen TSRMLS_CC); + if (bucket->is_unicode) { + _php_stream_write_buffer(stream, (char *)bucket->buf.ustr.val, UBYTES(bucket->buf.ustr.len) TSRMLS_CC); + } else { + _php_stream_write_buffer(stream, bucket->buf.str.val, bucket->buf.str.len TSRMLS_CC); + } /* Potential error situation - eg: no space on device. Perhaps we should keep this brigade * hanging around and try to write it later. * At the moment, we just drop it on the floor @@ -992,12 +1504,53 @@ static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, si return consumed; } +PHPAPI int _php_stream_will_read_unicode(php_stream *stream TSRMLS_DC) +{ + php_stream_filter *filter; + int inverted = 0; + + if (stream->readbuf.head) { + /* If there are buckets available, what do they hold */ + return stream->readbuf.head->is_unicode; + } + + if (!stream->readfilters.head) { + /* Not filtered == reads as string */ + return 0; + } + + for(filter = stream->readfilters.tail; filter; filter = filter->prev) { + if (filter->flags & PSFO_FLAG_OUTPUTS_SAME) { + continue; + } + if (filter->flags & PSFO_FLAG_OUTPUTS_OPPOSITE) { + inverted ^= 1; + continue; + } + if (filter->flags & PSFO_FLAG_OUTPUTS_ANY) { + /* Indeterminate */ + return -1; + } + if (filter->flags & PSFO_FLAG_OUTPUTS_STRING) { + /* If an inversion happens, it'll be unicode, otherwise string */ + return inverted; + } + if (filter->flags & PSFO_FLAG_OUTPUTS_UNICODE) { + /* If an inversion happens, it'll be string, otherwise unicode */ + return inverted ^ 1; + } + } + + /* string comes from stream so apply same logic as filter outputting string */ + return inverted; +} + PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC) { int ret = 0; if (stream->writefilters.head) { - _php_stream_write_filtered(stream, NULL, 0, closing ? PSFS_FLAG_FLUSH_CLOSE : PSFS_FLAG_FLUSH_INC TSRMLS_CC); + _php_stream_write_filtered(stream, NULL, 0, closing ? PSFS_FLAG_FLUSH_CLOSE : PSFS_FLAG_FLUSH_INC, 0 TSRMLS_CC); } if (stream->ops->flush) { @@ -1014,12 +1567,33 @@ PHPAPI size_t _php_stream_write(php_stream *stream, const char *buf, size_t coun } if (stream->writefilters.head) { - return _php_stream_write_filtered(stream, buf, count, PSFS_FLAG_NORMAL TSRMLS_CC); + return _php_stream_write_filtered(stream, buf, count, PSFS_FLAG_NORMAL, 0 TSRMLS_CC); } else { return _php_stream_write_buffer(stream, buf, count TSRMLS_CC); } } +PHPAPI size_t _php_stream_u_write(php_stream *stream, const UChar *buf, int32_t count TSRMLS_DC) +{ + if (buf == NULL || count == 0 || stream->ops->write == NULL) { + return 0; + } + + if (stream->writefilters.head) { + return _php_stream_write_filtered(stream, (const char*)buf, count, PSFS_FLAG_NORMAL, 1 TSRMLS_CC); + } else { + int32_t ret; + + ret = _php_stream_write_buffer(stream, (const char*)buf, UBYTES(count) TSRMLS_CC); + + /* Return data points, not bytes */ + if (ret > 0) { + ret >>= 1; + } + return ret; + } +} + PHPAPI size_t _php_stream_printf(php_stream *stream TSRMLS_DC, const char *fmt, ...) { size_t count; @@ -1050,19 +1624,44 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_ /* handle the case where we are in the buffer */ if ((stream->flags & PHP_STREAM_FLAG_NO_BUFFER) == 0) { switch(whence) { + case SEEK_SET: + if (offset < stream->position || + offset > stream->position + stream->readbuf_avail) { + break; + } + /* act like SEEK_CUR */ + whence = SEEK_CUR; + offset -= stream->position; + /* fall through */ case SEEK_CUR: - if (offset > 0 && offset < stream->writepos - stream->readpos) { - stream->readpos += offset; - stream->position += offset; - stream->eof = 0; + if (offset == 0) { + /* nothing to do */ return 0; } - break; - case SEEK_SET: - if (offset > stream->position && - offset < stream->position + stream->writepos - stream->readpos) { - stream->readpos += offset - stream->position; - stream->position = offset; + + if (offset > 0 && offset <= stream->readbuf_avail) { + php_stream_bucket *bucket; + + while (offset && (bucket = stream->readbuf.head)) { + int consume = bucket->buf.str.len - stream->readbuf_ofs; + + if (consume > offset) { + /* seeking within this bucket */ + stream->readbuf_ofs += offset; + stream->readbuf_avail -= offset; + stream->position += offset; + break; + } + + /* consume the remaining bucket */ + stream->position += consume; + stream->readbuf_ofs = 0; + stream->readbuf_avail -= consume; + offset -= consume; + + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + } stream->eof = 0; return 0; } @@ -1077,7 +1676,7 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_ if (stream->writefilters.head) { _php_stream_flush(stream, 0 TSRMLS_CC); } - + switch(whence) { case SEEK_CUR: offset = stream->position + offset; @@ -1092,7 +1691,7 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_ } /* invalidate the buffer contents */ - stream->readpos = stream->writepos = 0; + php_stream_flush_readbuf(stream); return ret; } @@ -1748,7 +2347,8 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio char *path_to_open; int persistent = options & STREAM_OPEN_PERSISTENT; char *copy_of_path = NULL; - + int implicit_mode[16]; + int modelen = strlen(mode); if (opened_path) { *opened_path = NULL; @@ -1766,10 +2366,20 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio return NULL; } + memcpy(implicit_mode, mode, modelen); + if (context && context->default_mode && modelen < 15 && !strchr(mode, 't') && !strchr(mode, 'b')) { + if (context->default_mode & PHP_FILE_BINARY) { + implicit_mode[modelen++] = 'b'; + } else if (context->default_mode & PHP_FILE_TEXT) { + implicit_mode[modelen++] = 't'; + } + implicit_mode[modelen] = 0; + } + if (wrapper) { stream = wrapper->wops->stream_opener(wrapper, - path_to_open, mode, options ^ REPORT_ERRORS, + path_to_open, implicit_mode, options ^ REPORT_ERRORS, opened_path, context STREAMS_REL_CC TSRMLS_CC); /* if the caller asked for a persistent stream but the wrapper did not @@ -1783,6 +2393,7 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio if (stream) { stream->wrapper = wrapper; + memcpy(stream->mode, implicit_mode, modelen + 1); } } @@ -1829,6 +2440,45 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio } } + /* Output encoding on text mode streams defaults to utf8 unless specified in context parameter */ + if (stream && strchr(implicit_mode, 't') && (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+'))) { + php_stream_filter *filter; + char *encoding = (context && context->output_encoding) ? context->output_encoding : "utf8"; + char *filtername; + int encoding_len = strlen(encoding); + + filtername = emalloc(encoding_len + sizeof("unicode.to.")); + memcpy(filtername, "unicode.to.", sizeof("unicode.to.") - 1); + memcpy(filtername + sizeof("unicode.to.") - 1, encoding, encoding_len + 1); + + filter = php_stream_filter_create(filtername, NULL, persistent TSRMLS_CC); + if (!filter) { + php_stream_wrapper_log_error(wrapper, options TSRMLS_CC, "Failed applying output encoding"); + } else { + php_stream_filter_append(&stream->writefilters, filter); + } + efree(filtername); + } + + if (stream && strchr(implicit_mode, 't') && (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+'))) { + php_stream_filter *filter; + char *filtername; + char *encoding = (context && context->input_encoding) ? context->input_encoding : "utf8"; + int input_encoding_len = strlen(encoding); + + filtername = emalloc(input_encoding_len + sizeof("unicode.from.")); + memcpy(filtername, "unicode.from.", sizeof("unicode.from.") - 1); + memcpy(filtername + sizeof("unicode.from.") - 1, encoding, input_encoding_len + 1); + + filter = php_stream_filter_create(filtername, NULL, persistent TSRMLS_CC); + if (!filter) { + php_stream_wrapper_log_error(wrapper, options TSRMLS_CC, "Failed applying input encoding"); + } else { + php_stream_filter_append(&stream->readfilters, filter); + } + efree(filtername); + } + if (stream == NULL && (options & REPORT_ERRORS)) { php_stream_display_wrapper_errors(wrapper, path, "failed to open stream" TSRMLS_CC); } @@ -1838,6 +2488,8 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio pefree(copy_of_path, persistent); } #endif + + return stream; } /* }}} */ @@ -1867,6 +2519,12 @@ PHPAPI void php_stream_context_free(php_stream_context *context) php_stream_notification_free(context->notifier); context->notifier = NULL; } + if (context->input_encoding) { + efree(context->input_encoding); + } + if (context->output_encoding) { + efree(context->output_encoding); + } if (context->links) { zval_ptr_dtor(&context->links); context->links = NULL; |
