diff options
author | cookedm <cookedm@localhost> | 2007-05-10 18:14:29 +0000 |
---|---|---|
committer | cookedm <cookedm@localhost> | 2007-05-10 18:14:29 +0000 |
commit | 4474ba35cd167483589e398539007088dd64b9c5 (patch) | |
tree | 800ab03aca2c83de80c12ff0c854539945af9a6c | |
parent | 9dacbb9dfb3373865193def48ce880a547de0100 (diff) | |
download | numpy-4474ba35cd167483589e398539007088dd64b9c5.tar.gz |
Improvement of separator handling for fromstring and fromfile.
* fromstring and fromfile should behave identically on text.
* added more test cases for fromstring
* the dtype is now passed to the C functions that do the type-specific string
  conversions. The built-in types ignore it, but someone writing their own
  dtype could make use of it.
* separator handling for fromfile is moved out of the type-specific scan
  functions. Their old separator argument is now ignored; I've left it in the
  signature for backwards compatibility, and it can be removed the next time
  the API version is bumped up.
* separator handling in fromfile is now safe: the separator is no longer
  passed to fscanf as a format string (no fscanf(fp, sep) anymore)
-rw-r--r-- | numpy/core/include/numpy/ndarrayobject.h | 25 | ||||
-rw-r--r-- | numpy/core/src/arraytypes.inc.src | 37 | ||||
-rw-r--r-- | numpy/core/src/multiarraymodule.c | 704 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 23 |
4 files changed, 437 insertions, 352 deletions
diff --git a/numpy/core/include/numpy/ndarrayobject.h b/numpy/core/include/numpy/ndarrayobject.h index 58270ac3b..c7600abe6 100644 --- a/numpy/core/include/numpy/ndarrayobject.h +++ b/numpy/core/include/numpy/ndarrayobject.h @@ -9,17 +9,18 @@ extern "C" CONFUSE_EMACS #undef CONFUSE_EMACS #undef CONFUSE_EMACS2 -/* ... otherwise a semi-smart idententer (like emacs) tries to indent +/* ... otherwise a semi-smart identer (like emacs) tries to indent everything when you're typing */ #endif /* This is auto-generated by the installer */ #include "config.h" -/* There are several places in the code where an array of dimensions is */ -/* allocated statically. This is the size of that static allocation. */ -/* The array creation itself could have arbitrary dimensions but - * all the places where static allocation is used would need to - * be changed to dynamic (including inside of several structures) +/* There are several places in the code where an array of dimensions is + * allocated statically. This is the size of that static allocation. + * + * The array creation itself could have arbitrary dimensions but + * all the places where static allocation is used would need to + * be changed to dynamic (including inside of several structures) */ #define NPY_MAXDIMS 32 @@ -1004,6 +1005,8 @@ typedef Py_uintptr_t npy_uintp; #define PyDimMem_RENEW(ptr,size) \ ((npy_intp *)PyArray_realloc(ptr,size*sizeof(npy_intp))) +/* forward declaration */ +struct _PyArray_Descr; /* These must deal with unaligned and swapped data if necessary */ typedef PyObject * (PyArray_GetItemFunc) (void *, void *); @@ -1028,8 +1031,12 @@ typedef void (PyArray_DotFunc)(void *, npy_intp, void *, npy_intp, void *, typedef void (PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, void *); -typedef int (PyArray_ScanFunc)(FILE *, void *, void *, void *); -typedef int (PyArray_FromStrFunc)(char *, void *, char **, void *); +/* XXX the ignore argument should be removed next time the API version + is bumped. 
It used to be the separator. */ +typedef int (PyArray_ScanFunc)(FILE *fp, void *dptr, + char *ignore, struct _PyArray_Descr *); +typedef int (PyArray_FromStrFunc)(char *s, void *dptr, char **endptr, + struct _PyArray_Descr *); typedef int (PyArray_FillFunc)(void *, npy_intp, void *); @@ -1157,7 +1164,7 @@ typedef struct { PyDataType_FLAGCHK(dtype, NPY_ITEM_REFCOUNT) /* Change dtype hasobject to 32-bit in 1.1 and change its name */ -typedef struct { +typedef struct _PyArray_Descr { PyObject_HEAD PyTypeObject *typeobj; /* the type object representing an instance of this type -- should not diff --git a/numpy/core/src/arraytypes.inc.src b/numpy/core/src/arraytypes.inc.src index 844890434..3ebc52f05 100644 --- a/numpy/core/src/arraytypes.inc.src +++ b/numpy/core/src/arraytypes.inc.src @@ -11,7 +11,7 @@ MyPyLong_AsLongLong(PyObject *vv) if (mylong == NULL) return (longlong) -1; vv = mylong; } - else Py_INCREF(vv); + else Py_INCREF(vv); ret = PyLong_AsLongLong(vv); Py_DECREF(vv); @@ -867,18 +867,9 @@ static void /****************** scan *************************************/ -#define _ENDSCAN \ - if (num != 1) { \ - if (num == 0) return -3; \ - if (num == EOF) return -4; \ - return -5; \ - } \ - if (sep != NULL) { \ - num = fscanf(fp, sep); \ - if (num == 0) return 0; \ - if (num == EOF) return -1; \ - } \ - return 0 +/* The first ignore argument is for backwards compatibility. + Should be removed when the API version is bumped up. 
+ */ /**begin repeat @@ -887,11 +878,9 @@ static void #format="hd","hu","d","u","ld","lu",LONGLONG_FMT,ULONGLONG_FMT,"f","lf","Lf"# */ static int -@fname@_scan (FILE *fp, @type@ *ip, char *sep, void *ignore) +@fname@_scan (FILE *fp, @type@ *ip, void *ignore, PyArray_Descr *ignore2) { - int num; - num = fscanf(fp, "%"@format@, ip); - _ENDSCAN; + return fscanf(fp, "%"@format@, ip); } /**end repeat**/ @@ -903,24 +892,24 @@ static int #format="d","u"# */ static int -@fname@_scan (FILE *fp, @type@ *ip, char *sep, void *ignore) +@fname@_scan (FILE *fp, @type@ *ip, void *ignore, PyArray_Descr *ignore2) { @btype@ temp; int num; num = fscanf(fp, "%"@format@, &temp); *ip = (@type@) temp; - _ENDSCAN; + return num; } /**end repeat**/ static int -BOOL_scan (FILE *fp, Bool *ip, char *sep, void *ignore) +BOOL_scan (FILE *fp, Bool *ip, void *ignore, PyArray_Descr *ignore2) { int temp; int num; num = fscanf(fp, "%d", &temp); *ip = (Bool) (temp != 0); - _ENDSCAN; + return num; } /**begin repeat @@ -929,8 +918,6 @@ BOOL_scan (FILE *fp, Bool *ip, char *sep, void *ignore) #define @fname@_scan NULL /**end repeat**/ -#undef _ENDSCAN - /****************** fromstr *************************************/ /**begin repeat @@ -940,7 +927,7 @@ BOOL_scan (FILE *fp, Bool *ip, char *sep, void *ignore) #btype=(long,ulong)*5# */ static int -@fname@_fromstr(char *str, @type@ *ip, char **endptr, void *ignore) +@fname@_fromstr(char *str, @type@ *ip, char **endptr, PyArray_Descr *ignore) { @btype@ result; @@ -956,7 +943,7 @@ static int */ #if (PY_VERSION_HEX >= 0x02040000) || defined(PyOS_ascii_strtod) static int -@fname@_fromstr(char *str, @type@ *ip, char **endptr, void *ignore) +@fname@_fromstr(char *str, @type@ *ip, char **endptr, PyArray_Descr *ignore) { double result; diff --git a/numpy/core/src/multiarraymodule.c b/numpy/core/src/multiarraymodule.c index cd062efae..b8e05d6bf 100644 --- a/numpy/core/src/multiarraymodule.c +++ b/numpy/core/src/multiarraymodule.c @@ -5828,34 +5828,237 @@ 
array_set_typeDict(PyObject *ignored, PyObject *args) return Py_None; } + +/* Reading from a file or a string. + + As much as possible, we try to use the same code for both files and strings, + so the semantics for fromstring and fromfile are the same, especially with + regards to the handling of text representations. + */ + + +typedef int (*next_element)(void **, void *, PyArray_Descr *, void *); +typedef int (*skip_separator)(void **, const char *, void *); + static int -_skip_sep(char **ptr, char *sep) +fromstr_next_element(char **s, void *dptr, PyArray_Descr *dtype, + const char *end) { - char *a; - int n; - n = strlen(sep); - a = *ptr; - while(*a != '\0' && (strncmp(a, sep, n) != 0)) - a++; - if (*a == '\0') return -1; - *ptr = a+strlen(sep); - return 0; + int r = dtype->f->fromstr(*s, dptr, s, dtype); + if (end != NULL && *s > end) { + return -1; + } + return r; } -/* steals a reference to dtype -- accepts NULL */ -/*OBJECT_API*/ +static int +fromfile_next_element(FILE **fp, void *dptr, PyArray_Descr *dtype, + void *stream_data) +{ + /* the NULL argument is for backwards-compatibility */ + return dtype->f->scanfunc(*fp, dptr, NULL, dtype); +} + +/* Remove multiple whitespace from the separator, and add a space to the + beginning and end. This simplifies the separator-skipping code below. +*/ +static char * +swab_separator(char *sep) +{ + int skip_space = 0; + char *s, *start; + s = start = malloc(strlen(sep)+3); + /* add space to front if there isn't one */ + if (*sep != '\0' && !isspace(*sep)) { + *s = ' '; s++; + } + while (*sep != '\0') { + if (isspace(*sep)) { + if (skip_space) { + sep++; + } else { + *s = ' '; + s++; sep++; + skip_space = 1; + } + } else { + *s = *sep; + s++; sep++; + skip_space = 0; + } + } + /* add space to end if there isn't one */ + if (s != start && s[-1] == ' ') { + *s = ' '; + s++; + } + *s = '\0'; + return start; +} + +/* Assuming that the separator is the next bit in the string (file), skip it. 
+ + Single spaces in the separator are matched to arbitrary-long sequences + of whitespace in the input. + + If we can't match the separator, return -2. + If we hit the end of the string (file), return -1. + Otherwise, return 0. + */ + +static int +fromstr_skip_separator(char **s, const char *sep, const char *end) +{ + char *string = *s; + int result = 0; + while (1) { + char c = *string; + if (c == '\0' || (end != NULL && string >= end)) { + result = -1; + break; + } else if (*sep == '\0') { + /* matched separator */ + result = 0; + break; + } else if (*sep == ' ') { + if (!isspace(c)) { + sep++; + continue; + } + } else if (*sep != c) { + result = -2; + break; + } else { + sep++; + } + string++; + } + *s = string; + return result; +} + +static int +fromfile_skip_separator(FILE **fp, const char *sep, void *stream_data) +{ + int result = 0; + while (1) { + int c = fgetc(*fp); + if (c == EOF) { + result = -1; + break; + } else if (*sep == '\0') { + /* matched separator */ + ungetc(c, *fp); + result = 0; + break; + } else if (*sep == ' ') { + if (!isspace(c)) { + sep++; + ungetc(c, *fp); + } + } else if (*sep != c) { + ungetc(c, *fp); + result = -2; + break; + } else { + sep++; + } + } + return result; +} + +/* Create an array by reading from the given stream, using the passed + next_element and skip_separator functions. + */ + +#define FROM_BUFFER_SIZE 4096 +static PyArrayObject * +array_from_text(PyArray_Descr *dtype, intp num, char *sep, size_t *nread, + void *stream, next_element next, skip_separator skip_sep, + void *stream_data) +{ + PyArrayObject *r; + intp i; + char *dptr, *clean_sep; + + intp thisbuf = 0; + intp size; + intp bytes, totalbytes; + + size = (num >= 0) ? 
num : FROM_BUFFER_SIZE; + + r = (PyArrayObject *) + PyArray_NewFromDescr(&PyArray_Type, + dtype, + 1, &size, + NULL, NULL, + 0, NULL); + if (r == NULL) return NULL; + clean_sep = swab_separator(sep); + NPY_BEGIN_ALLOW_THREADS; + totalbytes = bytes = size * dtype->elsize; + dptr = r->data; + for (i=0; num < 0 || i < num; i++) { + if (next(&stream, dptr, dtype, stream_data) < 0) + break; + *nread += 1; + thisbuf += 1; + dptr += dtype->elsize; + if (num < 0 && thisbuf == size) { + totalbytes += bytes; + r->data = PyDataMem_RENEW(r->data, totalbytes); + dptr = r->data + (totalbytes - bytes); + thisbuf = 0; + } + if (skip_sep(&stream, clean_sep, stream_data) < 0) + break; + } + if (num < 0) { + r->data = PyDataMem_RENEW(r->data, (*nread)*dtype->elsize); + PyArray_DIM(r,0) = *nread; + } + NPY_END_ALLOW_THREADS; + free(clean_sep); + if (PyErr_Occurred()) { + Py_DECREF(r); + return NULL; + } + return r; +} +#undef FROM_BUFFER_SIZE + +/*OBJECT_API + + Given a pointer to a string ``data``, a string length ``slen``, and + a ``PyArray_Descr``, return an array corresponding to the data + encoded in that string. + + If the dtype is NULL, the default array type is used (double). + If non-null, the reference is stolen. + + If ``slen`` is < 0, then the end of string is used for text data. + It is an error for ``slen`` to be < 0 for binary data (since embedded NULLs + would be the norm). + + The number of elements to read is given as ``num``; if it is < 0, then + then as many as possible are read. + + If ``sep`` is NULL or empty, then binary data is assumed, else + text data, with ``sep`` as the separator between elements. Whitespace in + the separator matches any length of whitespace in the text, and a match + for whitespace around the separator is added. 
+ */ static PyObject * PyArray_FromString(char *data, intp slen, PyArray_Descr *dtype, - intp n, char *sep) + intp num, char *sep) { int itemsize; PyArrayObject *ret; Bool binary; - if (dtype == NULL) dtype=PyArray_DescrFromType(PyArray_DEFAULT); - + if (PyDataType_FLAGCHK(dtype, NPY_ITEM_IS_POINTER)) { PyErr_SetString(PyExc_ValueError, "Cannot create an object array from" \ @@ -5874,7 +6077,7 @@ PyArray_FromString(char *data, intp slen, PyArray_Descr *dtype, binary = ((sep == NULL) || (strlen(sep) == 0)); if (binary) { - if (n < 0 ) { + if (num < 0 ) { if (slen % itemsize != 0) { PyErr_SetString(PyExc_ValueError, "string size must be a "\ @@ -5882,9 +6085,9 @@ PyArray_FromString(char *data, intp slen, PyArray_Descr *dtype, Py_DECREF(dtype); return NULL; } - n = slen/itemsize; + num = slen/itemsize; } else { - if (slen < n*itemsize) { + if (slen < num*itemsize) { PyErr_SetString(PyExc_ValueError, "string is smaller than " \ "requested size"); @@ -5893,111 +6096,40 @@ PyArray_FromString(char *data, intp slen, PyArray_Descr *dtype, } } - if ((ret = (PyArrayObject *)\ - PyArray_NewFromDescr(&PyArray_Type, dtype, - 1, &n, NULL, NULL, - 0, NULL)) == NULL) - return NULL; - memcpy(ret->data, data, n*dtype->elsize); - return (PyObject *)ret; - } - else { /* read from character-based string */ - char *ptr; - PyArray_FromStrFunc *fromstr; - char *dptr; - intp nread=0; - intp index; - - fromstr = dtype->f->fromstr; - if (fromstr == NULL) { + ret = (PyArrayObject *) + PyArray_NewFromDescr(&PyArray_Type, dtype, + 1, &num, NULL, NULL, + 0, NULL); + if (ret == NULL) return NULL; + memcpy(ret->data, data, num*dtype->elsize); + } else { + /* read from character-based string */ + size_t nread = 0; + char *end; + if (dtype->f->scanfunc == NULL) { PyErr_SetString(PyExc_ValueError, "don't know how to read " \ - "character strings for given " \ + "character strings with that " \ "array type"); Py_DECREF(dtype); return NULL; } - - if (n!=-1) { - ret = (PyArrayObject *) \ - 
PyArray_NewFromDescr(&PyArray_Type, - dtype, 1, &n, NULL, - NULL, 0, NULL); - if (ret == NULL) return NULL; - NPY_BEGIN_ALLOW_THREADS - ptr = data; - dptr = ret->data; - for (index=0; index < n; index++) { - if (fromstr(ptr, dptr, &ptr, ret) < 0) - break; - nread += 1; - dptr += dtype->elsize; - if (_skip_sep(&ptr, sep) < 0) - break; - } - if (nread < n) { - fprintf(stderr, "%ld items requested but "\ - "only %ld read\n", - (long) n, (long) nread); - ret->data = \ - PyDataMem_RENEW(ret->data, - nread * \ - ret->descr->elsize); - PyArray_DIM(ret,0) = nread; - - } - NPY_END_ALLOW_THREADS - } - else { -#define _FILEBUFNUM 4096 - intp thisbuf=0; - intp size = _FILEBUFNUM; - intp bytes; - intp totalbytes; - char *end; - int val; - - ret = (PyArrayObject *)\ - PyArray_NewFromDescr(&PyArray_Type, - dtype, - 1, &size, - NULL, NULL, - 0, NULL); - if (ret==NULL) return NULL; - NPY_BEGIN_ALLOW_THREADS - totalbytes = bytes = size * dtype->elsize; - dptr = ret->data; - ptr = data; - end = data+slen; - while (ptr < end) { - val = fromstr(ptr, dptr, &ptr, ret); - if (val < 0) break; - nread += 1; - val = _skip_sep(&ptr, sep); - if (val < 0) break; - thisbuf += 1; - dptr += dtype->elsize; - if (thisbuf == size) { - totalbytes += bytes; - ret->data = PyDataMem_RENEW(ret->data, - totalbytes); - dptr = ret->data + \ - (totalbytes - bytes); - thisbuf = 0; - } - } - ret->data = PyDataMem_RENEW(ret->data, - nread*ret->descr->elsize); - PyArray_DIM(ret,0) = nread; -#undef _FILEBUFNUM - NPY_END_ALLOW_THREADS + if (slen < 0) { + end = NULL; + } else { + end = data + slen; } + ret = array_from_text(dtype, num, sep, &nread, + data, + (next_element) fromstr_next_element, + (skip_separator) fromstr_skip_separator, + end); } return (PyObject *)ret; } static PyObject * -array_fromString(PyObject *ignored, PyObject *args, PyObject *keywds) +array_fromstring(PyObject *ignored, PyObject *args, PyObject *keywds) { char *data; Py_ssize_t nin=-1; @@ -6018,6 +6150,148 @@ array_fromString(PyObject 
*ignored, PyObject *args, PyObject *keywds) } + +static PyArrayObject * +array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, intp num, size_t *nread) +{ + PyArrayObject *r; + intp start, numbytes; + + if (num < 0) { + int fail=0; + start = (intp )ftell(fp); + if (start < 0) fail=1; + if (fseek(fp, 0, SEEK_END) < 0) fail=1; + numbytes = (intp) ftell(fp); + if (numbytes < 0) fail=1; + numbytes -= start; + if (fseek(fp, start, SEEK_SET) < 0) fail=1; + if (fail) { + PyErr_SetString(PyExc_IOError, + "could not seek in file"); + Py_DECREF(dtype); + return NULL; + } + num = numbytes / dtype->elsize; + } + r = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, + dtype, + 1, &num, + NULL, NULL, + 0, NULL); + if (r==NULL) return NULL; + NPY_BEGIN_ALLOW_THREADS; + *nread = fread(r->data, dtype->elsize, num, fp); + NPY_END_ALLOW_THREADS; + return r; +} + +/*OBJECT_API + + Given a ``FILE *`` pointer ``fp``, and a ``PyArray_Descr``, return an + array corresponding to the data encoded in that file. + + If the dtype is NULL, the default array type is used (double). + If non-null, the reference is stolen. + + The number of elements to read is given as ``num``; if it is < 0, then + then as many as possible are read. + + If ``sep`` is NULL or empty, then binary data is assumed, else + text data, with ``sep`` as the separator between elements. Whitespace in + the separator matches any length of whitespace in the text, and a match + for whitespace around the separator is added. + + For memory-mapped files, use the buffer interface. No more data than + necessary is read by this routine. 
+*/ +static PyObject * +PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, intp num, char *sep) +{ + PyArrayObject *ret; + size_t nread = 0; + + if (PyDataType_REFCHK(dtype)) { + PyErr_SetString(PyExc_ValueError, + "cannot read into object array"); + Py_DECREF(dtype); + return NULL; + } + if (dtype->elsize == 0) { + PyErr_SetString(PyExc_ValueError, "0-sized elements."); + Py_DECREF(dtype); + return NULL; + } + + if ((sep == NULL) || (strlen(sep) == 0)) { + ret = array_fromfile_binary(fp, dtype, num, &nread); + } else { + if (dtype->f->scanfunc == NULL) { + PyErr_SetString(PyExc_ValueError, + "don't know how to read " \ + "character files with that " \ + "array type"); + Py_DECREF(dtype); + return NULL; + } + ret = array_from_text(dtype, num, sep, &nread, + fp, + (next_element) fromfile_next_element, + (skip_separator) fromfile_skip_separator, + NULL); + } + if (((intp) nread) < num) { + fprintf(stderr, "%ld items requested but only %ld read\n", + (long) num, (long) nread); + ret->data = PyDataMem_RENEW(ret->data, + nread * ret->descr->elsize); + PyArray_DIM(ret,0) = nread; + } + return (PyObject *)ret; +} + +static PyObject * +array_fromfile(PyObject *ignored, PyObject *args, PyObject *keywds) +{ + PyObject *file=NULL, *ret; + FILE *fp; + char *sep=""; + Py_ssize_t nin=-1; + static char *kwlist[] = {"file", "dtype", "count", "sep", NULL}; + PyArray_Descr *type=NULL; + + if (!PyArg_ParseTupleAndKeywords(args, keywds, + "O|O&" NPY_SSIZE_T_PYFMT "s", + kwlist, + &file, + PyArray_DescrConverter, &type, + &nin, &sep)) { + return NULL; + } + + if (type == NULL) type = PyArray_DescrFromType(PyArray_DEFAULT); + + if (PyString_Check(file) || PyUnicode_Check(file)) { + file = PyObject_CallFunction((PyObject *)&PyFile_Type, + "Os", file, "rb"); + if (file==NULL) return NULL; + } + else { + Py_INCREF(file); + } + fp = PyFile_AsFile(file); + if (fp == NULL) { + PyErr_SetString(PyExc_IOError, + "first argument must be an open file"); + Py_DECREF(file); + return NULL; + } + ret = 
PyArray_FromFile(fp, type, (intp) nin, sep); + Py_DECREF(file); + return ret; +} + + /* steals a reference to dtype (which cannot be NULL) */ /*OBJECT_API */ static PyObject * @@ -6108,7 +6382,7 @@ done: } static PyObject * -array_fromIter(PyObject *ignored, PyObject *args, PyObject *keywds) +array_fromiter(PyObject *ignored, PyObject *args, PyObject *keywds) { PyObject *iter; Py_ssize_t nin=-1; @@ -6128,208 +6402,6 @@ array_fromIter(PyObject *ignored, PyObject *args, PyObject *keywds) } - - -/* This needs an open file object and reads it in directly. - memory-mapped files handled differently through buffer interface. - -file pointer number in resulting 1d array -(can easily reshape later, -1 for to end of file) -type of array -sep is a separator string for character-based data (or NULL for binary) - " " means whitespace -*/ - -/*OBJECT_API*/ -static PyObject * -PyArray_FromFile(FILE *fp, PyArray_Descr *typecode, intp num, char *sep) -{ - PyArrayObject *r; - size_t nread = 0; - PyArray_ScanFunc *scan; - Bool binary; - - if (PyDataType_REFCHK(typecode)) { - PyErr_SetString(PyExc_ValueError, "cannot read into" - "object array"); - Py_DECREF(typecode); - return NULL; - } - if (typecode->elsize == 0) { - PyErr_SetString(PyExc_ValueError, "0-sized elements."); - Py_DECREF(typecode); - return NULL; - } - - binary = ((sep == NULL) || (strlen(sep) == 0)); - if (num == -1 && binary) { /* Get size for binary file*/ - intp start, numbytes; - int fail=0; - start = (intp )ftell(fp); - if (start < 0) fail=1; - if (fseek(fp, 0, SEEK_END) < 0) fail=1; - numbytes = (intp) ftell(fp); - if (numbytes < 0) fail=1; - numbytes -= start; - if (fseek(fp, start, SEEK_SET) < 0) fail=1; - if (fail) { - PyErr_SetString(PyExc_IOError, - "could not seek in file"); - Py_DECREF(typecode); - return NULL; - } - num = numbytes / typecode->elsize; - } - - if (binary) { /* binary data */ - r = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, - typecode, - 1, &num, - NULL, NULL, - 0, NULL); - if 
(r==NULL) return NULL; - NPY_BEGIN_ALLOW_THREADS - nread = fread(r->data, typecode->elsize, num, fp); - NPY_END_ALLOW_THREADS - } - else { /* character reading */ - intp i; - char *dptr; - int done=0; - - scan = typecode->f->scanfunc; - if (scan == NULL) { - PyErr_SetString(PyExc_ValueError, - "don't know how to read " \ - "character files with that " \ - "array type"); - Py_DECREF(typecode); - return NULL; - } - - if (num != -1) { /* number to read is known */ - r = (PyArrayObject *)\ - PyArray_NewFromDescr(&PyArray_Type, - typecode, - 1, &num, - NULL, NULL, - 0, NULL); - if (r==NULL) return NULL; - NPY_BEGIN_ALLOW_THREADS - dptr = r->data; - for (i=0; i < num; i++) { - if (done) break; - done = scan(fp, dptr, sep, NULL); - if (done < -2) break; - nread += 1; - dptr += r->descr->elsize; - } - NPY_END_ALLOW_THREADS - if (PyErr_Occurred()) { - Py_DECREF(r); - return NULL; - } - } - else { /* we have to watch for the end of the file and - reallocate at the end */ -#define _FILEBUFNUM 4096 - intp thisbuf=0; - intp size = _FILEBUFNUM; - intp bytes; - intp totalbytes; - - r = (PyArrayObject *)\ - PyArray_NewFromDescr(&PyArray_Type, - typecode, - 1, &size, - NULL, NULL, - 0, NULL); - if (r==NULL) return NULL; - NPY_BEGIN_ALLOW_THREADS - totalbytes = bytes = size * typecode->elsize; - dptr = r->data; - while (!done) { - done = scan(fp, dptr, sep, NULL); - - /* end of file reached trying to - scan value. done is 1 or 2 - if end of file reached trying to - scan separator. Still good value. 
- */ - if (done < -2) break; - thisbuf += 1; - nread += 1; - dptr += r->descr->elsize; - if (!done && thisbuf == size) { - totalbytes += bytes; - r->data = PyDataMem_RENEW(r->data, - totalbytes); - dptr = r->data + (totalbytes - bytes); - thisbuf = 0; - } - } - r->data = PyDataMem_RENEW(r->data, nread*r->descr->elsize); - PyArray_DIM(r,0) = nread; - num = nread; - NPY_END_ALLOW_THREADS -#undef _FILEBUFNUM - } - if (PyErr_Occurred()) { - Py_DECREF(r); - return NULL; - } - - } - if (((intp) nread) < num) { - fprintf(stderr, "%ld items requested but only %ld read\n", - (long) num, (long) nread); - r->data = PyDataMem_RENEW(r->data, nread * r->descr->elsize); - PyArray_DIM(r,0) = nread; - } - return (PyObject *)r; -} - -static PyObject * -array_fromfile(PyObject *ignored, PyObject *args, PyObject *keywds) -{ - PyObject *file=NULL, *ret; - FILE *fp; - char *sep=""; - Py_ssize_t nin=-1; - static char *kwlist[] = {"file", "dtype", "count", "sep", NULL}; - PyArray_Descr *type=NULL; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, - "O|O&" NPY_SSIZE_T_PYFMT "s", - kwlist, - &file, - PyArray_DescrConverter, &type, - &nin, &sep)) { - return NULL; - } - - if (type == NULL) type = PyArray_DescrFromType(PyArray_DEFAULT); - - if (PyString_Check(file) || PyUnicode_Check(file)) { - file = PyObject_CallFunction((PyObject *)&PyFile_Type, - "Os", file, "rb"); - if (file==NULL) return NULL; - } - else { - Py_INCREF(file); - } - fp = PyFile_AsFile(file); - if (fp == NULL) { - PyErr_SetString(PyExc_IOError, - "first argument must be an open file"); - Py_DECREF(file); - return NULL; - } - ret = PyArray_FromFile(fp, type, (intp) nin, sep); - Py_DECREF(file); - return ret; -} - /*OBJECT_API*/ static PyObject * PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type, @@ -7213,9 +7285,9 @@ static struct PyMethodDef array_module_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"putmask", (PyCFunction)array_putmask, METH_VARARGS | METH_KEYWORDS, NULL}, - 
{"fromstring",(PyCFunction)array_fromString, + {"fromstring",(PyCFunction)array_fromstring, METH_VARARGS|METH_KEYWORDS, NULL}, - {"fromiter",(PyCFunction)array_fromIter, + {"fromiter",(PyCFunction)array_fromiter, METH_VARARGS|METH_KEYWORDS, NULL}, {"concatenate", (PyCFunction)array_concatenate, METH_VARARGS|METH_KEYWORDS, NULL}, diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 4ec8501c7..d7ab0be93 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -116,9 +116,28 @@ class test_fromstring(NumpyTestCase): a = fromstring('\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x80@',dtype='<f4') assert_array_equal(a, array([1,2,3,4])) + def check_string(self): + a = fromstring('1,2,3,4', sep=',') + assert_array_equal(a, [1., 2., 3., 4.]) + + def check_counted_string(self): + a = fromstring('1,2,3,4', count=4, sep=',') + assert_array_equal(a, [1., 2., 3., 4.]) + a = fromstring('1,2,3,4', count=3, sep=',') + assert_array_equal(a, [1., 2., 3.]) + + def check_string_with_ws(self): + a = fromstring('1 2 3 4 ', dtype=int, sep=' ') + assert_array_equal(a, [1, 2, 3, 4]) + + def check_counted_string_with_ws(self): + a = fromstring('1 2 3 4 ', count=3, dtype=int, sep=' ') + assert_array_equal(a, [1, 2, 3]) + def check_ascii(self): - a = fromstring('1 , 2 , 3 , 4',sep=',') - b = fromstring('1,2,3,4',dtype=float,sep=',') + a = fromstring('1 , 2 , 3 , 4', sep=',') + b = fromstring('1,2,3,4', dtype=float, sep=',') + assert_array_equal(a, [1.,2.,3.,4.]) assert_array_equal(a,b) class test_zero_rank(NumpyTestCase): |