diff options
author | pierregm <pierregm@localhost> | 2010-05-16 23:05:30 +0000 |
---|---|---|
committer | pierregm <pierregm@localhost> | 2010-05-16 23:05:30 +0000 |
commit | 97a38c4a4233fb133b2f2fa8b4fad9e65657f572 (patch) | |
tree | 70a91968a539cfbae47c8d6553fa75886c69ccf9 /numpy/lib | |
parent | ccf308399107ec304b7e0d36692f9d929b6d3416 (diff) | |
download | numpy-97a38c4a4233fb133b2f2fa8b4fad9e65657f572.tar.gz |
* Add a `replace_space` option to `NameValidator` and `genfromtxt`
* Force a file passed by name to `genfromtxt` to be opened in 'U' (universal newlines) mode (bug #1473)
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/_iotools.py | 13 | ||||
-rw-r--r-- | numpy/lib/npyio.py | 15 | ||||
-rw-r--r-- | numpy/lib/tests/test_io.py | 24 |
3 files changed, 46 insertions, 6 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index b2a7d4729..c9d81048f 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -258,6 +258,8 @@ class NameValidator: * If 'lower', field names are converted to lower case. The default value is True. + replace_space: '_', optional + Character(s) used in replacement of white spaces. Notes ----- @@ -281,7 +283,8 @@ class NameValidator: defaultexcludelist = ['return', 'file', 'print'] defaultdeletechars = set("""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""") # - def __init__(self, excludelist=None, deletechars=None, case_sensitive=None): + def __init__(self, excludelist=None, deletechars=None, + case_sensitive=None, replace_space='_'): # Process the exclusion list .. if excludelist is None: excludelist = [] @@ -303,6 +306,8 @@ class NameValidator: self.case_converter = lambda x: x.lower() else: self.case_converter = lambda x: x + # + self.replace_space = replace_space def validate(self, names, defaultfmt="f%i", nbfields=None): """ @@ -347,14 +352,16 @@ class NameValidator: deletechars = self.deletechars excludelist = self.excludelist case_converter = self.case_converter + replace_space = self.replace_space # Initializes some variables ... 
validatednames = [] seen = dict() nbempty = 0 # for item in names: - item = case_converter(item) - item = item.strip().replace(' ', '_') + item = case_converter(item).strip() + if replace_space: + item = item.replace(' ', replace_space) item = ''.join([c for c in item if c not in deletechars]) if item == '': item = defaultfmt % nbempty diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 4310aacd0..99a79637b 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -69,6 +69,8 @@ def seek_gzip_factory(f): return f + + class BagObj(object): """ BagObj(obj) @@ -100,6 +102,8 @@ class BagObj(object): except KeyError: raise AttributeError, key + + class NpzFile(object): """ NpzFile(fid) @@ -921,7 +925,8 @@ def fromregex(file, regexp, dtype): def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0, skip_header=0, skip_footer=0, converters=None, missing='', missing_values=None, filling_values=None, - usecols=None, names=None, excludelist=None, deletechars=None, + usecols=None, names=None, + excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt="f%i", unpack=None, usemask=False, loose=True, invalid_raise=True): """ @@ -978,6 +983,9 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, A format used to define default field names, such as "f%i" or "f_%02i". autostrip : bool, optional Whether to automatically strip white spaces from the variables. + replace_space : char, optional + Character(s) used in replacement of white spaces in the variables names. + By default, use a '_'. case_sensitive : {True, False, 'upper', 'lower'}, optional If True, field names are case sensitive. If False or 'upper', field names are converted to upper case. 
@@ -1076,7 +1084,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Initialize the filehandle, the LineSplitter and the NameValidator if isinstance(fname, basestring): - fhd = np.lib._datasource.open(fname) + fhd = np.lib._datasource.open(fname, 'U') elif not hasattr(fname, 'read'): raise TypeError("The input should be a string or a filehandle. "\ "(got %s instead)" % type(fname)) @@ -1086,7 +1094,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, autostrip=autostrip)._handyman validate_names = NameValidator(excludelist=excludelist, deletechars=deletechars, - case_sensitive=case_sensitive) + case_sensitive=case_sensitive, + replace_space=replace_space) # Get the first valid lines after the first skiprows ones .. if skiprows: diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 73d3c3599..d7becc284 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -1062,6 +1062,30 @@ M 33 21.99 dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')]) assert_equal(mtest, ctrl) + def test_replace_space(self): + "Test the 'replace_space' option" + txt = "A.A, B (B), C:C\n1, 2, 3.14" + # Test default: replace ' ' by '_' and delete non-alphanum chars + test = np.genfromtxt(StringIO(txt), + delimiter=",", names=True, dtype=None) + ctrl_dtype = [("AA", int), ("B_B", int), ("CC", float)] + ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no replace, no delete + test = np.genfromtxt(StringIO(txt), + delimiter=",", names=True, dtype=None, + replace_space='', deletechars='') + ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", float)] + ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no delete (spaces are replaced by _) + test = np.genfromtxt(StringIO(txt), + delimiter=",", names=True, dtype=None, + deletechars='') + ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", float)] + ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) + 
assert_equal(test, ctrl) + def test_incomplete_names(self): "Test w/ incomplete names" data = "A,,C\n0,1,2\n3,4,5" |