* add a `replace_space` option to NameValidator

* Force a file to be opened in 'U' mode (bug #1473)
author: pierregm <pierregm@localhost> 2010-05-16 23:05:30 +0000
committer: pierregm <pierregm@localhost> 2010-05-16 23:05:30 +0000
commit: 97a38c4a4233fb133b2f2fa8b4fad9e65657f572 (patch)
tree: 70a91968a539cfbae47c8d6553fa75886c69ccf9 /numpy/lib
parent: ccf308399107ec304b7e0d36692f9d929b6d3416 (diff)
download: numpy-97a38c4a4233fb133b2f2fa8b4fad9e65657f572.tar.gz
3 files changed, 46 insertions, 6 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index b2a7d4729..c9d81048f 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -258,6 +258,8 @@ class NameValidator:
         * If 'lower', field names are converted to lower case.
 
         The default value is True.
+    replace_space : str, optional
+        Character(s) used to replace white spaces in names (default '_').
 
     Notes
     -----
@@ -281,7 +283,8 @@ class NameValidator:
     defaultexcludelist = ['return', 'file', 'print']
     defaultdeletechars = set("""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")
     #
-    def __init__(self, excludelist=None, deletechars=None, case_sensitive=None):
+    def __init__(self, excludelist=None, deletechars=None,
+                 case_sensitive=None, replace_space='_'):
         # Process the exclusion list ..
         if excludelist is None:
             excludelist = []
@@ -303,6 +306,8 @@ class NameValidator:
             self.case_converter = lambda x: x.lower()
         else:
             self.case_converter = lambda x: x
+        #
+        self.replace_space = replace_space
 
     def validate(self, names, defaultfmt="f%i", nbfields=None):
         """
@@ -347,14 +352,16 @@ class NameValidator:
         deletechars = self.deletechars
         excludelist = self.excludelist
         case_converter = self.case_converter
+        replace_space = self.replace_space
         # Initializes some variables ...
         validatednames = []
         seen = dict()
         nbempty = 0
         #
         for item in names:
-            item = case_converter(item)
-            item = item.strip().replace(' ', '_')
+            item = case_converter(item).strip()
+            if replace_space:
+                item = item.replace(' ', replace_space)
             item = ''.join([c for c in item if c not in deletechars])
             if item == '':
                 item = defaultfmt % nbempty
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 4310aacd0..99a79637b 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -69,6 +69,8 @@ def seek_gzip_factory(f):
 
     return f
 
+
+
 class BagObj(object):
     """
     BagObj(obj)
@@ -100,6 +102,8 @@ class BagObj(object):
         except KeyError:
             raise AttributeError, key
 
+
+
 class NpzFile(object):
     """
     NpzFile(fid)
@@ -921,7 +925,8 @@ def fromregex(file, regexp, dtype):
 def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
                skiprows=0, skip_header=0, skip_footer=0, converters=None,
                missing='', missing_values=None, filling_values=None,
-               usecols=None, names=None, excludelist=None, deletechars=None,
+               usecols=None, names=None,
+               excludelist=None, deletechars=None, replace_space='_',
                autostrip=False, case_sensitive=True, defaultfmt="f%i",
                unpack=None, usemask=False, loose=True, invalid_raise=True):
     """
@@ -978,6 +983,9 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
         A format used to define default field names, such as "f%i" or "f_%02i".
     autostrip : bool, optional
         Whether to automatically strip white spaces from the variables.
+    replace_space : str, optional
+        Character(s) used to replace white spaces in the variable names.
+        By default, use a '_'.
     case_sensitive : {True, False, 'upper', 'lower'}, optional
         If True, field names are case sensitive.
         If False or 'upper', field names are converted to upper case.
@@ -1076,7 +1084,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
     # Initialize the filehandle, the LineSplitter and the NameValidator
     if isinstance(fname, basestring):
-        fhd = np.lib._datasource.open(fname)
+        fhd = np.lib._datasource.open(fname, 'U')
     elif not hasattr(fname, 'read'):
         raise TypeError("The input should be a string or a filehandle. "\
                         "(got %s instead)" % type(fname))
@@ -1086,7 +1094,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
                               autostrip=autostrip)._handyman
     validate_names = NameValidator(excludelist=excludelist,
                                    deletechars=deletechars,
-                                   case_sensitive=case_sensitive)
+                                   case_sensitive=case_sensitive,
+                                   replace_space=replace_space)
 
     # Get the first valid lines after the first skiprows ones ..
     if skiprows:
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 73d3c3599..d7becc284 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -1062,6 +1062,30 @@ M   33  21.99
                         dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')])
         assert_equal(mtest, ctrl)
 
+    def test_replace_space(self):
+        "Test the 'replace_space' option"
+        txt = "A.A, B (B), C:C\n1, 2, 3.14"
+        # Test default: replace ' ' by '_' and delete non-alphanum chars
+        test = np.genfromtxt(StringIO(txt),
+                             delimiter=",", names=True, dtype=None)
+        ctrl_dtype = [("AA", int), ("B_B", int), ("CC", float)]
+        ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype)
+        assert_equal(test, ctrl)
+        # Test: no replace, no delete
+        test = np.genfromtxt(StringIO(txt),
+                             delimiter=",", names=True, dtype=None,
+                             replace_space='', deletechars='')
+        ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", float)]
+        ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype)
+        assert_equal(test, ctrl)
+        # Test: no delete (spaces are replaced by _)
+        test = np.genfromtxt(StringIO(txt),
+                             delimiter=",", names=True, dtype=None,
+                             deletechars='')
+        ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", float)]
+        ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype)
+        assert_equal(test, ctrl)
+
     def test_incomplete_names(self):
         "Test w/ incomplete names"
         data = "A,,C\n0,1,2\n3,4,5"
author	pierregm <pierregm@localhost>	2010-05-16 23:05:30 +0000
committer	pierregm <pierregm@localhost>	2010-05-16 23:05:30 +0000
commit	97a38c4a4233fb133b2f2fa8b4fad9e65657f572 (patch)
tree	70a91968a539cfbae47c8d6553fa75886c69ccf9 /numpy/lib
parent	ccf308399107ec304b7e0d36692f9d929b6d3416 (diff)
download	numpy-97a38c4a4233fb133b2f2fa8b4fad9e65657f572.tar.gz