diff options
-rw-r--r-- | HISTORY.md | 1 | ||||
-rw-r--r-- | docs/formats.rst | 9 | ||||
-rw-r--r-- | src/tablib/formats/_xlsx.py | 8 | ||||
-rw-r--r-- | tests/files/bad_dimensions.xlsx | bin | 0 -> 9220 bytes | |||
-rwxr-xr-x | tests/test_tablib.py | 7 |
5 files changed, 21 insertions, 4 deletions
@@ -9,6 +9,7 @@
 ### Improvements
 
 - Added Python 3.9 support
+- Added read_only option to xlsx file reader (#482).
 
 ### Bugfixes
 
diff --git a/docs/formats.rst b/docs/formats.rst
index 0c46733..2357efe 100644
--- a/docs/formats.rst
+++ b/docs/formats.rst
@@ -206,6 +206,15 @@ Import/export data in Excel 07+ Spreadsheet representation.
 This format is optional, install Tablib with ``pip install "tablib[xlsx]"`` to
 make the format available.
 
+The ``import_set()`` and ``import_book()`` methods accept keyword
+argument ``read_only``. If its value is ``True`` (the default), the
+XLSX data source is read lazily. Lazy reading generally reduces time
+and memory consumption, especially for large spreadsheets. However,
+it relies on the XLSX data source declaring correct dimensions. Some
+programs generate XLSX files with incorrect dimensions. Such files
+may need to be loaded with this optimization turned off by passing
+``read_only=False``.
+
 .. note::
 
     When reading an ``xlsx`` file containing formulas in its cells, Tablib will
diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py
index e2a3fde..34911e9 100644
--- a/src/tablib/formats/_xlsx.py
+++ b/src/tablib/formats/_xlsx.py
@@ -59,12 +59,12 @@ class XLSXFormat:
         return stream.getvalue()
 
     @classmethod
-    def import_set(cls, dset, in_stream, headers=True):
+    def import_set(cls, dset, in_stream, headers=True, read_only=True):
         """Returns databook from XLS stream."""
 
         dset.wipe()
 
-        xls_book = load_workbook(in_stream, read_only=True, data_only=True)
+        xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
         sheet = xls_book.active
 
         dset.title = sheet.title
@@ -77,12 +77,12 @@ class XLSXFormat:
                 dset.append(row_vals)
 
     @classmethod
-    def import_book(cls, dbook, in_stream, headers=True):
+    def import_book(cls, dbook, in_stream, headers=True, read_only=True):
         """Returns databook from XLS stream."""
 
         dbook.wipe()
 
-        xls_book = load_workbook(in_stream, read_only=True, data_only=True)
+        xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
 
         for sheet in xls_book.worksheets:
             data = tablib.Dataset()
diff --git a/tests/files/bad_dimensions.xlsx b/tests/files/bad_dimensions.xlsx
Binary files differ
new file mode 100644
index 0000000..8493760
--- /dev/null
+++ b/tests/files/bad_dimensions.xlsx
diff --git a/tests/test_tablib.py b/tests/test_tablib.py
index b13d17c..ccb28e1 100755
--- a/tests/test_tablib.py
+++ b/tests/test_tablib.py
@@ -1040,6 +1040,13 @@ class XLSXTests(BaseTestCase):
             data = tablib.Dataset().load(fh)
         self.assertEqual(data.headers[0], 'Hello World')
 
+    def test_xlsx_bad_dimensions(self):
+        """Test loading file with bad dimension. Must be done with
+        read_only=False."""
+        xls_source = Path(__file__).parent / 'files' / 'bad_dimensions.xlsx'
+        with xls_source.open('rb') as fh:
+            data = tablib.Dataset().load(fh, read_only=False)
+        self.assertEqual(data.height, 3)
 
 class JSONTests(BaseTestCase):
     def test_json_format_detect(self):