diff options
| author | Jean Jordaan <jean.jordaan@gmail.com> | 2019-03-03 13:29:21 +0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-03-03 13:29:21 +0700 |
| commit | addaa090efb4d4beacb2bd2cc6dbdb7e158fa2a9 (patch) | |
| tree | de3b532ed49a0d1e8a2e333b549b129d534bff91 /tablib/formats | |
| parent | cd67a63b434c33430d8fd32d6d15027b42811d9d (diff) | |
| parent | 79dc77de49b8375d616a6cabb4503feb990bb51d (diff) | |
| download | tablib-addaa090efb4d4beacb2bd2cc6dbdb7e158fa2a9.tar.gz | |
Merge branch 'master' into master
Diffstat (limited to 'tablib/formats')
| -rw-r--r-- | tablib/formats/__init__.py | 5 | ||||
| -rw-r--r-- | tablib/formats/_csv.py | 11 | ||||
| -rw-r--r-- | tablib/formats/_dbf.py | 4 | ||||
| -rw-r--r-- | tablib/formats/_df.py | 49 | ||||
| -rw-r--r-- | tablib/formats/_jira.py | 39 | ||||
| -rw-r--r-- | tablib/formats/_json.py | 19 | ||||
| -rw-r--r-- | tablib/formats/_ods.py | 13 | ||||
| -rw-r--r-- | tablib/formats/_rst.py | 273 | ||||
| -rw-r--r-- | tablib/formats/_tsv.py | 4 | ||||
| -rw-r--r-- | tablib/formats/_xls.py | 7 | ||||
| -rw-r--r-- | tablib/formats/_xlsx.py | 43 | ||||
| -rw-r--r-- | tablib/formats/_yaml.py | 13 |
12 files changed, 412 insertions, 68 deletions
diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 5cca19f..418e607 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -13,5 +13,8 @@ from . import _xlsx as xlsx from . import _ods as ods from . import _dbf as dbf from . import _latex as latex +from . import _df as df +from . import _rst as rst +from . import _jira as jira -available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods) +available = (json, xls, yaml, csv, dbf, tsv, html, jira, latex, xlsx, ods, df, rst) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 994b23b..8b536a7 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -3,15 +3,14 @@ """ Tablib - *SV Support. """ -from tablib.compat import is_py3, csv, StringIO +from tablib.compat import csv, StringIO, unicode title = 'csv' extensions = ('csv',) -DEFAULT_ENCODING = 'utf-8' -DEFAULT_DELIMITER = ',' +DEFAULT_DELIMITER = unicode(',') def export_set(dataset, **kwargs): @@ -19,8 +18,6 @@ def export_set(dataset, **kwargs): stream = StringIO() kwargs.setdefault('delimiter', DEFAULT_DELIMITER) - if not is_py3: - kwargs.setdefault('encoding', DEFAULT_ENCODING) _csv = csv.writer(stream, **kwargs) @@ -36,15 +33,13 @@ def import_set(dset, in_stream, headers=True, **kwargs): dset.wipe() kwargs.setdefault('delimiter', DEFAULT_DELIMITER) - if not is_py3: - kwargs.setdefault('encoding', DEFAULT_ENCODING) rows = csv.reader(StringIO(in_stream), **kwargs) for i, row in enumerate(rows): if (i == 0) and (headers): dset.headers = row - else: + elif row: dset.append(row) diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py index 41c2ef4..710797d 100644 --- a/tablib/formats/_dbf.py +++ b/tablib/formats/_dbf.py @@ -55,6 +55,7 @@ def export_set(dataset): else: stream = StringIO(dbf_stream.read()) dbf_stream.close() + os.close(temp_file) os.remove(temp_uri) return stream.getvalue() @@ -88,6 +89,3 @@ def detect(stream): # When unpacking a string argument with less than 8 chars, struct.error is # raised. return False - - - diff --git a/tablib/formats/_df.py b/tablib/formats/_df.py new file mode 100644 index 0000000..44b967f --- /dev/null +++ b/tablib/formats/_df.py @@ -0,0 +1,49 @@ +""" Tablib - DataFrame Support. +""" + + +import sys + + +if sys.version_info[0] > 2: + from io import BytesIO +else: + from cStringIO import StringIO as BytesIO + +try: + from pandas import DataFrame +except ImportError: + DataFrame = None + +import tablib + +from tablib.compat import unicode + +title = 'df' +extensions = ('df', ) + +def detect(stream): + """Returns True if given stream is a DataFrame.""" + if DataFrame is None: + return False + try: + DataFrame(stream) + return True + except ValueError: + return False + + +def export_set(dset, index=None): + """Returns DataFrame representation of DataBook.""" + if DataFrame is None: + raise NotImplementedError( + 'DataFrame Format requires `pandas` to be installed.' + ' Try `pip install tablib[pandas]`.') + dataframe = DataFrame(dset.dict, columns=dset.headers) + return dataframe + + +def import_set(dset, in_stream): + """Returns dataset from DataFrame.""" + dset.wipe() + dset.dict = in_stream.to_dict(orient='records') diff --git a/tablib/formats/_jira.py b/tablib/formats/_jira.py new file mode 100644 index 0000000..55fce52 --- /dev/null +++ b/tablib/formats/_jira.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +"""Tablib - Jira table export support. + + Generates a Jira table from the dataset. +""" +from tablib.compat import unicode + +title = 'jira' + + +def export_set(dataset): + """Formats the dataset according to the Jira table syntax: + + ||heading 1||heading 2||heading 3|| + |col A1|col A2|col A3| + |col B1|col B2|col B3| + + :param dataset: dataset to serialize + :type dataset: tablib.core.Dataset + """ + + header = _get_header(dataset.headers) if dataset.headers else '' + body = _get_body(dataset) + return '%s\n%s' % (header, body) if header else body + + +def _get_body(dataset): + return '\n'.join([_serialize_row(row) for row in dataset]) + + +def _get_header(headers): + return _serialize_row(headers, delimiter='||') + + +def _serialize_row(row, delimiter='|'): + return '%s%s%s' % (delimiter, + delimiter.join([unicode(item) if item else ' ' for item in row]), + delimiter) diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py index 777040a..bbd2c96 100644 --- a/tablib/formats/_json.py +++ b/tablib/formats/_json.py @@ -2,29 +2,34 @@ """ Tablib - JSON Support """ +import decimal +import json +from uuid import UUID import tablib -import sys -from tablib.packages import omnijson as json - title = 'json' extensions = ('json', 'jsn') -def date_handler(obj): - return obj.isoformat() if hasattr(obj, 'isoformat') else obj +def serialize_objects_handler(obj): + if isinstance(obj, decimal.Decimal) or isinstance(obj, UUID): + return str(obj) + elif hasattr(obj, 'isoformat'): + return obj.isoformat() + else: + return obj def export_set(dataset): """Returns JSON representation of Dataset.""" - return json.dumps(dataset.dict, default=date_handler) + return json.dumps(dataset.dict, default=serialize_objects_handler) def export_book(databook): """Returns JSON representation of Databook.""" - return json.dumps(databook._package(), default=date_handler) + return json.dumps(databook._package(), default=serialize_objects_handler) def import_set(dset, in_stream): diff --git a/tablib/formats/_ods.py b/tablib/formats/_ods.py index f43a26c..c7af891 100644 --- a/tablib/formats/_ods.py +++ b/tablib/formats/_ods.py @@ -3,15 +3,8 @@ """ Tablib - ODF Support. """ -import sys - - -if sys.version_info[0] > 2: - from io import BytesIO -else: - from cStringIO import StringIO as BytesIO - -from tablib.compat import opendocument, style, table, text, unicode +from odf import opendocument, style, table, text +from tablib.compat import BytesIO, unicode title = 'ods' extensions = ('ods',) @@ -97,4 +90,4 @@ def dset_sheet(dataset, ws): ws.addElement(odf_row) cell = table.TableCell() cell.addElement(text.P(text=col)) - odf_row.addElement(cell)
\ No newline at end of file + odf_row.addElement(cell) diff --git a/tablib/formats/_rst.py b/tablib/formats/_rst.py new file mode 100644 index 0000000..4b53ad7 --- /dev/null +++ b/tablib/formats/_rst.py @@ -0,0 +1,273 @@ +# -*- coding: utf-8 -*- + +""" Tablib - reStructuredText Support +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from textwrap import TextWrapper + +from tablib.compat import ( + median, + unicode, + izip_longest, +) + + +title = 'rst' +extensions = ('rst',) + + +MAX_TABLE_WIDTH = 80 # Roughly. It may be wider to avoid breaking words. + + +JUSTIFY_LEFT = 'left' +JUSTIFY_CENTER = 'center' +JUSTIFY_RIGHT = 'right' +JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT) + + +def to_unicode(value): + if isinstance(value, bytes): + return value.decode('utf-8') + return unicode(value) + + +def _max_word_len(text): + """ + Return the length of the longest word in `text`. + + + >>> _max_word_len('Python Module for Tabular Datasets') + 8 + + """ + return max((len(word) for word in text.split())) + + +def _get_column_string_lengths(dataset): + """ + Returns a list of string lengths of each column, and a list of + maximum word lengths. + """ + if dataset.headers: + column_lengths = [[len(h)] for h in dataset.headers] + word_lens = [_max_word_len(h) for h in dataset.headers] + else: + column_lengths = [[] for _ in range(dataset.width)] + word_lens = [0 for _ in range(dataset.width)] + for row in dataset.dict: + values = iter(row.values() if hasattr(row, 'values') else row) + for i, val in enumerate(values): + text = to_unicode(val) + column_lengths[i].append(len(text)) + word_lens[i] = max(word_lens[i], _max_word_len(text)) + return column_lengths, word_lens + + +def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT): + """ + Returns a table row of wrapped values as a list of lines + """ + if justify not in JUSTIFY_VALUES: + raise ValueError('Value of "justify" must be one of "{}"'.format( + '", "'.join(JUSTIFY_VALUES) + )) + if justify == JUSTIFY_LEFT: + just = lambda text, width: text.ljust(width) + elif justify == JUSTIFY_CENTER: + just = lambda text, width: text.center(width) + else: + just = lambda text, width: text.rjust(width) + lpad = sep + ' ' if sep else '' + rpad = ' ' + sep if sep else '' + pad = ' ' + sep + ' ' + cells = [] + for value, width in zip(values, widths): + wrapper.width = width + text = to_unicode(value) + cell = wrapper.wrap(text) + cells.append(cell) + lines = izip_longest(*cells, fillvalue='') + lines = ( + (just(cell_line, widths[i]) for i, cell_line in enumerate(line)) + for line in lines + ) + lines = [''.join((lpad, pad.join(line), rpad)) for line in lines] + return lines + + +def _get_column_widths(dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3): + """ + Returns a list of column widths proportional to the median length + of the text in their cells. + """ + str_lens, word_lens = _get_column_string_lengths(dataset) + median_lens = [int(median(lens)) for lens in str_lens] + total = sum(median_lens) + if total > max_table_width - (pad_len * len(median_lens)): + column_widths = (max_table_width * l // total for l in median_lens) + else: + column_widths = (l for l in median_lens) + # Allow for separator and padding: + column_widths = (w - pad_len if w > pad_len else w for w in column_widths) + # Rather widen table than break words: + column_widths = [max(w, l) for w, l in zip(column_widths, word_lens)] + return column_widths + + +def export_set_as_simple_table(dataset, column_widths=None): + """ + Returns reStructuredText grid table representation of dataset. + """ + lines = [] + wrapper = TextWrapper() + if column_widths is None: + column_widths = _get_column_widths(dataset, pad_len=2) + border = ' '.join(['=' * w for w in column_widths]) + + lines.append(border) + if dataset.headers: + lines.extend(_row_to_lines( + dataset.headers, + column_widths, + wrapper, + sep='', + justify=JUSTIFY_CENTER, + )) + lines.append(border) + for row in dataset.dict: + values = iter(row.values() if hasattr(row, 'values') else row) + lines.extend(_row_to_lines(values, column_widths, wrapper, '')) + lines.append(border) + return '\n'.join(lines) + + +def export_set_as_grid_table(dataset, column_widths=None): + """ + Returns reStructuredText grid table representation of dataset. + + + >>> from tablib import Dataset + >>> from tablib.formats import rst + >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) + >>> data = Dataset() + >>> data.headers = ['A', 'B', 'A and B'] + >>> for a, b in bits: + ... data.append([bool(a), bool(b), bool(a * b)]) + >>> print(rst.export_set(data, force_grid=True)) + +-------+-------+-------+ + | A | B | A and | + | | | B | + +=======+=======+=======+ + | False | False | False | + +-------+-------+-------+ + | True | False | False | + +-------+-------+-------+ + | False | True | False | + +-------+-------+-------+ + | True | True | True | + +-------+-------+-------+ + + """ + lines = [] + wrapper = TextWrapper() + if column_widths is None: + column_widths = _get_column_widths(dataset) + header_sep = '+=' + '=+='.join(['=' * w for w in column_widths]) + '=+' + row_sep = '+-' + '-+-'.join(['-' * w for w in column_widths]) + '-+' + + lines.append(row_sep) + if dataset.headers: + lines.extend(_row_to_lines( + dataset.headers, + column_widths, + wrapper, + justify=JUSTIFY_CENTER, + )) + lines.append(header_sep) + for row in dataset.dict: + values = iter(row.values() if hasattr(row, 'values') else row) + lines.extend(_row_to_lines(values, column_widths, wrapper)) + lines.append(row_sep) + return '\n'.join(lines) + + +def _use_simple_table(head0, col0, width0): + """ + Use a simple table if the text in the first column is never wrapped + + + >>> _use_simple_table('menu', ['egg', 'bacon'], 10) + True + >>> _use_simple_table(None, ['lobster thermidor', 'spam'], 10) + False + + """ + if head0 is not None: + head0 = to_unicode(head0) + if len(head0) > width0: + return False + for cell in col0: + cell = to_unicode(cell) + if len(cell) > width0: + return False + return True + + +def export_set(dataset, **kwargs): + """ + Returns reStructuredText table representation of dataset. + + Returns a simple table if the text in the first column is never + wrapped, otherwise returns a grid table. + + + >>> from tablib import Dataset + >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) + >>> data = Dataset() + >>> data.headers = ['A', 'B', 'A and B'] + >>> for a, b in bits: + ... data.append([bool(a), bool(b), bool(a * b)]) + >>> table = data.rst + >>> table.split('\\n') == [ + ... '===== ===== =====', + ... ' A B A and', + ... ' B ', + ... '===== ===== =====', + ... 'False False False', + ... 'True False False', + ... 'False True False', + ... 'True True True ', + ... '===== ===== =====', + ... ] + True + + """ + if not dataset.dict: + return '' + force_grid = kwargs.get('force_grid', False) + max_table_width = kwargs.get('max_table_width', MAX_TABLE_WIDTH) + column_widths = _get_column_widths(dataset, max_table_width) + + use_simple_table = _use_simple_table( + dataset.headers[0] if dataset.headers else None, + dataset.get_col(0), + column_widths[0], + ) + if use_simple_table and not force_grid: + return export_set_as_simple_table(dataset, column_widths) + else: + return export_set_as_grid_table(dataset, column_widths) + + +def export_book(databook): + """ + reStructuredText representation of a Databook. + + Tables are separated by a blank line. All tables use the grid + format. + """ + return '\n\n'.join(export_set(dataset, force_grid=True) + for dataset in databook._datasets) diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index 9380b3b..1c6d6a1 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -3,6 +3,7 @@ """ Tablib - TSV (Tab Separated Values) Support. """ +from tablib.compat import unicode from tablib.formats._csv import ( export_set as export_set_wrapper, import_set as import_set_wrapper, @@ -12,8 +13,7 @@ from tablib.formats._csv import ( title = 'tsv' extensions = ('tsv',) -DEFAULT_ENCODING = 'utf-8' -DELIMITER = '\t' +DELIMITER = unicode('\t') def export_set(dataset): """Returns TSV representation of Dataset.""" diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 787907a..82f8b86 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -5,8 +5,11 @@ import sys -from tablib.compat import BytesIO, xlwt, xlrd, XLRDError, xrange +from tablib.compat import BytesIO, xrange import tablib +import xlrd +import xlwt +from xlrd.biffh import XLRDError title = 'xls' extensions = ('xls',) @@ -22,7 +25,7 @@ def detect(stream): xlrd.open_workbook(file_contents=stream) return True except (TypeError, XLRDError): - pass + pass try: xlrd.open_workbook(file_contents=stream.read()) return True diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py index 411e0fc..ebafc4f 100644 --- a/tablib/formats/_xlsx.py +++ b/tablib/formats/_xlsx.py @@ -11,12 +11,12 @@ if sys.version_info[0] > 2: else: from cStringIO import StringIO as BytesIO -from tablib.compat import openpyxl +import openpyxl import tablib Workbook = openpyxl.workbook.Workbook ExcelWriter = openpyxl.writer.excel.ExcelWriter -get_column_letter = openpyxl.cell.get_column_letter +get_column_letter = openpyxl.utils.get_column_letter from tablib.compat import unicode @@ -51,7 +51,8 @@ def export_book(databook, freeze_panes=True): """Returns XLSX representation of DataBook.""" wb = Workbook() - wb.worksheets = [] + for sheet in wb.worksheets: + wb.remove(sheet) for i, dset in enumerate(databook._datasets): ws = wb.create_sheet() ws.title = dset.title if dset.title else 'Sheet%s' % (i) @@ -70,7 +71,7 @@ def import_set(dset, in_stream, headers=True): dset.wipe() xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream)) - sheet = xls_book.get_active_sheet() + sheet = xls_book.active dset.title = sheet.title @@ -111,42 +112,36 @@ def dset_sheet(dataset, ws, freeze_panes=True): _offset = i _package.insert((sep[0] + _offset), (sep[1],)) + bold = openpyxl.styles.Font(bold=True) + wrap_text = openpyxl.styles.Alignment(wrap_text=True) + for i, row in enumerate(_package): row_number = i + 1 for j, col in enumerate(row): col_idx = get_column_letter(j + 1) + cell = ws['%s%s' % (col_idx, row_number)] # bold headers if (row_number == 1) and dataset.headers: - # ws.cell('%s%s'%(col_idx, row_number)).value = unicode( - # '%s' % col, errors='ignore') - ws.cell('%s%s'%(col_idx, row_number)).value = unicode(col) - style = ws.get_style('%s%s' % (col_idx, row_number)) - style.font.bold = True + # cell.value = unicode('%s' % col, errors='ignore') + cell.value = unicode(col) + cell.font = bold if freeze_panes: - # As already done in #53, but after Merge lost: # Export Freeze only after first Line ws.freeze_panes = 'A2' - + # bold separators elif len(row) < dataset.width: - ws.cell('%s%s'%(col_idx, row_number)).value = unicode( - '%s' % col, errors='ignore') - style = ws.get_style('%s%s' % (col_idx, row_number)) - style.font.bold = True + cell.value = unicode('%s' % col, errors='ignore') + cell.font = bold # wrap the rest else: try: if '\n' in col: - ws.cell('%s%s'%(col_idx, row_number)).value = unicode( - '%s' % col, errors='ignore') - style = ws.get_style('%s%s' % (col_idx, row_number)) - style.alignment.wrap_text + cell.value = unicode('%s' % col, errors='ignore') + cell.alignment = wrap_text else: - ws.cell('%s%s'%(col_idx, row_number)).value = unicode( - '%s' % col, errors='ignore') + cell.value = unicode('%s' % col, errors='ignore') except TypeError: - ws.cell('%s%s'%(col_idx, row_number)).value = unicode(col) - - + cell.value = unicode(col) diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py index f2359cf..3d17baf 100644 --- a/tablib/formats/_yaml.py +++ b/tablib/formats/_yaml.py @@ -3,17 +3,8 @@ """ Tablib - YAML Support. """ -import sys - -try: - import yaml -except ImportError: - if sys.version_info[0] > 2: - import tablib.packages.yaml3 as yaml - else: - import tablib.packages.yaml as yaml - import tablib +import yaml title = 'yaml' extensions = ('yaml', 'yml') @@ -42,7 +33,7 @@ def import_book(dbook, in_stream): dbook.wipe() - for sheet in yaml.load(in_stream): + for sheet in yaml.safe_load(in_stream): data = tablib.Dataset() data.title = sheet['title'] data.dict = sheet['data'] |
