path: root/tablib/formats
author    Jean Jordaan <jean.jordaan@gmail.com>   2019-03-03 13:29:21 +0700
committer GitHub <noreply@github.com>             2019-03-03 13:29:21 +0700
commit    addaa090efb4d4beacb2bd2cc6dbdb7e158fa2a9 (patch)
tree      de3b532ed49a0d1e8a2e333b549b129d534bff91 /tablib/formats
parent    cd67a63b434c33430d8fd32d6d15027b42811d9d (diff)
parent    79dc77de49b8375d616a6cabb4503feb990bb51d (diff)
download  tablib-addaa090efb4d4beacb2bd2cc6dbdb7e158fa2a9.tar.gz
Merge branch 'master' into master
Diffstat (limited to 'tablib/formats')
-rw-r--r--  tablib/formats/__init__.py |   5
-rw-r--r--  tablib/formats/_csv.py     |  11
-rw-r--r--  tablib/formats/_dbf.py     |   4
-rw-r--r--  tablib/formats/_df.py      |  49
-rw-r--r--  tablib/formats/_jira.py    |  39
-rw-r--r--  tablib/formats/_json.py    |  19
-rw-r--r--  tablib/formats/_ods.py     |  13
-rw-r--r--  tablib/formats/_rst.py     | 273
-rw-r--r--  tablib/formats/_tsv.py     |   4
-rw-r--r--  tablib/formats/_xls.py     |   7
-rw-r--r--  tablib/formats/_xlsx.py    |  43
-rw-r--r--  tablib/formats/_yaml.py    |  13
12 files changed, 412 insertions, 68 deletions
diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py
index 5cca19f..418e607 100644
--- a/tablib/formats/__init__.py
+++ b/tablib/formats/__init__.py
@@ -13,5 +13,8 @@ from . import _xlsx as xlsx
from . import _ods as ods
from . import _dbf as dbf
from . import _latex as latex
+from . import _df as df
+from . import _rst as rst
+from . import _jira as jira
-available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods)
+available = (json, xls, yaml, csv, dbf, tsv, html, jira, latex, xlsx, ods, df, rst)
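
The `available` tuple is what tablib's core iterates over to attach each format to `Dataset`, so the real effect of this hunk is that `df`, `rst`, and `jira` become usable like any other format. A minimal sketch of that, assuming the usual `export()` registration:

    import tablib

    data = tablib.Dataset(headers=['id', 'name'])
    data.append([1, 'Ada'])
    data.append([2, 'Grace'])

    # The three newly registered formats sit alongside csv, json, yaml, ...
    print(data.export('jira'))
    print(data.export('rst'))
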
diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py
index 994b23b..8b536a7 100644
--- a/tablib/formats/_csv.py
+++ b/tablib/formats/_csv.py
@@ -3,15 +3,14 @@
""" Tablib - *SV Support.
"""
-from tablib.compat import is_py3, csv, StringIO
+from tablib.compat import csv, StringIO, unicode
title = 'csv'
extensions = ('csv',)
-DEFAULT_ENCODING = 'utf-8'
-DEFAULT_DELIMITER = ','
+DEFAULT_DELIMITER = unicode(',')
def export_set(dataset, **kwargs):
@@ -19,8 +18,6 @@ def export_set(dataset, **kwargs):
stream = StringIO()
kwargs.setdefault('delimiter', DEFAULT_DELIMITER)
- if not is_py3:
- kwargs.setdefault('encoding', DEFAULT_ENCODING)
_csv = csv.writer(stream, **kwargs)
@@ -36,15 +33,13 @@ def import_set(dset, in_stream, headers=True, **kwargs):
dset.wipe()
kwargs.setdefault('delimiter', DEFAULT_DELIMITER)
- if not is_py3:
- kwargs.setdefault('encoding', DEFAULT_ENCODING)
rows = csv.reader(StringIO(in_stream), **kwargs)
for i, row in enumerate(rows):
if (i == 0) and (headers):
dset.headers = row
- else:
+ elif row:
dset.append(row)
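
The switch from `else:` to `elif row:` means completely empty rows are now skipped on import instead of being appended as blank records. A small sketch of the new behaviour, with made-up data:

    import tablib

    csv_text = 'id,name\n1,Ada\n\n2,Grace\n'   # note the blank line
    data = tablib.Dataset()
    data.csv = csv_text        # import; the blank line no longer becomes a row
    print(data.height)         # 2
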
diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py
index 41c2ef4..710797d 100644
--- a/tablib/formats/_dbf.py
+++ b/tablib/formats/_dbf.py
@@ -55,6 +55,7 @@ def export_set(dataset):
else:
stream = StringIO(dbf_stream.read())
dbf_stream.close()
+ os.close(temp_file)
os.remove(temp_uri)
return stream.getvalue()
@@ -88,6 +89,3 @@ def detect(stream):
# When unpacking a string argument with less than 8 chars, struct.error is
# raised.
return False
-
-
-
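
The added `os.close(temp_file)` matters because the `temp_file`/`temp_uri` pair evidently comes from `tempfile.mkstemp()`, which returns an already-open OS file descriptor that nothing else closes, so every export leaked one. The general pattern, independent of tablib:

    import os
    import tempfile

    fd, path = tempfile.mkstemp()
    try:
        with open(path, 'wb') as handle:
            handle.write(b'payload')
    finally:
        os.close(fd)       # close the descriptor mkstemp opened
        os.remove(path)    # then delete the temporary file
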
diff --git a/tablib/formats/_df.py b/tablib/formats/_df.py
new file mode 100644
index 0000000..44b967f
--- /dev/null
+++ b/tablib/formats/_df.py
@@ -0,0 +1,49 @@
+""" Tablib - DataFrame Support.
+"""
+
+
+import sys
+
+
+if sys.version_info[0] > 2:
+ from io import BytesIO
+else:
+ from cStringIO import StringIO as BytesIO
+
+try:
+ from pandas import DataFrame
+except ImportError:
+ DataFrame = None
+
+import tablib
+
+from tablib.compat import unicode
+
+title = 'df'
+extensions = ('df', )
+
+def detect(stream):
+ """Returns True if given stream is a DataFrame."""
+ if DataFrame is None:
+ return False
+ try:
+ DataFrame(stream)
+ return True
+ except ValueError:
+ return False
+
+
+def export_set(dset, index=None):
+ """Returns DataFrame representation of DataBook."""
+ if DataFrame is None:
+ raise NotImplementedError(
+ 'DataFrame Format requires `pandas` to be installed.'
+ ' Try `pip install tablib[pandas]`.')
+ dataframe = DataFrame(dset.dict, columns=dset.headers)
+ return dataframe
+
+
+def import_set(dset, in_stream):
+ """Returns dataset from DataFrame."""
+ dset.wipe()
+ dset.dict = in_stream.to_dict(orient='records')
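
A round-trip sketch of the new format, assuming pandas is installed (the `tablib[pandas]` extra named in the error message):

    import tablib

    data = tablib.Dataset(headers=['id', 'name'])
    data.append([1, 'Ada'])

    frame = data.df            # pandas.DataFrame built from headers and rows
    copy = tablib.Dataset()
    copy.df = frame            # imports via DataFrame.to_dict(orient='records')
    print(copy.dict)           # one record per row, keyed by the headers
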
diff --git a/tablib/formats/_jira.py b/tablib/formats/_jira.py
new file mode 100644
index 0000000..55fce52
--- /dev/null
+++ b/tablib/formats/_jira.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+"""Tablib - Jira table export support.
+
+ Generates a Jira table from the dataset.
+"""
+from tablib.compat import unicode
+
+title = 'jira'
+
+
+def export_set(dataset):
+ """Formats the dataset according to the Jira table syntax:
+
+ ||heading 1||heading 2||heading 3||
+ |col A1|col A2|col A3|
+ |col B1|col B2|col B3|
+
+ :param dataset: dataset to serialize
+ :type dataset: tablib.core.Dataset
+ """
+
+ header = _get_header(dataset.headers) if dataset.headers else ''
+ body = _get_body(dataset)
+ return '%s\n%s' % (header, body) if header else body
+
+
+def _get_body(dataset):
+ return '\n'.join([_serialize_row(row) for row in dataset])
+
+
+def _get_header(headers):
+ return _serialize_row(headers, delimiter='||')
+
+
+def _serialize_row(row, delimiter='|'):
+ return '%s%s%s' % (delimiter,
+ delimiter.join([unicode(item) if item else ' ' for item in row]),
+ delimiter)
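
The format is export-only; a short sketch of the output it produces for hypothetical data, matching the docstring above:

    import tablib

    data = tablib.Dataset(headers=['heading 1', 'heading 2', 'heading 3'])
    data.append(['col A1', 'col A2', 'col A3'])
    data.append(['col B1', 'col B2', 'col B3'])

    print(data.export('jira'))
    # ||heading 1||heading 2||heading 3||
    # |col A1|col A2|col A3|
    # |col B1|col B2|col B3|
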
diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py
index 777040a..bbd2c96 100644
--- a/tablib/formats/_json.py
+++ b/tablib/formats/_json.py
@@ -2,29 +2,34 @@
""" Tablib - JSON Support
"""
+import decimal
+import json
+from uuid import UUID
import tablib
-import sys
-from tablib.packages import omnijson as json
-
title = 'json'
extensions = ('json', 'jsn')
-def date_handler(obj):
- return obj.isoformat() if hasattr(obj, 'isoformat') else obj
+def serialize_objects_handler(obj):
+ if isinstance(obj, decimal.Decimal) or isinstance(obj, UUID):
+ return str(obj)
+ elif hasattr(obj, 'isoformat'):
+ return obj.isoformat()
+ else:
+ return obj
def export_set(dataset):
"""Returns JSON representation of Dataset."""
- return json.dumps(dataset.dict, default=date_handler)
+ return json.dumps(dataset.dict, default=serialize_objects_handler)
def export_book(databook):
"""Returns JSON representation of Databook."""
- return json.dumps(databook._package(), default=date_handler)
+ return json.dumps(databook._package(), default=serialize_objects_handler)
def import_set(dset, in_stream):
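
With the stdlib `json` module, the `default=` hook only fires for values `json.dumps` cannot handle itself, so `Decimal` and `UUID` cells now serialize as strings instead of raising `TypeError`. A sketch with made-up values:

    import tablib
    from decimal import Decimal
    from uuid import uuid4

    data = tablib.Dataset(headers=['order', 'total'])
    data.append([uuid4(), Decimal('9.99')])

    print(data.export('json'))   # the UUID and the Decimal come out as JSON strings
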
diff --git a/tablib/formats/_ods.py b/tablib/formats/_ods.py
index f43a26c..c7af891 100644
--- a/tablib/formats/_ods.py
+++ b/tablib/formats/_ods.py
@@ -3,15 +3,8 @@
""" Tablib - ODF Support.
"""
-import sys
-
-
-if sys.version_info[0] > 2:
- from io import BytesIO
-else:
- from cStringIO import StringIO as BytesIO
-
-from tablib.compat import opendocument, style, table, text, unicode
+from odf import opendocument, style, table, text
+from tablib.compat import BytesIO, unicode
title = 'ods'
extensions = ('ods',)
@@ -97,4 +90,4 @@ def dset_sheet(dataset, ws):
ws.addElement(odf_row)
cell = table.TableCell()
cell.addElement(text.P(text=col))
- odf_row.addElement(cell)
\ No newline at end of file
+ odf_row.addElement(cell)
diff --git a/tablib/formats/_rst.py b/tablib/formats/_rst.py
new file mode 100644
index 0000000..4b53ad7
--- /dev/null
+++ b/tablib/formats/_rst.py
@@ -0,0 +1,273 @@
+# -*- coding: utf-8 -*-
+
+""" Tablib - reStructuredText Support
+"""
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from textwrap import TextWrapper
+
+from tablib.compat import (
+ median,
+ unicode,
+ izip_longest,
+)
+
+
+title = 'rst'
+extensions = ('rst',)
+
+
+MAX_TABLE_WIDTH = 80 # Roughly. It may be wider to avoid breaking words.
+
+
+JUSTIFY_LEFT = 'left'
+JUSTIFY_CENTER = 'center'
+JUSTIFY_RIGHT = 'right'
+JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT)
+
+
+def to_unicode(value):
+ if isinstance(value, bytes):
+ return value.decode('utf-8')
+ return unicode(value)
+
+
+def _max_word_len(text):
+ """
+ Return the length of the longest word in `text`.
+
+
+ >>> _max_word_len('Python Module for Tabular Datasets')
+ 8
+
+ """
+ return max((len(word) for word in text.split()))
+
+
+def _get_column_string_lengths(dataset):
+ """
+ Returns a list of string lengths of each column, and a list of
+ maximum word lengths.
+ """
+ if dataset.headers:
+ column_lengths = [[len(h)] for h in dataset.headers]
+ word_lens = [_max_word_len(h) for h in dataset.headers]
+ else:
+ column_lengths = [[] for _ in range(dataset.width)]
+ word_lens = [0 for _ in range(dataset.width)]
+ for row in dataset.dict:
+ values = iter(row.values() if hasattr(row, 'values') else row)
+ for i, val in enumerate(values):
+ text = to_unicode(val)
+ column_lengths[i].append(len(text))
+ word_lens[i] = max(word_lens[i], _max_word_len(text))
+ return column_lengths, word_lens
+
+
+def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT):
+ """
+ Returns a table row of wrapped values as a list of lines
+ """
+ if justify not in JUSTIFY_VALUES:
+ raise ValueError('Value of "justify" must be one of "{}"'.format(
+ '", "'.join(JUSTIFY_VALUES)
+ ))
+ if justify == JUSTIFY_LEFT:
+ just = lambda text, width: text.ljust(width)
+ elif justify == JUSTIFY_CENTER:
+ just = lambda text, width: text.center(width)
+ else:
+ just = lambda text, width: text.rjust(width)
+ lpad = sep + ' ' if sep else ''
+ rpad = ' ' + sep if sep else ''
+ pad = ' ' + sep + ' '
+ cells = []
+ for value, width in zip(values, widths):
+ wrapper.width = width
+ text = to_unicode(value)
+ cell = wrapper.wrap(text)
+ cells.append(cell)
+ lines = izip_longest(*cells, fillvalue='')
+ lines = (
+ (just(cell_line, widths[i]) for i, cell_line in enumerate(line))
+ for line in lines
+ )
+ lines = [''.join((lpad, pad.join(line), rpad)) for line in lines]
+ return lines
+
+
+def _get_column_widths(dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3):
+ """
+ Returns a list of column widths proportional to the median length
+ of the text in their cells.
+ """
+ str_lens, word_lens = _get_column_string_lengths(dataset)
+ median_lens = [int(median(lens)) for lens in str_lens]
+ total = sum(median_lens)
+ if total > max_table_width - (pad_len * len(median_lens)):
+ column_widths = (max_table_width * l // total for l in median_lens)
+ else:
+ column_widths = (l for l in median_lens)
+ # Allow for separator and padding:
+ column_widths = (w - pad_len if w > pad_len else w for w in column_widths)
+ # Rather widen table than break words:
+ column_widths = [max(w, l) for w, l in zip(column_widths, word_lens)]
+ return column_widths
+
+
+def export_set_as_simple_table(dataset, column_widths=None):
+ """
+ Returns reStructuredText grid table representation of dataset.
+ """
+ lines = []
+ wrapper = TextWrapper()
+ if column_widths is None:
+ column_widths = _get_column_widths(dataset, pad_len=2)
+ border = ' '.join(['=' * w for w in column_widths])
+
+ lines.append(border)
+ if dataset.headers:
+ lines.extend(_row_to_lines(
+ dataset.headers,
+ column_widths,
+ wrapper,
+ sep='',
+ justify=JUSTIFY_CENTER,
+ ))
+ lines.append(border)
+ for row in dataset.dict:
+ values = iter(row.values() if hasattr(row, 'values') else row)
+ lines.extend(_row_to_lines(values, column_widths, wrapper, ''))
+ lines.append(border)
+ return '\n'.join(lines)
+
+
+def export_set_as_grid_table(dataset, column_widths=None):
+ """
+ Returns reStructuredText grid table representation of dataset.
+
+
+ >>> from tablib import Dataset
+ >>> from tablib.formats import rst
+ >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
+ >>> data = Dataset()
+ >>> data.headers = ['A', 'B', 'A and B']
+ >>> for a, b in bits:
+ ... data.append([bool(a), bool(b), bool(a * b)])
+ >>> print(rst.export_set(data, force_grid=True))
+ +-------+-------+-------+
+ | A | B | A and |
+ | | | B |
+ +=======+=======+=======+
+ | False | False | False |
+ +-------+-------+-------+
+ | True | False | False |
+ +-------+-------+-------+
+ | False | True | False |
+ +-------+-------+-------+
+ | True | True | True |
+ +-------+-------+-------+
+
+ """
+ lines = []
+ wrapper = TextWrapper()
+ if column_widths is None:
+ column_widths = _get_column_widths(dataset)
+ header_sep = '+=' + '=+='.join(['=' * w for w in column_widths]) + '=+'
+ row_sep = '+-' + '-+-'.join(['-' * w for w in column_widths]) + '-+'
+
+ lines.append(row_sep)
+ if dataset.headers:
+ lines.extend(_row_to_lines(
+ dataset.headers,
+ column_widths,
+ wrapper,
+ justify=JUSTIFY_CENTER,
+ ))
+ lines.append(header_sep)
+ for row in dataset.dict:
+ values = iter(row.values() if hasattr(row, 'values') else row)
+ lines.extend(_row_to_lines(values, column_widths, wrapper))
+ lines.append(row_sep)
+ return '\n'.join(lines)
+
+
+def _use_simple_table(head0, col0, width0):
+ """
+ Use a simple table if the text in the first column is never wrapped
+
+
+ >>> _use_simple_table('menu', ['egg', 'bacon'], 10)
+ True
+ >>> _use_simple_table(None, ['lobster thermidor', 'spam'], 10)
+ False
+
+ """
+ if head0 is not None:
+ head0 = to_unicode(head0)
+ if len(head0) > width0:
+ return False
+ for cell in col0:
+ cell = to_unicode(cell)
+ if len(cell) > width0:
+ return False
+ return True
+
+
+def export_set(dataset, **kwargs):
+ """
+ Returns reStructuredText table representation of dataset.
+
+ Returns a simple table if the text in the first column is never
+ wrapped, otherwise returns a grid table.
+
+
+ >>> from tablib import Dataset
+ >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
+ >>> data = Dataset()
+ >>> data.headers = ['A', 'B', 'A and B']
+ >>> for a, b in bits:
+ ... data.append([bool(a), bool(b), bool(a * b)])
+ >>> table = data.rst
+ >>> table.split('\\n') == [
+ ... '===== ===== =====',
+ ... ' A B A and',
+ ... ' B ',
+ ... '===== ===== =====',
+ ... 'False False False',
+ ... 'True False False',
+ ... 'False True False',
+ ... 'True True True ',
+ ... '===== ===== =====',
+ ... ]
+ True
+
+ """
+ if not dataset.dict:
+ return ''
+ force_grid = kwargs.get('force_grid', False)
+ max_table_width = kwargs.get('max_table_width', MAX_TABLE_WIDTH)
+ column_widths = _get_column_widths(dataset, max_table_width)
+
+ use_simple_table = _use_simple_table(
+ dataset.headers[0] if dataset.headers else None,
+ dataset.get_col(0),
+ column_widths[0],
+ )
+ if use_simple_table and not force_grid:
+ return export_set_as_simple_table(dataset, column_widths)
+ else:
+ return export_set_as_grid_table(dataset, column_widths)
+
+
+def export_book(databook):
+ """
+ reStructuredText representation of a Databook.
+
+ Tables are separated by a blank line. All tables use the grid
+ format.
+ """
+ return '\n\n'.join(export_set(dataset, force_grid=True)
+ for dataset in databook._datasets)
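
A sketch of the book-level export, calling the module function directly and assuming the usual `tablib.Databook` constructor; each dataset becomes a grid table, separated by a blank line:

    import tablib
    from tablib.formats import rst

    day1 = tablib.Dataset(headers=['item', 'qty'], title='Day 1')
    day1.append(['spam', 3])
    day2 = tablib.Dataset(headers=['item', 'qty'], title='Day 2')
    day2.append(['eggs', 12])

    book = tablib.Databook((day1, day2))
    print(rst.export_book(book))   # two grid tables with a blank line between them
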
diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py
index 9380b3b..1c6d6a1 100644
--- a/tablib/formats/_tsv.py
+++ b/tablib/formats/_tsv.py
@@ -3,6 +3,7 @@
""" Tablib - TSV (Tab Separated Values) Support.
"""
+from tablib.compat import unicode
from tablib.formats._csv import (
export_set as export_set_wrapper,
import_set as import_set_wrapper,
@@ -12,8 +13,7 @@ from tablib.formats._csv import (
title = 'tsv'
extensions = ('tsv',)
-DEFAULT_ENCODING = 'utf-8'
-DELIMITER = '\t'
+DELIMITER = unicode('\t')
def export_set(dataset):
"""Returns TSV representation of Dataset."""
diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py
index 787907a..82f8b86 100644
--- a/tablib/formats/_xls.py
+++ b/tablib/formats/_xls.py
@@ -5,8 +5,11 @@
import sys
-from tablib.compat import BytesIO, xlwt, xlrd, XLRDError, xrange
+from tablib.compat import BytesIO, xrange
import tablib
+import xlrd
+import xlwt
+from xlrd.biffh import XLRDError
title = 'xls'
extensions = ('xls',)
@@ -22,7 +25,7 @@ def detect(stream):
xlrd.open_workbook(file_contents=stream)
return True
except (TypeError, XLRDError):
- pass
+ pass
try:
xlrd.open_workbook(file_contents=stream.read())
return True
diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py
index 411e0fc..ebafc4f 100644
--- a/tablib/formats/_xlsx.py
+++ b/tablib/formats/_xlsx.py
@@ -11,12 +11,12 @@ if sys.version_info[0] > 2:
else:
from cStringIO import StringIO as BytesIO
-from tablib.compat import openpyxl
+import openpyxl
import tablib
Workbook = openpyxl.workbook.Workbook
ExcelWriter = openpyxl.writer.excel.ExcelWriter
-get_column_letter = openpyxl.cell.get_column_letter
+get_column_letter = openpyxl.utils.get_column_letter
from tablib.compat import unicode
@@ -51,7 +51,8 @@ def export_book(databook, freeze_panes=True):
"""Returns XLSX representation of DataBook."""
wb = Workbook()
- wb.worksheets = []
+ for sheet in wb.worksheets:
+ wb.remove(sheet)
for i, dset in enumerate(databook._datasets):
ws = wb.create_sheet()
ws.title = dset.title if dset.title else 'Sheet%s' % (i)
@@ -70,7 +71,7 @@ def import_set(dset, in_stream, headers=True):
dset.wipe()
xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream))
- sheet = xls_book.get_active_sheet()
+ sheet = xls_book.active
dset.title = sheet.title
@@ -111,42 +112,36 @@ def dset_sheet(dataset, ws, freeze_panes=True):
_offset = i
_package.insert((sep[0] + _offset), (sep[1],))
+ bold = openpyxl.styles.Font(bold=True)
+ wrap_text = openpyxl.styles.Alignment(wrap_text=True)
+
for i, row in enumerate(_package):
row_number = i + 1
for j, col in enumerate(row):
col_idx = get_column_letter(j + 1)
+ cell = ws['%s%s' % (col_idx, row_number)]
# bold headers
if (row_number == 1) and dataset.headers:
- # ws.cell('%s%s'%(col_idx, row_number)).value = unicode(
- # '%s' % col, errors='ignore')
- ws.cell('%s%s'%(col_idx, row_number)).value = unicode(col)
- style = ws.get_style('%s%s' % (col_idx, row_number))
- style.font.bold = True
+ # cell.value = unicode('%s' % col, errors='ignore')
+ cell.value = unicode(col)
+ cell.font = bold
if freeze_panes:
- # As already done in #53, but after Merge lost:
# Export Freeze only after first Line
ws.freeze_panes = 'A2'
-
+
# bold separators
elif len(row) < dataset.width:
- ws.cell('%s%s'%(col_idx, row_number)).value = unicode(
- '%s' % col, errors='ignore')
- style = ws.get_style('%s%s' % (col_idx, row_number))
- style.font.bold = True
+ cell.value = unicode('%s' % col, errors='ignore')
+ cell.font = bold
# wrap the rest
else:
try:
if '\n' in col:
- ws.cell('%s%s'%(col_idx, row_number)).value = unicode(
- '%s' % col, errors='ignore')
- style = ws.get_style('%s%s' % (col_idx, row_number))
- style.alignment.wrap_text
+ cell.value = unicode('%s' % col, errors='ignore')
+ cell.alignment = wrap_text
else:
- ws.cell('%s%s'%(col_idx, row_number)).value = unicode(
- '%s' % col, errors='ignore')
+ cell.value = unicode('%s' % col, errors='ignore')
except TypeError:
- ws.cell('%s%s'%(col_idx, row_number)).value = unicode(col)
-
-
+ cell.value = unicode(col)
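
The rewrite moves from the older openpyxl cell/style API (`ws.cell('A1')`, `ws.get_style(...)`) to the current one, where cells are indexed by coordinate and receive `Font`/`Alignment` objects directly. The idiom in isolation, outside tablib:

    import openpyxl
    from openpyxl.styles import Alignment, Font

    wb = openpyxl.Workbook()
    ws = wb.active

    header = ws['A1']                # coordinate indexing replaces ws.cell('A1')
    header.value = 'title'
    header.font = Font(bold=True)    # replaces mutating ws.get_style(...).font.bold

    body = ws['A2']
    body.value = 'line one\nline two'
    body.alignment = Alignment(wrap_text=True)

    wb.save('sketch.xlsx')
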
diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py
index f2359cf..3d17baf 100644
--- a/tablib/formats/_yaml.py
+++ b/tablib/formats/_yaml.py
@@ -3,17 +3,8 @@
""" Tablib - YAML Support.
"""
-import sys
-
-try:
- import yaml
-except ImportError:
- if sys.version_info[0] > 2:
- import tablib.packages.yaml3 as yaml
- else:
- import tablib.packages.yaml as yaml
-
import tablib
+import yaml
title = 'yaml'
extensions = ('yaml', 'yml')
@@ -42,7 +33,7 @@ def import_book(dbook, in_stream):
dbook.wipe()
- for sheet in yaml.load(in_stream):
+ for sheet in yaml.safe_load(in_stream):
data = tablib.Dataset()
data.title = sheet['title']
data.dict = sheet['data']
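
`yaml.safe_load` only builds plain Python objects, whereas a bare `yaml.load` can instantiate arbitrary tagged objects from untrusted input, which is the usual reason for this switch. A small sketch of the sheet structure `import_book` expects:

    import yaml

    doc = "- {title: Sheet1, data: [{id: 1, name: Ada}]}"
    for sheet in yaml.safe_load(doc):
        print(sheet['title'], sheet['data'])   # Sheet1 [{'id': 1, 'name': 'Ada'}]
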