src/tablib/formats/_latex.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132

"""Tablib - LaTeX table export support.

   Generates a LaTeX booktabs-style table from the dataset.
"""
import re


class LATEXFormat:
    title = 'latex'
    extensions = ('tex',)

    TABLE_TEMPLATE = """\
%% Note: add \\usepackage{booktabs} to your preamble
%%
\\begin{table}[!htbp]
  \\centering
  %(CAPTION)s
  \\begin{tabular}{%(COLSPEC)s}
    \\toprule
%(HEADER)s
    %(MIDRULE)s
%(BODY)s
    \\bottomrule
  \\end{tabular}
\\end{table}
"""

    TEX_RESERVED_SYMBOLS_MAP = dict([
        ('\\', '\\textbackslash{}'),
        ('{', '\\{'),
        ('}', '\\}'),
        ('$', '\\$'),
        ('&', '\\&'),
        ('#', '\\#'),
        ('^', '\\textasciicircum{}'),
        ('_', '\\_'),
        ('~', '\\textasciitilde{}'),
        ('%', '\\%'),
    ])

    TEX_RESERVED_SYMBOLS_RE = re.compile(
        '(%s)' % '|'.join(map(re.escape, TEX_RESERVED_SYMBOLS_MAP.keys())))

    @classmethod
    def export_set(cls, dataset):
        """Returns LaTeX representation of dataset

        :param dataset: dataset to serialize
        :type dataset: tablib.core.Dataset
        """

        caption = '\\caption{%s}' % dataset.title if dataset.title else '%'
        colspec = cls._colspec(dataset.width)
        header = cls._serialize_row(dataset.headers) if dataset.headers else ''
        midrule = cls._midrule(dataset.width)
        body = '\n'.join([cls._serialize_row(row) for row in dataset])
        return cls.TABLE_TEMPLATE % dict(CAPTION=caption, COLSPEC=colspec,
                                     HEADER=header, MIDRULE=midrule, BODY=body)

    @classmethod
    def _colspec(cls, dataset_width):
        """Generates the column specification for the LaTeX `tabular` environment
        based on the dataset width.

        The first column is justified to the left, all further columns are aligned
        to the right.

        .. note:: This is only a heuristic and most probably has to be fine-tuned
        post export. Column alignment should depend on the data type, e.g., textual
        content should usually be aligned to the left while numeric content almost
        always should be aligned to the right.

        :param dataset_width: width of the dataset
        """

        spec = 'l'
        for _ in range(1, dataset_width):
            spec += 'r'
        return spec

    @classmethod
    def _midrule(cls, dataset_width):
        """Generates the table `midrule`, which may be composed of several
        `cmidrules`.

        :param dataset_width: width of the dataset to serialize
        """

        if not dataset_width or dataset_width == 1:
            return '\\midrule'
        return ' '.join([cls._cmidrule(colindex, dataset_width) for colindex in
                         range(1, dataset_width + 1)])

    @classmethod
    def _cmidrule(cls, colindex, dataset_width):
        """Generates the `cmidrule` for a single column with appropriate trimming
        based on the column position.

        :param colindex: Column index
        :param dataset_width: width of the dataset
        """

        rule = '\\cmidrule(%s){%d-%d}'
        if colindex == 1:
            # Rule of first column is trimmed on the right
            return rule % ('r', colindex, colindex)
        if colindex == dataset_width:
            # Rule of last column is trimmed on the left
            return rule % ('l', colindex, colindex)
        # Inner columns are trimmed on the left and right
        return rule % ('lr', colindex, colindex)

    @classmethod
    def _serialize_row(cls, row):
        """Returns string representation of a single row.

        :param row: single dataset row
        """

        new_row = [cls._escape_tex_reserved_symbols(str(item)) if item else ''
                   for item in row]
        return 6 * ' ' + ' & '.join(new_row) + ' \\\\'

    @classmethod
    def _escape_tex_reserved_symbols(cls, input):
        """Escapes all TeX reserved symbols ('_', '~', etc.) in a string.

        :param input: String to escape
        """
        def replace(match):
            return cls.TEX_RESERVED_SYMBOLS_MAP[match.group()]
        return cls.TEX_RESERVED_SYMBOLS_RE.sub(replace, input)