1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
|
import logging
import os
from posixpath import join as posix_join
from urllib.parse import urljoin
from feedgenerator import Atom1Feed, Rss201rev2Feed, get_tag_uri
from markupsafe import Markup
from pelican.paginator import Paginator
from pelican.plugins import signals
from pelican.utils import (get_relative_path, is_selected_for_writing,
path_to_url, sanitised_join, set_date_tzinfo)
logger = logging.getLogger(__name__)
class Writer:
    """Write rendered pages and syndication feeds to the output directory.

    The writer keeps track of every file it has produced so that a second
    write to the same path raises instead of silently clobbering output;
    a single write per path may be marked as an intentional override
    (see :meth:`_open_w`).
    """

    def __init__(self, output_path, settings=None):
        self.output_path = output_path
        self.reminder = {}
        self.settings = settings or {}
        self._written_files = set()
        self._overridden_files = set()

        # See Content._link_replacer for details
        if self.settings.get('RELATIVE_URLS'):
            self.urljoiner = posix_join
        else:
            self.urljoiner = lambda base, url: urljoin(
                base if base.endswith('/') else base + '/', url)

    def _create_new_feed(self, feed_type, feed_title, context):
        """Instantiate an empty feed with site-wide metadata.

        :param feed_type: 'rss' selects RSS 2.01; anything else gives
            Atom 1.0
        :param feed_title: optional title, appended to SITENAME when given
        :param context: template context providing SITENAME / SITESUBTITLE
        """
        feed_class = Rss201rev2Feed if feed_type == 'rss' else Atom1Feed
        if feed_title:
            feed_title = context['SITENAME'] + ' - ' + feed_title
        else:
            feed_title = context['SITENAME']
        feed = feed_class(
            title=Markup(feed_title).striptags(),
            link=(self.site_url + '/'),
            feed_url=self.feed_url,
            description=context.get('SITESUBTITLE', ''),
            subtitle=context.get('SITESUBTITLE', None))
        return feed

    def _add_item_to_the_feed(self, feed, item):
        """Append one content item to *feed*, honouring the feed flavour."""
        title = Markup(item.title).striptags()
        link = self.urljoiner(self.site_url, item.url)

        if isinstance(feed, Rss201rev2Feed):
            # RSS feeds use a single tag called 'description' for both the
            # full content and the summary
            content = None
            if self.settings.get('RSS_FEED_SUMMARY_ONLY'):
                description = item.summary
            else:
                description = item.get_content(self.site_url)
        else:
            # Atom feeds have two different tags for full content (called
            # 'content' by feedgenerator) and summary (called 'description'
            # by feedgenerator).
            #
            # It does not make sense to have the summary be the
            # exact same thing as the full content. If we detect that
            # they are we just remove the summary.
            content = item.get_content(self.site_url)
            description = item.summary
            if description == content:
                description = None

        categories = []
        if hasattr(item, 'category'):
            categories.append(item.category)
        if hasattr(item, 'tags'):
            categories.extend(item.tags)

        feed.add_item(
            title=title,
            link=link,
            unique_id=get_tag_uri(link, item.date),
            description=description,
            content=content,
            categories=categories or None,
            author_name=getattr(item, 'author', ''),
            pubdate=set_date_tzinfo(
                item.date, self.settings.get('TIMEZONE', None)),
            updateddate=set_date_tzinfo(
                item.modified, self.settings.get('TIMEZONE', None)
            ) if hasattr(item, 'modified') else None)

    def _open_w(self, filename, encoding, override=False):
        """Open a file to write some content to it.

        Exit if we have already written to that file, unless one (and no more
        than one) of the writes has the override parameter set to True.
        """
        if filename in self._overridden_files:
            if override:
                raise RuntimeError('File %s is set to be overridden twice'
                                   % filename)
            else:
                logger.info('Skipping %s', filename)
                # Redirect this write to the bit bucket so the earlier
                # override write is the one that survives on disk.
                filename = os.devnull
        elif filename in self._written_files:
            if override:
                logger.info('Overwriting %s', filename)
            else:
                raise RuntimeError('File %s is to be overwritten' % filename)
        if override:
            self._overridden_files.add(filename)
        self._written_files.add(filename)
        return open(filename, 'w', encoding=encoding)

    def write_feed(self, elements, context, path=None, url=None,
                   feed_type='atom', override_output=False, feed_title=None):
        """Generate a feed with the list of articles provided

        Return the feed. If no path or output_path is specified, just
        return the feed object.

        :param elements: the articles to put on the feed.
        :param context: the context to get the feed metadata.
        :param path: the path to output.
        :param url: the publicly visible feed URL; if None, path is used
            instead
        :param feed_type: the feed type to use (atom or rss)
        :param override_output: boolean telling if we can override previous
            output with the same name (and if next files written with the same
            name should be skipped to keep that one)
        :param feed_title: the title of the feed.
        """
        if not is_selected_for_writing(self.settings, path):
            return

        self.site_url = context.get(
            'SITEURL', path_to_url(get_relative_path(path)))

        self.feed_domain = context.get('FEED_DOMAIN')
        self.feed_url = self.urljoiner(self.feed_domain, url if url else path)

        feed = self._create_new_feed(feed_type, feed_title, context)

        # Cap the number of entries when FEED_MAX_ITEMS is set and truthy
        # (missing key no longer raises KeyError).
        max_items = len(elements)
        if self.settings.get('FEED_MAX_ITEMS'):
            max_items = min(self.settings['FEED_MAX_ITEMS'], max_items)
        for i in range(max_items):
            self._add_item_to_the_feed(feed, elements[i])

        signals.feed_generated.send(context, feed=feed)
        if path:
            complete_path = sanitised_join(self.output_path, path)

            # Create intermediate directories; an existing tree is fine.
            # (Previously a bare try/except also hid real OS errors such as
            # permission failures — those now propagate.)
            os.makedirs(os.path.dirname(complete_path), exist_ok=True)

            with self._open_w(complete_path, 'utf-8', override_output) as fp:
                feed.write(fp, 'utf-8')
                logger.info('Writing %s', complete_path)

                signals.feed_written.send(
                    complete_path, context=context, feed=feed)
        return feed

    def write_file(self, name, template, context, relative_urls=False,
                   paginated=None, template_name=None, override_output=False,
                   url=None, **kwargs):
        """Render the template and write the file.

        :param name: name of the file to output
        :param template: template to use to generate the content
        :param context: dict to pass to the templates.
        :param relative_urls: use relative urls or absolutes ones
        :param paginated: dict of article list to paginate - must have the
            same length (same list in different orders)
        :param template_name: the template name, for pagination
        :param override_output: boolean telling if we can override previous
            output with the same name (and if next files written with the same
            name should be skipped to keep that one)
        :param url: url of the file (needed by the paginator)
        :param **kwargs: additional variables to pass to the templates
        """
        # Skip silently when there is nothing to write (False/empty/None
        # name) or the target is excluded from writing.  Checking the name
        # first also avoids a TypeError from os.path.join on a None name.
        if not name or not is_selected_for_writing(
                self.settings, os.path.join(self.output_path, name)):
            return

        def _write_file(template, localcontext, output_path, name, override):
            """Render the template write the file."""
            # set localsiteurl for context so that Contents can adjust links
            if localcontext['localsiteurl']:
                context['localsiteurl'] = localcontext['localsiteurl']
            output = template.render(localcontext)
            path = sanitised_join(output_path, name)

            # Create intermediate directories; an existing tree is fine.
            os.makedirs(os.path.dirname(path), exist_ok=True)

            with self._open_w(path, 'utf-8', override=override) as f:
                f.write(output)
            logger.info('Writing %s', path)

            # Send a signal to say we're writing a file with some specific
            # local context.
            signals.content_written.send(path, context=localcontext)

        def _get_localcontext(context, name, kwargs, relative_urls):
            """Build the per-file context, relativising SITEURL on demand."""
            localcontext = context.copy()
            localcontext['localsiteurl'] = localcontext.get(
                'localsiteurl', None)
            if relative_urls:
                relative_url = path_to_url(get_relative_path(name))
                localcontext['SITEURL'] = relative_url
                localcontext['localsiteurl'] = relative_url
            localcontext['output_file'] = name
            localcontext.update(kwargs)
            return localcontext

        if paginated is None:
            paginated = {key: val for key, val in kwargs.items()
                         if key in {'articles', 'dates'}}

        # pagination
        if paginated and template_name in self.settings['PAGINATED_TEMPLATES']:
            # pagination needed
            per_page = self.settings['PAGINATED_TEMPLATES'][template_name] \
                or self.settings['DEFAULT_PAGINATION']

            # init paginators
            paginators = {key: Paginator(name, url, val, self.settings,
                                         per_page)
                          for key, val in paginated.items()}

            # generated pages, and write; all paginators have the same
            # number of pages (same list in different orders), so any one
            # of them can drive the loop.
            for page_num in range(next(iter(paginators.values())).num_pages):
                paginated_kwargs = kwargs.copy()
                for key, paginator in paginators.items():
                    previous_page = paginator.page(page_num) \
                        if page_num > 0 else None
                    page = paginator.page(page_num + 1)
                    next_page = paginator.page(page_num + 2) \
                        if page_num + 1 < paginator.num_pages else None
                    paginated_kwargs.update(
                        {'%s_paginator' % key: paginator,
                         '%s_page' % key: page,
                         '%s_previous_page' % key: previous_page,
                         '%s_next_page' % key: next_page})

                localcontext = _get_localcontext(
                    context, page.save_as, paginated_kwargs, relative_urls)
                _write_file(template, localcontext, self.output_path,
                            page.save_as, override_output)
        else:
            # no pagination
            localcontext = _get_localcontext(
                context, name, kwargs, relative_urls)
            _write_file(template, localcontext, self.output_path, name,
                        override_output)
|