1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
|
# -*- python -*-
#
# gtk-doc - GTK DocBook documentation generator.
# Copyright (C) 1998 Damon Chaplin
# 2007-2016 Stefan Sauer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
"""Fix cross-references in the HTML documentation."""
import logging
import os
import re
from . import common, highlight
# Maps every known symbol id to the (relative or absolute) URL it is
# documented at.
Links = {}

# Link targets that may fail to resolve without ever producing a warning.
NoLinks = {
    'char', 'double', 'float', 'int', 'long',
    'main', 'signed', 'unsigned', 'va-list', 'void',
}
def Run(options):
    """Run the complete cross-reference fixing pass.

    Loads the link indices, reads the module's sections file, rewrites the
    HTML files in the module directory and finally appends the highlighter
    style definitions to ``style.css`` in that directory.

    Args:
        options: object providing the ``module``, ``module_dir``,
            ``html_dir``, ``extra_dir`` and ``src_lang`` attributes.
    """
    logging.info('options: %s', str(options.__dict__))

    output_dir = options.module_dir
    LoadIndicies(output_dir, options.html_dir, options.extra_dir)
    ReadSections(options.module)
    FixCrossReferences(output_dir, options.module, options.src_lang)

    stylesheet = os.path.join(output_dir, 'style.css')
    highlight.append_style_defs(stylesheet)
# TODO(ensonic): try to refactor so that we get a list of paths and then just
# loop over them.
# - module_dir is by default 'html'
# - html_dir can be set by configure, defaults to $(docdir)
def LoadIndicies(module_dir, html_dir, extra_dirs):
    """Scan all documentation directories for index files and fill ``Links``.

    Directories are scanned in this order: the GLib documentation directory,
    every ``share/gtk-doc/html`` directory below the entries of the
    ``GNOME2_PATH`` environment variable, ``html_dir``, ``module_dir`` and
    finally each entry of ``extra_dirs``. Entries read later overwrite
    entries read earlier; a directory is never scanned twice.

    Args:
        module_dir: directory holding the generated HTML of this module.
        html_dir: installation directory of the documentation; may be empty.
        extra_dirs: iterable of additional directories to scan; may be empty.
    """
    # Cache of dirs we already scanned for index files
    dir_cache = {}

    # Tolerate unset options instead of failing inside re.search()/the loop.
    html_dir = html_dir or ''
    extra_dirs = extra_dirs or ()

    path_prefix = ''
    m = re.search(r'(.*?)/share/gtk-doc/html', html_dir)
    if m:
        path_prefix = m.group(1)
        logging.info('Path prefix: %s', path_prefix)
    prefix_re = re.compile(r'^' + re.escape(path_prefix) + r'/')

    def outside_prefix(path):
        # Directories that do not share the install prefix need absolute links.
        return prefix_re.search(path) is None

    # We scan the directory containing GLib and any directories in GNOME2_PATH
    # first, but these will be overridden by any later scans.
    glib_dir = common.GetModuleDocDir('glib-2.0')
    if glib_dir and os.path.exists(glib_dir):
        # Some predefined link targets to get links into type hierarchies as these
        # have no targets. These are always absolute for now.
        Links['GBoxed'] = glib_dir + '/gobject/gobject-Boxed-Types.html'
        Links['GEnum'] = glib_dir + '/gobject/gobject-Enumeration-and-Flag-Types.html'
        Links['GFlags'] = glib_dir + '/gobject/gobject-Enumeration-and-Flag-Types.html'
        Links['GInterface'] = glib_dir + '/gobject/GTypeModule.html'

        if glib_dir != html_dir:
            logging.info('Scanning GLib directory: %s', glib_dir)
            ScanIndices(glib_dir, outside_prefix(glib_dir), dir_cache)
    else:
        NoLinks.update(('GBoxed', 'GEnum', 'GFlags', 'GInterface'))

    path = os.environ.get('GNOME2_PATH')
    if path:
        for prefix in path.split(':'):
            # os.path.join() inserts the separator that plain concatenation
            # dropped for entries without a trailing slash.
            doc_dir = os.path.join(prefix, 'share/gtk-doc/html')
            if os.path.exists(doc_dir) and doc_dir != html_dir:
                logging.info('Scanning GNOME2_PATH directory: %s', doc_dir)
                ScanIndices(doc_dir, outside_prefix(doc_dir), dir_cache)

    logging.info('Scanning HTML_DIR directory: %s', html_dir)
    ScanIndices(html_dir, False, dir_cache)
    logging.info('Scanning MODULE_DIR directory: %s', module_dir)
    ScanIndices(module_dir, False, dir_cache)

    # check all extra dirs, but skip already scanned dirs or subdirs of those
    for extra_dir in extra_dirs:
        extra_dir = extra_dir.rstrip('/')
        logging.info('Scanning EXTRA_DIR directory: %s', extra_dir)

        # If the --extra-dir option is not relative and is not sharing the same
        # prefix as the target directory of the docs, we need to use absolute
        # directories for the links
        use_absolute = not extra_dir.startswith('..') and outside_prefix(extra_dir)
        ScanIndices(extra_dir, use_absolute, dir_cache)
def ScanIndices(scan_dir, use_absolute_links, dir_cache):
    """Read the devhelp index files found in ``scan_dir`` and below.

    Every ``*.devhelp2`` file is handed to ``ReadDevhelp``. For compressed
    index files a hint to unpack them is printed instead. Sub-directories are
    visited after all files of the current directory have been handled.

    Args:
        scan_dir: directory to scan; empty values are ignored.
        use_absolute_links: passed through to ``ReadDevhelp``.
        dir_cache: dict of directories already visited; updated in place.
    """
    if not scan_dir:
        return
    if scan_dir in dir_cache:
        return
    dir_cache[scan_dir] = 1

    logging.info('Scanning index directory: %s, absolute: %d', scan_dir, use_absolute_links)

    # TODO(ensonic): this code is the same as in rebase.py
    if not os.path.isdir(scan_dir):
        logging.info('Cannot open dir "%s"', scan_dir)
        return

    pending_dirs = []
    for name in sorted(os.listdir(scan_dir)):
        path = os.path.join(scan_dir, name)

        if os.path.isdir(path):
            # Remember it; directories are descended into after the files.
            pending_dirs.append(path)
            continue

        if name.endswith('.devhelp2'):
            # if devhelp-file is good don't read index.sgml
            ReadDevhelp(path, use_absolute_links)
            continue

        if name == "index.sgml.gz":
            if not os.path.exists(os.path.join(scan_dir, 'index.sgml')):
                # debian/ubuntu started to compress this as index.sgml.gz :/
                print(''' Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/77138 . For now run:
gunzip %s
''' % path)
                continue

        if name.endswith('.devhelp2.gz') and not os.path.exists(path[:-3]):
            # debian/ubuntu started to compress this as *devhelp2.gz :/
            print('''Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/1466210 . For now run:
gunzip %s
''' % path)
            # we could consider supporting: gzip module

    # Now recursively scan the subdirectories.
    for child in pending_dirs:
        ScanIndices(child, use_absolute_links, dir_cache)
def ReadDevhelp(file, use_absolute_links):
    """Read one devhelp2 index file and record its link targets in ``Links``.

    Every ``link="page#anchor"`` attribute found in the file adds an entry
    mapping ``anchor`` to the page URL, prefixed with a base directory.

    Args:
        file: path of the ``.devhelp2`` file.
        use_absolute_links: if true, prefix links with the directory of
            ``file`` as given; otherwise use ``../<module-dir>/``.
    """
    # Determine the absolute directory, to be added to links in $file
    # if we need to use an absolute link.
    # $file will be something like /prefix/gnome/share/gtk-doc/html/gtk/$file
    # We want the part up to 'html/.*' since the links in $file include
    # the rest.
    base = "../"
    m = re.search(r'(.*\/)(.*?)\/.*?\.devhelp2', file)
    if use_absolute_links:
        # For uninstalled index files we'd need to map the path to where it
        # will be installed to
        # A path without a directory component does not match; keep the
        # default base then instead of failing on m.group().
        if m and not file.startswith('./'):
            base = m.group(1) + m.group(2) + '/'
    elif m:
        base += m.group(2) + '/'
    else:
        base = ''

    logging.info('Scanning index file=%s, absolute=%d, dir=%s', file, use_absolute_links, base)

    # The context manager closes the index file once it has been read.
    with open(file, 'r', encoding='utf-8') as index:
        for line in index:
            m = re.search(r' link="([^#]*)#([^"]*)"', line)
            if m:
                link = m.group(1) + '#' + m.group(2)
                logging.debug('Found id: %s href: %s', m.group(2), link)
                Links[m.group(2)] = base + link
def ReadSections(module):
    """Register the non-public symbols of ``<module>-sections.txt`` in ``NoLinks``.

    Symbols listed in a ``Standard`` or ``Private`` subsection are added to
    ``NoLinks`` so that missing links to them are not warned about.

    Args:
        module: module name; the file read is ``module + '-sections.txt'``.
    """
    # Start outside of any subsection so that a symbol listed before the
    # first <SECTION>/<SUBSECTION> line does not hit an unbound variable.
    subsection = ''
    with open(module + '-sections.txt', 'r', encoding='utf-8') as sections:
        for line in sections:
            if line.startswith('#') or line.strip() == '':
                continue
            if line.startswith('<SECTION>'):
                subsection = ''
                continue

            m = re.search(r'^<SUBSECTION\s*(.*)>', line)
            if m:
                # Also covers a bare "<SUBSECTION>", which resets the name to ''.
                subsection = m.group(1)
                continue

            if line.startswith('</SECTION>'):
                continue
            # Single-line <TITLE>, <FILE> and <INCLUDE> elements carry no symbol.
            if re.search(r'^<(TITLE|FILE|INCLUDE)>(.*)<\/\1>', line):
                continue

            symbol = line.strip()
            if subsection in ("Standard", "Private"):
                NoLinks.add(common.CreateValidSGMLID(symbol))
def FixCrossReferences(module_dir, module, src_lang):
    """Run ``FixHTMLFile`` on every HTML file directly inside ``module_dir``.

    Files are processed in sorted name order; directories are skipped.

    Args:
        module_dir: directory containing the generated HTML files.
        module: module name, passed on to ``FixHTMLFile``.
        src_lang: source language, passed on to ``FixHTMLFile``.
    """
    # TODO(ensonic): use glob.glob()?
    for name in sorted(os.listdir(module_dir)):
        path = os.path.join(module_dir, name)
        if not os.path.isdir(path) and name.endswith(('.html', '.htm')):
            FixHTMLFile(src_lang, module, path)
def FixHTMLFile(src_lang, module, file):
    """Highlight the listings of one HTML file and resolve its GTKDOCLINK tags.

    The file is rewritten in place: program listings are replaced by their
    highlighted form, highlighted symbols are wrapped in GTKDOCLINK tags and
    all GTKDOCLINK tags are then turned into ``<a href>`` anchors (or plain
    text when no target is known).

    Args:
        src_lang: source language passed to the highlighter.
        module: name of the module the file belongs to.
        file: path of the HTML file to rewrite.
    """
    logging.info('Fixing file: %s', file)

    with open(file, 'r', encoding='utf-8') as h:
        content = h.read()

    # FIXME: ideally we'd pass a clue about the example language to the highligher
    # unfortunately the "language" attribute is not appearing in the html output
    # we could patch the customization to have <code class="xxx"> inside of <pre>
    def highlight_listing(m):
        return HighlightSourcePygments(src_lang, m.group(1), m.group(2))
    content = re.sub(
        r'<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>',
        highlight_listing, content, flags=re.DOTALL)

    # Bring GTKDOCLINK tags, including entity-escaped ones, into the canonical
    # form matched by the per-line pass below. The replacement template must
    # not contain backslash-escaped punctuation: re.sub() copies unknown
    # escapes such as "\<" to the output verbatim, which would corrupt the tag.
    content = re.sub(
        r'(?:<|&lt;)GTKDOCLINK\s+HREF=\"(.*?)\"(?:>|&gt;)(.*?)(?:<|&lt;)/GTKDOCLINK(?:>|&gt;)',
        r'<GTKDOCLINK HREF="\1">\2</GTKDOCLINK>', content, flags=re.DOTALL)

    # From the highlighter we get all the functions marked up. Now we can turn them into GTKDOCLINK items
    def link_symbol(m):
        return MakeGtkDocLink(m.group(1), m.group(2), m.group(3))
    content = re.sub(r'(<span class=\"function\">)(.*?)(</span>)', link_symbol, content, flags=re.DOTALL)
    # We can also try the first item in stuff marked up as 'normal'
    content = re.sub(
        r'(<span class=\"normal\">\s*)(.+?)((\s+.+?)?\s*</span>)', link_symbol, content, flags=re.DOTALL)

    xref_re = re.compile(r'<GTKDOCLINK\s+HREF="([^"]*)"\s*>(.*?)</GTKDOCLINK\s*>')
    fixed_lines = []
    for line_no, text in enumerate(content.rstrip().split('\n'), start=1):
        # Bind the 1-based line number now so warnings name the right line.
        def resolve(m, line_no=line_no):
            return MakeXRef(module, file, line_no, m.group(1), m.group(2))

        text = xref_re.sub(resolve, text)
        if 'GTKDOCLINK' in text:
            logging.info('make xref failed for line %d: "%s"', line_no, text)
        fixed_lines.append(text)

    new_file = file + '.new'
    with open(new_file, 'w', encoding='utf-8') as h:
        h.write('\n'.join(fixed_lines))

    # Swap the new file in without a window in which the file is missing.
    os.replace(new_file, file)
def GetXRef(id):
    """Look up the link target for ``id``, trying a few spelling variants.

    The variants are tried in this order and the first one present in
    ``Links`` wins: the id itself, the id with ``:`` replaced by ``--``, the
    singular forms obtained by dropping a trailing ``es`` and/or ``s`` (each
    also with a ``-struct`` suffix) and finally the id with ``-struct``.

    Args:
        id: the SGML id to resolve.

    Returns:
        A tuple ``(matched_id, href)``; ``(id, None)`` if nothing matched.
    """
    candidates = [id]

    # This is a workaround for some inconsistency we have with CreateValidSGMLID
    if ':' in id:
        candidates.append(id.replace(':', '--'))

    # poor mans plural support; an id ending in "es" is checked both without
    # "es" (GtkBoxes -> GtkBox) and without "s" (GValues -> GValue)
    for plural_suffix in ('es', 's'):
        if id.endswith(plural_suffix):
            singular = id[:-len(plural_suffix)]
            candidates.append(singular)
            candidates.append(singular + '-struct')

    candidates.append(id + '-struct')

    for tid in candidates:
        href = Links.get(tid)
        if href:
            return (tid, href)
    return (id, None)
def ReportBadXRef(file, line, id, text):
    """Warn once about a link target that could not be resolved.

    No warning is issued for ids in ``NoLinks`` or for the cases filtered
    below. After a warning the id is added to ``NoLinks`` so that it is not
    reported again.

    Args:
        file: name of the file containing the link.
        line: line number of the link.
        id: the unresolved link target.
        text: the markup enclosed by the link.
    """
    logging.info('no link for: id=%s, linktext=%s', id, text)

    # don't warn multiple times and also skip blacklisted (ctypes)
    if id in NoLinks:
        return
    # if it's a function, don't warn if it does not contain a "_"
    # (transformed to "-")
    # - gnome coding style would use '_'
    # - will avoid wrong warnings for ansi c functions
    if ' class="function"' in text and '-' not in id:
        return
    # if it's a 'return value', don't warn (implicitly created link)
    if ' class="returnvalue"' in text:
        return
    # if it's a 'type', don't warn if it starts with lowercase
    # - gnome coding style would use CamelCase
    # (id[:1] instead of id[0] so that an empty id does not raise IndexError)
    if ' class="type"' in text and id[:1].islower():
        return
    # don't warn for self links
    if text == id:
        return

    common.LogWarning(file, line, 'no link for: "%s" -> (%s).' % (id, text))
    NoLinks.add(id)
def MakeRelativeXRef(module, href):
    """Strip the ``../<module>/`` prefix from links into the same module.

    Links into the module being built are made relative to the current
    directory so that they also work for uninstalled documentation.

    Args:
        module: name of the module being processed.
        href: link target to adjust.

    Returns:
        ``href`` without the leading ``../<module>/`` if it had one,
        otherwise ``href`` unchanged.
    """
    # The module name is literal text; escape it so that names such as
    # "gtk+" or "glib-2.0" are not interpreted as regex syntax.
    m = re.search(r'^\.\./' + re.escape(module) + '/(.*)$', href)
    if m:
        return m.group(1)
    return href
def MakeXRef(module, file, line, id, text):
    """Return ``text`` wrapped in an anchor to the target of ``id``.

    When ``id`` cannot be resolved the problem is passed to
    ``ReportBadXRef`` and ``text`` is returned unchanged.

    Args:
        module: name of the module being processed.
        file: file the link was found in (for reporting).
        line: line number of the link (for reporting).
        id: link target to resolve.
        text: markup to be wrapped by the link.
    """
    _, target = GetXRef(id)
    if not target:
        ReportBadXRef(file, line, id, text)
        return text

    target = MakeRelativeXRef(module, target)
    logging.info('Fixing link: %s, %s, %s', id, target, text)
    return '<a href="%s">%s</a>' % (target, text)
def MakeGtkDocLink(pre, symbol, post):
    """Wrap ``symbol`` in a GTKDOCLINK tag placed between ``pre`` and ``post``.

    The id derived from ``symbol`` is added to ``NoLinks``: these links are
    created implicitly in highlighted sources, so no warning is wanted when
    they cannot be resolved.
    """
    link_id = common.CreateValidSGMLID(symbol)
    NoLinks.add(link_id)
    pieces = [pre, '<GTKDOCLINK HREF="', link_id, '">', symbol, '</GTKDOCLINK>', post]
    return ''.join(pieces)
def HighlightSourcePygments(src_lang, div_class, source):
    """Return the HTML for one highlighted, line-numbered source listing.

    Args:
        src_lang: language name passed to ``highlight.highlight_code``.
        div_class: class attribute of the enclosing ``<div>``.
        source: content of the original ``<pre>`` element, entity-escaped.

    Returns:
        A ``<div>`` containing a two-column table: line numbers on the left
        and the highlighted code on the right.
    """
    # chop of leading and trailing empty lines, leave leading space in first real line
    source = source.strip(' ')
    source = source.strip('\n')
    source = source.rstrip()

    # cut common indent
    m = re.search(r'^(\s+)', source)
    if m:
        source = re.sub(r'^' + m.group(1), '', source, flags=re.MULTILINE)
    # avoid double entity replacement: the highlighter escapes its input
    # again, so hand it the plain characters. '&amp;' is decoded last so that
    # '&amp;lt;' becomes '&lt;' and not '<'.
    source = source.replace('&lt;', '<')
    source = source.replace('&gt;', '>')
    source = source.replace('&amp;', '&')

    highlighted_source = highlight.highlight_code(source, src_lang)
    if not highlighted_source:
        highlighted_source = source

    # chop of leading and trailing empty lines
    highlighted_source = highlighted_source.strip()

    # turn common urls in comments into links
    highlighted_source = re.sub(r'<span class="url">(.*?)</span>',
                                r'<span class="url"><a href="\1">\1</a></span>',
                                highlighted_source, flags=re.DOTALL)

    # we do own line-numbering: one number per line of highlighted output
    line_count = highlighted_source.count('\n') + 1
    source_lines = '\n'.join(str(number) for number in range(1, line_count + 1))

    return """<div class="%s">
<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
</div>
""" % (div_class, source_lines, highlighted_source)
|