# -*- python -*-
#
# gtk-doc - GTK DocBook documentation generator.
# Copyright (C) 1998 Damon Chaplin
# 2007-2016 Stefan Sauer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
"""
Extracts declarations of functions, macros, enums, structs and unions from
header files.
It is called with a module name, an optional source directory, an optional
output directory, and the header files to scan.
It outputs all declarations found to a file named '$MODULE-decl.txt', and the
list of declarations to another file '$MODULE-decl-list.txt'.
This second list file is typically copied to '$MODULE-sections.txt' and
organized into sections ready to output the XML pages.
"""
import logging
import os
import re
import shutil
from . import common
# C keywords that may precede the base type name of a declaration.
TYPE_MODIFIERS = ['const', 'signed', 'unsigned', 'long', 'short', 'struct', 'union', 'enum']

# Regex fragments matching zero or more type modifiers, each followed by
# whitespace. Raw strings are used so that '\s' reaches the regex engine
# without relying on Python passing unknown string escapes through.
VAR_TYPE_MODIFIER = '(?:' + '|'.join(t + r'\s+' for t in TYPE_MODIFIERS) + ')*'
# Return types additionally accept the G_CONST_RETURN macro as a modifier.
RET_TYPE_MODIFIER = '(?:' + '|'.join(t + r'\s+' for t in TYPE_MODIFIERS + ['G_CONST_RETURN']) + ')*'
# Matchers for the current line.
# The table is positional: ScanHeaderContent() matches every entry against the
# line being processed and refers to the results by index (cm[0] .. cm[25]).
# Entries that are None here depend on command-line options and are filled in
# by InitScanner(), which therefore has to run before the table is used.
CLINE_MATCHER = [
# 0: MACROS
re.compile(
r"""^\s*\#\s*define\s+
(\w+) # 1: name
""", re.VERBOSE),
# 1-3: TYPEDEF'D FUNCTIONS
# 1: complete 'typedef <ret> (*name) (' on a single line
re.compile(
r"""^\s*typedef\s+
(%s\w+) # 1: return type
(\s+const)?\s* # 2: 2nd const
(\**)\s* # 3: ptr
\(\*\s*
(\w+) # 4: name
\)\s*\(""" % RET_TYPE_MODIFIER, re.VERBOSE),
# 2: '<ret> (*name) (' - used together with a 'typedef' on the previous line
re.compile(
r"""^\s*
(%s?\w+) # 1: return type
(\s+const)?\s* # 2: 2nd const
(\**)\s* # 3: ptr
\(\*\s*
(\w+) # 4: name
\)\s*\(""" % RET_TYPE_MODIFIER, re.VERBOSE),
# 3: '(*name) (' - the return type is taken from the previous line
re.compile(
r"""^\s*
(\**)\s* # 1: ptr
\(\*\s*
(\w+) # 2: name
\)\s*\(""", re.VERBOSE),
# 4: FUNCTION POINTER VARIABLES
None, # in InitScanner()
# 5-7: ENUMS
re.compile(
r"""^\s*enum\s+
_?(\w+) # 1: name
\s+\{""", re.VERBOSE),
None, # in InitScanner()
re.compile(r'^\s*typedef\s+enum'),
# 8-11: STRUCTS AND UNIONS
None, # in InitScanner()
# 9: forward declaration of a '_'-prefixed (private) struct/union
re.compile(r'^\s*(?:struct|union)\s+_(\w+)\s*;'),
# 10: forward declaration of any other struct/union
re.compile(
r"""^\s*
(struct|union)\s+ # 1: struct/union
(\w+) # 2: name
\s*;""", re.VERBOSE),
# 11: start of a 'typedef struct/union [tag] {' body
re.compile(
r"""^\s*typedef\s+
(struct|union)\s*
\w*\s*{""", re.VERBOSE),
# 12-14: OTHER TYPEDEFS
None, # in InitScanner()
None, # in InitScanner()
re.compile(r'^\s*typedef\s+'),
# 15: VARIABLES (extern'ed variables)
None, # in InitScanner()
# 16: VARIABLES
re.compile(
r"""^\s*
(?:%s\w+)
(?:\s+\*+|\*+|\s)\s*
(?:const\s+)*
([A-Za-z]\w*) # 1: name
\s*\=""" % VAR_TYPE_MODIFIER, re.VERBOSE),
# 17: G_DECLARE_*
re.compile(
r""".*(G_DECLARE_|GDK_DECLARE_)
(FINAL_TYPE|DERIVABLE_TYPE|INTERNAL_TYPE|INTERFACE) # 1: variant
\s*\(""", re.VERBOSE),
# 18-21: FUNCTIONS
None, # in InitScanner()
None, # in InitScanner()
# 20: function name followed by '(' - return type expected on the previous line
re.compile(r'^\s*\(?([A-Za-z]\w*)\)?\s*\('),
# 21: line starting with '(' - return type and name expected on previous lines
re.compile(r'^\s*\('),
# 22-23: STRUCTS
re.compile(r'^\s*struct\s+_?(\w+)\s*\*'),
re.compile(r'^\s*struct\s+_?(\w+)'),
# 24-25: UNIONS
# NOTE(review): unlike matcher 22 the leading '_' is mandatory in matcher 24;
# confirm whether that asymmetry is intended.
re.compile(r'^\s*union\s+_(\w+)\s*\*'),
re.compile(r'^\s*union\s+_?(\w+)'),
]
# Matchers for the previous line (indexed as pm[0] .. pm[4] by
# ScanHeaderContent()).
PLINE_MATCHER = [
# 0-1: TYPEDEF'D FUNCTIONS
re.compile(
r"""^\s*typedef\s*
(%s\w+) # 1: return type
(\s+const)?\s* # 2: 2nd const
(\**)\s* # 3: ptr
""" % RET_TYPE_MODIFIER, re.VERBOSE),
re.compile(r'^\s*typedef\s*'),
# 2-4: FUNCTIONS
None, # in InitScanner()
None, # in InitScanner()
None, # in InitScanner()
]
# Matchers for the 2nd previous line; the list is created by InitScanner().
PPLINE_MATCHER = None
# Matchers for sub expressions
SUB_MATCHER = [
# 0: STRUCTS AND UNIONS - splits 'FooClass' / 'FooIface' / 'FooInterface'
# into the object name (group 1) and the suffix (group 2)
re.compile(r'^(\S+)(Class|Iface|Interface)\b'),
]
def Run(options):
    """Scan all requested headers and write the declaration files.

    Writes '<module>-decl-list.txt' and '<module>-decl.txt' into
    options.output_dir, optionally (re)builds '<module>.types', creates
    '<module>-sections.txt' from the declaration list when it is missing or a
    rebuild was requested, and makes sure '<module>-overrides.txt' exists.

    Args:
      options: parsed command line options. This function reads output_dir,
               module, headers, source_dir, rebuild_types and
               rebuild_sections, and may set rebuild_types to True.
    """
    logging.info('options: %s', str(options.__dict__))

    InitScanner(options)

    # makedirs() also creates missing parent directories and, with
    # exist_ok=True, does not fail if the directory appears between a check
    # and the creation.
    os.makedirs(options.output_dir, exist_ok=True)

    base_filename = os.path.join(options.output_dir, options.module)
    old_decl_list = base_filename + '-decl-list.txt'
    new_decl_list = base_filename + '-decl-list.new'
    old_decl = base_filename + '-decl.txt'
    new_decl = base_filename + '-decl.new'
    old_types = base_filename + '.types'
    new_types = base_filename + '.types.new'
    sections_file = base_filename + '-sections.txt'

    # If this is the very first run then we create the .types file automatically.
    if not os.path.exists(sections_file) and not os.path.exists(old_types):
        options.rebuild_types = True

    section_list = {}
    decl_list = []
    get_types = []

    # do not read files twice; checking it here permits to give both srcdir and
    # builddir as --source-dir without fear of duplicities
    seen_headers = {}

    for header in options.headers:
        ScanHeader(header, section_list, decl_list, get_types, seen_headers, options)

    for source_dir in options.source_dir:
        ScanHeaders(source_dir, section_list, decl_list, get_types, seen_headers, options)

    with open(new_decl_list, 'w', encoding='utf-8') as f:
        f.writelines(section_list[section] for section in sorted(section_list))
    common.UpdateFileIfChanged(old_decl_list, new_decl_list, True)

    with open(new_decl, 'w', encoding='utf-8') as f:
        f.writelines(decl_list)
    common.UpdateFileIfChanged(old_decl, new_decl, True)

    if options.rebuild_types:
        with open(new_types, 'w', encoding='utf-8') as f:
            f.writelines(func + '\n' for func in sorted(get_types))

        if not get_types:
            # Nothing to register: drop the empty file and move any previous
            # .types file out of the way.
            os.unlink(new_types)
            if os.path.exists(old_types):
                # os.replace() overwrites an existing backup on every platform.
                os.replace(old_types, old_types + '.bak')
        else:
            common.UpdateFileIfChanged(old_types, new_types, True)

    # If there is no MODULE-sections.txt file yet or we are asked to rebuild it,
    # we copy the MODULE-decl-list.txt file into its place. The user can tweak it
    # later if they want.
    if options.rebuild_sections or not os.path.exists(sections_file):
        new_sections_file = base_filename + '-sections.new'
        shutil.copyfile(old_decl_list, new_sections_file)
        common.UpdateFileIfChanged(sections_file, new_sections_file, False)

    # If there is no MODULE-overrides.txt file we create an empty one
    # because EXTRA_DIST in gtk-doc.make requires it.
    overrides_file = base_filename + '-overrides.txt'
    if not os.path.exists(overrides_file):
        open(overrides_file, 'w', encoding='utf-8').close()
def InitScanner(options):
    """Compile the option-dependent regexps.

    Fills the None placeholders of CLINE_MATCHER (indexes 4, 6, 8, 12, 13, 15,
    18 and 19) and PLINE_MATCHER (indexes 2-4) and creates PPLINE_MATCHER.
    The patterns depend on options.ignore_decorators, a '|'-separated list of
    decorator names in which a trailing '()' stands for an argument list.

    Args:
      options: parsed command line options; only ignore_decorators is read.
    """
    global PPLINE_MATCHER

    # avoid generating regex with |'' (matching no string)
    # TODO(ensonic): keep in sync with ScanHeaderContent()
    ignore_decorators = ''
    optional_decorators_regex = ''
    if options.ignore_decorators:
        # Raw strings keep '\(' and '\w' intact without relying on Python
        # passing unknown string escapes through unchanged.
        ignore_decorators = '|' + options.ignore_decorators.replace('()', r'\(\w*\)')
        optional_decorators_regex = r'(?:\s+(?:%s))?' % ignore_decorators[1:]

    # FUNCTION POINTER VARIABLES
    CLINE_MATCHER[4] = re.compile(
        r"""^\s*(?:\b(?:extern|static|inline|G_INLINE_FUNC%s)\s*)*
        ((?:const\s+|G_CONST_RETURN\s+)?\w+) # 1: 1st const
        (\s+const)?\s* # 2: 2nd const
        (\**)\s* # 3: ptr
        \(\*\s*
        (\w+) # 4: name
        \)\s*\(""" % ignore_decorators, re.VERBOSE)

    # ENUMS: 'typedef enum _NAME NAME;' with an optional trailing decorator
    CLINE_MATCHER[6] = re.compile(r'^\s*typedef\s+enum\s+_?(\w+)\s+\1%s\s*;' % optional_decorators_regex)

    # STRUCTS AND UNIONS: 'typedef struct _NAME NAME;'
    CLINE_MATCHER[8] = re.compile(
        r"""^\s*typedef\s+
        (struct|union)\s+ # 1: struct/union
        _(\w+)\s+\2 # 2: name
        %s # 3: optional decorator
        \s*;""" % optional_decorators_regex, re.VERBOSE)

    # OTHER TYPEDEFS
    CLINE_MATCHER[12] = re.compile(
        r"""^\s*typedef\s+
        (?:struct|union)\s+\w+[\s\*]+
        (\w+) # 1: name
        %s # 2: optional decorator
        \s*;""" % optional_decorators_regex, re.VERBOSE)
    CLINE_MATCHER[13] = re.compile(
        r"""^\s*
        (?:G_GNUC_EXTENSION\s+)?
        typedef\s+
        (.+?[\s\*]) # 1: e.g. 'unsigned int'
        (\w+) # 2: name
        (?:\s*\[[^\]]+\])*
        %s # 3: optional decorator
        \s*;""" % optional_decorators_regex, re.VERBOSE)

    # VARIABLES (extern'ed variables)
    CLINE_MATCHER[15] = re.compile(
        r"""^\s*
        (?:extern|[A-Za-z_]+VAR%s)\s+
        (?:%s\w+)
        (?:\s+\*+|\*+|\s)\s*
        (?:const\s+)*
        ([A-Za-z]\w*) # 1: name
        \s*;""" % (ignore_decorators, RET_TYPE_MODIFIER), re.VERBOSE)

    # FUNCTIONS
    # 18: functions whose name starts with '_' (treated as internal)
    CLINE_MATCHER[18] = re.compile(
        r"""^\s*
        (?:\b(?:extern|static|inline|G_INLINE_FUNC%s)\s*)*
        (%s\w+) # 1: return type
        ([\s*]+(?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s* # 2: .. cont'
        (_[A-Za-z]\w*) # 3: name
        \s*\(""" % (ignore_decorators, RET_TYPE_MODIFIER), re.VERBOSE)
    # 19: any other function with return type and name on one line
    CLINE_MATCHER[19] = re.compile(
        r"""^\s*
        (?:\b(?:extern|static|inline|G_INLINE_FUNC%s)\s*)*
        (%s\w+) # 1: return type
        ([\s*]+(?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s* # 2: .. cont'
        \(?([A-Za-z]\w*)\)? # 3: name
        \s*\(""" % (ignore_decorators, RET_TYPE_MODIFIER), re.VERBOSE)

    # Previous line holding only a return type ('extern' being the only
    # storage class accepted by matcher 2).
    PLINE_MATCHER[2] = re.compile(
        r"""^\s*
        (?:\b(?:extern%s)\s*)*
        (%s\w+) # 1: retun type
        ((?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*) # 2: .. cont'
        \s*$""" % (ignore_decorators, RET_TYPE_MODIFIER), re.VERBOSE)
    PLINE_MATCHER[3] = re.compile(
        r"""^\s*(?:\b(?:extern|static|inline|G_INLINE_FUNC%s)\s*)*
        (%s\w+) # 1: return type
        ((?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*) # 2: .. cont'
        \s*$""" % (ignore_decorators, RET_TYPE_MODIFIER), re.VERBOSE)
    # Previous line holding both the return type and the function name.
    PLINE_MATCHER[4] = re.compile(
        r"""^\s*(?:\b(?:extern|static|inline|G_INLINE_FUNC%s)\s*)*
        (%s\w+) # 1: return type
        (\s+\*+|\*+|\s)\s* # 2: ptr?
        ([A-Za-z]\w*) # 3: symbols
        \s*$""" % (ignore_decorators, RET_TYPE_MODIFIER), re.VERBOSE)

    # Matchers for 2nd previous line
    PPLINE_MATCHER = [
        # 0: FUNCTIONS
        re.compile(
            r"""^\s*(?:\b(?:extern|static|inline|G_INLINE_FUNC%s)\s*)*
            (
            (?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|struct\s+|union\s+|enum\s+)*
            \w+
            (?:\**\s+\**(?:const|G_CONST_RETURN))?
            (?:\s+|\s*\*+)
            )\s*$""" % ignore_decorators, re.VERBOSE)
    ]
def ScanHeaders(source_dir, section_list, decl_list, get_types, seen_headers, options):
    """Walk a directory tree and scan every '*.h' file found in it.

    Entries whose name starts with '.' are skipped. Header files of a
    directory are scanned first (in sorted order); afterwards its
    subdirectories are visited, except those whose name is listed in
    options.ignore_headers.

    Args:
      source_dir (str): the directory to scan.
      section_list (dict): passed through to ScanHeader().
      decl_list (list): passed through to ScanHeader().
      get_types (list): passed through to ScanHeader().
      seen_headers (dict): passed through to ScanHeader().
      options: commandline options; ignore_headers is read here.
    """
    logging.info('Scanning source directory: %s', source_dir)

    # Directories are only queued during the listing pass so that all headers
    # of this directory are handled before descending.
    pending_dirs = []
    for entry in sorted(os.listdir(source_dir)):
        if entry.startswith('.'):
            continue
        path = os.path.join(source_dir, entry)
        if os.path.isdir(path):
            pending_dirs.append(entry)
        elif entry.endswith('.h'):
            ScanHeader(path, section_list, decl_list, get_types,
                       seen_headers, options)

    # Second pass: recurse into every queued directory that is not ignored.
    for subdir in pending_dirs:
        ignore_pattern = r'(\s|^)' + re.escape(subdir) + r'(\s|$)'
        if not re.search(ignore_pattern, options.ignore_headers):
            ScanHeaders(os.path.join(source_dir, subdir), section_list,
                        decl_list, get_types, seen_headers, options)
def ScanHeader(input_file, section_list, decl_list, get_types, seen_headers, options):
    """Scan a header file for doc comments and symbol declarations.

    The file is skipped when it was scanned before, when its basename or full
    name is listed in options.ignore_headers, or when it does not exist.
    Otherwise its symbols are collected and a section entry for the file is
    appended to section_list.

    Args:
      input_file (str): the header file to scan.
      section_list (dict): a map of file basename to section text; updated
                           in place.
      decl_list (list): a list of declarations; passed to ScanHeaderContent().
      get_types (list): passed to ScanHeaderContent().
      seen_headers (dict): canonical paths of the headers handled so far, used
                           to avoid scanning headers twice; updated in place.
      options: commandline options; ignore_headers is read here.
    """
    # Don't scan headers twice
    canonical_input_file = os.path.realpath(input_file)
    if canonical_input_file in seen_headers:
        logging.info('File already scanned: %s', input_file)
        return

    seen_headers[canonical_input_file] = 1

    file_basename = os.path.split(input_file)[1][:-2]  # filename ends in .h

    # Check if the basename is in the list of headers to ignore.
    matchstr = r'(\s|^)' + re.escape(file_basename) + r'\.h(\s|$)'
    if re.search(matchstr, options.ignore_headers):
        logging.info('File ignored: %s', input_file)
        return

    # Check if the full name is in the list of headers to ignore.
    matchstr = r'(\s|^)' + re.escape(input_file) + r'(\s|$)'
    if re.search(matchstr, options.ignore_headers):
        logging.info('File ignored: %s', input_file)
        return

    if not os.path.exists(input_file):
        logging.warning('File does not exist: %s', input_file)
        return

    logging.info('Scanning %s', input_file)

    with open(input_file, 'r', encoding='utf-8') as hdr:
        input_lines = hdr.readlines()

    try:
        slist, doc_comments = ScanHeaderContent(input_lines, decl_list, get_types, options)
        logging.info("Scanning %s done", input_file)

        liststr = SeparateSubSections(slist, doc_comments)
        if liststr != '':
            if file_basename not in section_list:
                section_list[file_basename] = ''
            # The template must consume both arguments; a template without
            # conversion specifiers raises TypeError. The markup follows the
            # MODULE-sections.txt format (one <SECTION> per file, named by
            # <FILE>).
            section_list[file_basename] += "<SECTION>\n<FILE>%s</FILE>\n%s</SECTION>\n\n" % (
                file_basename, liststr)
    except RuntimeError as e:
        # Raised by ScanHeaderContent() for a partial declaration.
        common.LogWarning(input_file, 0, str(e))
def ScanHeaderContent(input_lines, decl_list, get_types, options):
    """Scan the given content lines of one header.

    The lines are processed one by one by a small state machine:
    'in_declaration' names the kind of declaration currently being collected
    in 'decl' (or is '' between declarations). Finished declarations are
    appended to decl_list and their symbol names to the returned list.

    Args:
      input_lines (list): the lines of the header, including line ends.
      decl_list (list): symbols declarations; appended to in place.
      get_types (list): list of symbols that have a get_type function;
                        appended to in place when options.rebuild_types is set.
      options: commandline options; ignore_decorators, deprecated_guards,
               module and rebuild_types are read.

    Returns:
      tuple: a list of symbols found and a dict of symbols (lowercased) for
             which we have a doc-comment.

    Raises:
      RuntimeError: if the input ends in the middle of a declaration.
    """
    # NOTE(review): several output templates below (e.g. the struct templates
    # ending in '%s>' and the bare '\n' used as deprecation marker) look as if
    # markup was lost from them. They are reproduced unchanged here - confirm
    # against the reader of the decl file.

    # Holds the resulting list of declarations.
    slist = []
    # Holds the title of the section
    title = None
    # True if we are in a comment.
    in_comment = 0
    # The type of declaration we are in, e.g. 'function' or 'macro'.
    in_declaration = ''
    # True if we should skip a block.
    skip_block = False
    # The current symbol being declared.
    symbol = None
    # Holds the declaration of the current symbol.
    decl = ''
    # For functions and function typedefs this holds the function's return type.
    ret_type = None
    # The pre-previous line read in - some Gnome functions have the return type
    # on one line, the function name on the next, and the rest of the
    # declaration after.
    pre_previous_line = ''
    # The previous line read in - some Gnome functions have the return type on
    # one line and the rest of the declaration after.
    previous_line = ''
    # Used to try to skip the standard #ifdef XXX #define XXX at the start of
    # headers.
    first_macro = 1
    # Used to handle structs/unions which contain nested structs or unions.
    level = None
    # Set to 1 for internal symbols, we need to fully parse, but don't add them
    # to docs
    internal = 0
    # Dict of forward declarations, we skip them if we find the real declaration
    # later.
    forward_decls = {}
    # Dict of doc-comments we found. The key is lowercase symbol name, val=1.
    doc_comments = {}

    # Nesting depth of preprocessor conditionals inside a deprecation guard
    # resp. inside a '__GTK_DOC_IGNORE__' guard (0 = not inside one).
    deprecated_conditional_nest = 0
    ignore_conditional_nest = 0

    deprecated = ''
    doc_comment = ''

    # avoid generating regex with |'' (matching no string)
    # TODO(ensonic): keep in sync with InitScanner()
    # TODO(ensonic): extract the remaining regexps
    ignore_decorators = ''  # 1 uses
    optional_decorators_regex = ''  # 4 uses
    if options.ignore_decorators:
        ignore_decorators = '|' + options.ignore_decorators.replace('()', r'\(\w*\)')
        optional_decorators_regex = r'(?:\s+(?:%s))?' % ignore_decorators[1:]

    for line in input_lines:
        # If this is a private header, skip it.
        # TODO: consider scanning this first, so that we don't modify: decl_list
        # and get_types
        if re.search(r'^\s*/\*\s*<\s*private_header\s*>\s*\*/', line):
            return [], {}

        # Skip to the end of the current comment.
        if in_comment:
            logging.info('Comment: %s', line.strip())
            doc_comment += line
            if re.search(r'\*/', line):
                # Remember the symbol named in the comment ('* symbol_name:').
                m = re.search(r'\* ([a-zA-Z][a-zA-Z0-9_]+):', doc_comment)
                if m:
                    doc_comments[m.group(1).lower()] = 1
                in_comment = 0
                doc_comment = ''
            continue

        # Keep a count of #if, #ifdef, #ifndef nesting,
        # and if we enter a deprecation-symbol-bracketed
        # zone, take note.
        m = re.search(r'^\s*#\s*if(?:n?def\b|\s+!?\s*defined\s*\()\s*(\w+)', line)
        if m:
            define_name = m.group(1)
            if deprecated_conditional_nest < 1 and re.search(options.deprecated_guards, define_name):
                deprecated_conditional_nest = 1
            elif deprecated_conditional_nest >= 1:
                deprecated_conditional_nest += 1
            if ignore_conditional_nest == 0 and '__GTK_DOC_IGNORE__' in define_name:
                ignore_conditional_nest = 1
            elif ignore_conditional_nest > 0:
                # A conditional nested inside an ignored block must deepen the
                # nesting; otherwise its #endif would end the ignored block.
                ignore_conditional_nest += 1
        elif re.search(r'^\s*#\s*if', line):
            # Any other conditional. '\s*' so that '#if' (no space) is counted
            # just like '# if'; every one of them has a matching #endif below.
            if deprecated_conditional_nest >= 1:
                deprecated_conditional_nest += 1
            if ignore_conditional_nest > 0:
                ignore_conditional_nest += 1
        elif re.search(r'^\s*#\s*endif', line):
            if deprecated_conditional_nest >= 1:
                deprecated_conditional_nest -= 1
            if ignore_conditional_nest > 0:
                ignore_conditional_nest -= 1

        # If we find a line containing _DEPRECATED, we hope that this is
        # attribute based deprecation and also treat this as a deprecation
        # guard, unless it's a macro definition or the end of a deprecation
        # section (#endif /* XXX_DEPRECATED */
        if deprecated_conditional_nest == 0 and '_DEPRECATED' in line:
            m = re.search(r'^\s*#\s*(if*|define|endif)', line)
            if not (m or in_declaration == 'enum' or in_declaration == 'struct'):
                logging.info('Found deprecation annotation (decl: "%s"): "%s"',
                             in_declaration, line.strip())
                # A fractional value marks the current declaration as
                # deprecated without counting as a nesting level; it is
                # truncated by int() once the declaration has been stored.
                deprecated_conditional_nest += 0.1

        # set flag that is used later when we do AddSymbolToList
        if deprecated_conditional_nest > 0:
            deprecated = '\n'
        else:
            deprecated = ''

        if ignore_conditional_nest:
            continue

        if not in_declaration:
            # Skip top-level comments.
            m = re.search(r'^\s*/\*', line)
            if m:
                # NOTE(review): the original called re.sub() here to strip the
                # comment opener but discarded the result; the no-op call was
                # dropped and the line is tested unmodified as before.
                if re.search(r'\*/', line):
                    logging.info('Found one-line comment: %s', line.strip())
                else:
                    in_comment = 1
                    doc_comment = line
                    logging.info('Found start of comment: %s', line.strip())
                continue

            # Skip begin/end deprecation macros.
            m = re.search(r'^\s*G_GNUC_(BEGIN|END)_IGNORE_DEPRECATIONS', line)
            if m:
                continue

            logging.info('no decl: %s', line.strip())

            # Run every matcher once; the branches below pick by index.
            cm = [m.match(line) for m in CLINE_MATCHER]
            pm = [m.match(previous_line) for m in PLINE_MATCHER]
            ppm = [m.match(pre_previous_line) for m in PPLINE_MATCHER]

            # MACROS
            if cm[0]:
                symbol = cm[0].group(1)
                decl = line
                # We assume all macros which start with '_' are private, but
                # we accept '_' itself which is the standard gettext macro.
                # We also try to skip the first macro if it looks like the
                # standard #ifndef HEADER_FILE #define HEADER_FILE etc.
                # And we only want TRUE & FALSE defined in GLib.
                if (not symbol.startswith('_')
                        and (not re.search(r'#ifndef\s+' + symbol, previous_line)
                             or first_macro == 0)
                        and ((symbol != 'TRUE' and symbol != 'FALSE')
                             or options.module == 'glib')) \
                        or symbol == '_':
                    in_declaration = 'macro'
                    logging.info('Macro: "%s"', symbol)
                else:
                    logging.info('skipping Macro: "%s"', symbol)
                    in_declaration = 'macro'
                    internal = 1
                first_macro = 0

            # TYPEDEF'D FUNCTIONS (i.e. user functions)
            elif cm[1]:
                ret_type = format_ret_type(cm[1].group(1), cm[1].group(2), cm[1].group(3))
                symbol = cm[1].group(4)
                decl = line[cm[1].end():]
                in_declaration = 'user_function'
                logging.info('user function (1): "%s", Returns: "%s"', symbol, ret_type)

            elif pm[1] and cm[2]:
                ret_type = format_ret_type(cm[2].group(1), cm[2].group(2), cm[2].group(3))
                symbol = cm[2].group(4)
                decl = line[cm[2].end():]
                in_declaration = 'user_function'
                logging.info('user function (2): "%s", Returns: "%s"', symbol, ret_type)

            elif pm[1] and cm[3]:
                ret_type = cm[3].group(1)
                symbol = cm[3].group(2)
                decl = line[cm[3].end():]
                # The base return type sits on the previous ('typedef ...') line.
                if pm[0]:
                    ret_type = format_ret_type(pm[0].group(1), pm[0].group(2), pm[0].group(3)) + ret_type
                    in_declaration = 'user_function'
                    logging.info('user function (3): "%s", Returns: "%s"', symbol, ret_type)

            # FUNCTION POINTER VARIABLES
            elif cm[4]:
                ret_type = format_ret_type(cm[4].group(1), cm[4].group(2), cm[4].group(3))
                symbol = cm[4].group(4)
                decl = line[cm[4].end():]
                in_declaration = 'user_function'
                logging.info('function pointer variable: "%s", Returns: "%s"', symbol, ret_type)

            # ENUMS
            elif cm[5]:
                # We assume that 'enum _NAME {' is really the
                # declaration of enum NAME.
                # NOTE(review): the original called re.sub() here to rewrite
                # the line to 'enum NAME {' but discarded the result; the
                # declaration is stored verbatim to keep the output unchanged.
                symbol = cm[5].group(1)
                decl = line
                in_declaration = 'enum'
                logging.info('plain enum: "%s"', symbol)

            elif cm[6]:
                # We skip 'typedef enum _NAME NAME;' as the enum will
                # be declared elsewhere.
                logging.info('skipping enum typedef: "%s"', line)

            elif cm[7]:
                symbol = ''
                decl = line
                in_declaration = 'enum'
                logging.info('typedef enum: -')

            # STRUCTS AND UNIONS
            elif cm[8]:
                # We've found a 'typedef struct _NAME NAME;'
                # This could be an opaque data structure, so we output an
                # empty declaration. If the structure is actually found that
                # will override this (technically if will just be another entry
                # in the output file and will be joined when reading the file).
                structsym = cm[8].group(1).upper()
                logging.info('%s typedef: "%s"', structsym, cm[8].group(2))
                forward_decls[cm[8].group(2)] = '<%s>\n%s\n%s%s>\n' % (
                    structsym, cm[8].group(2), deprecated, structsym)

                m = SUB_MATCHER[0].match(cm[8].group(2))
                if m:
                    objectname = m.group(1)
                    logging.info('Found object: "%s"', objectname)
                    title = '%s' % objectname

            elif cm[9]:
                # Skip private structs/unions.
                logging.info('private struct/union')

            elif cm[10]:
                # Do a similar thing for normal structs as for typedefs above.
                # But we output the declaration as well in this case, so we
                # can differentiate it from a typedef.
                structsym = cm[10].group(1).upper()
                logging.info('%s:%s', structsym, cm[10].group(2))
                forward_decls[cm[10].group(2)] = '<%s>\n%s\n%s%s%s>\n' % (
                    structsym, cm[10].group(2), line, deprecated, structsym)

            elif cm[11]:
                symbol = ''
                decl = line
                level = 0
                in_declaration = cm[11].group(1)
                logging.info('typedef struct/union "%s"', in_declaration)

            # OTHER TYPEDEFS
            elif cm[12]:
                logging.info('Found struct/union(*) typedef "%s": "%s"', cm[12].group(1), line)
                if AddSymbolToList(slist, cm[12].group(1)):
                    decl_list.append('\n%s\n%s%s\n' %
                                     (cm[12].group(1), deprecated, line))

            elif cm[13]:
                if cm[13].group(1).split()[0] not in ('struct', 'union'):
                    logging.info('Found typedef: "%s"', line)
                    if AddSymbolToList(slist, cm[13].group(2)):
                        decl_list.append(
                            '\n%s\n%s%s\n' % (cm[13].group(2), deprecated, line))

            elif cm[14]:
                logging.info('Skipping typedef: "%s"', line)

            # VARIABLES (extern'ed variables)
            elif cm[15]:
                symbol = cm[15].group(1)
                # Normalize export macros ending in 'VAR' to plain 'extern'.
                line = re.sub(r'^\s*([A-Za-z_]+VAR)\b', r'extern', line)
                decl = line
                logging.info('Possible extern var "%s": "%s"', symbol, decl)
                if AddSymbolToList(slist, symbol):
                    decl_list.append('\n%s\n%s%s\n' % (symbol, deprecated, decl))

            # VARIABLES
            elif cm[16]:
                symbol = cm[16].group(1)
                decl = line
                logging.info('Possible global var" %s": "%s"', symbol, decl)
                if AddSymbolToList(slist, symbol):
                    decl_list.append('\n%s\n%s%s\n' % (symbol, deprecated, decl))

            # G_DECLARE_*
            elif cm[17]:
                in_declaration = 'g-declare'
                symbol = cm[17].group(1) + cm[17].group(2)
                decl = line[cm[17].end():]

            # FUNCTIONS
            elif cm[18]:
                # We assume that functions starting with '_' are private and skip them.
                ret_type = format_ret_type(cm[18].group(1), None, cm[18].group(2))
                symbol = cm[18].group(3)
                decl = line[cm[18].end():]
                logging.info('internal Function: "%s", Returns: "%s""%s"', symbol, cm[18].group(1), cm[18].group(2))
                in_declaration = 'function'
                internal = 1
                skip_block |= is_inline_func(line)

            elif cm[19]:
                ret_type = format_ret_type(cm[19].group(1), None, cm[19].group(2))
                symbol = cm[19].group(3)
                decl = line[cm[19].end():]
                logging.info('Function (1): "%s", Returns: "%s""%s"', symbol, cm[19].group(1), cm[19].group(2))
                in_declaration = 'function'
                skip_block |= is_inline_func(line)

            # Try to catch function declarations which have the return type on
            # the previous line. But we don't want to catch complete functions
            # which have been declared G_INLINE_FUNC, e.g. g_bit_nth_lsf in
            # glib, or 'static inline' functions.
            elif cm[20]:
                symbol = cm[20].group(1)
                decl = line[cm[20].end():]

                if is_inline_func(previous_line):
                    skip_block = True
                    if pm[3]:
                        ret_type = format_ret_type(pm[3].group(1), None, pm[3].group(2))
                        logging.info('Function (3): "%s", Returns: "%s"', symbol, ret_type)
                        in_declaration = 'function'
                else:
                    if pm[2]:
                        ret_type = format_ret_type(pm[2].group(1), None, pm[2].group(2))
                        logging.info('Function (2): "%s", Returns: "%s"', symbol, ret_type)
                        in_declaration = 'function'

            # Try to catch function declarations with the return type and name
            # on the previous line(s), and the start of the parameters on this.
            elif cm[21]:
                decl = line[cm[21].end():]
                if pm[4]:
                    ret_type = pm[4].group(1) + ' ' + pm[4].group(2).strip()
                    symbol = pm[4].group(3)
                    in_declaration = 'function'
                    logging.info('Function (5): "%s", Returns: "%s"', symbol, ret_type)

                elif re.search(r'^\s*\w+\s*$', previous_line) and ppm[0]:
                    ret_type = ppm[0].group(1)
                    ret_type = re.sub(r'\s*\n', '', ret_type, flags=re.MULTILINE)
                    in_declaration = 'function'

                    symbol = previous_line
                    symbol = re.sub(r'^\s+', '', symbol)
                    symbol = re.sub(r'\s*\n', '', symbol, flags=re.MULTILINE)
                    logging.info('Function (6): "%s", Returns: "%s"', symbol, ret_type)

            # } elsif (m/^extern\s+/) {
            # print "DEBUG: Skipping extern: $_"

            # STRUCTS
            elif cm[22]:
                # Skip 'struct _NAME *', since it could be a
                # return type on its own line.
                pass
            elif cm[23]:
                # We assume that 'struct _NAME' is really the
                # declaration of struct NAME.
                symbol = cm[23].group(1)
                decl = line
                # we will find the correct level as below we do $level += tr/{//
                level = 0
                in_declaration = 'struct'
                logging.info('Struct(_): "%s"', symbol)

            # UNIONS
            elif cm[24]:
                # Skip 'union _NAME *' (see above)
                pass
            elif cm[25]:
                symbol = cm[25].group(1)
                decl = line
                level = 0
                in_declaration = 'union'
                logging.info('Union(_): "%s"', symbol)
        else:
            # Continuation of a declaration started on an earlier line.
            logging.info('in decl %s: skip=%s %s', in_declaration, skip_block, line.strip())
            decl += line

            if skip_block and '{' in decl:
                (skip_block, decl) = remove_braced_content(decl)
                logging.info('in decl: skip=%s decl=[%s]', skip_block, decl)

        pre_previous_line = previous_line
        previous_line = line

        if skip_block:
            logging.info('skipping, in decl %s, decl=[%s]', in_declaration, decl)
            continue

        if in_declaration == "g-declare":
            dm = re.search(r'\s*(\w+)\s*,\s*(\w+)\s*,\s*(\w+)\s*,\s*(\w+)\s*,\s*(\w+)\s*\).*$', decl)
            # FIXME the original code does s// stuff here and we don't. Is it necessary?
            if dm:
                ModuleObjName = dm.group(1)
                module_obj_name = dm.group(2)
                if options.rebuild_types:
                    get_types.append(module_obj_name + '_get_type')
                forward_decls[ModuleObjName] = '\n%s\n%s\n' % (ModuleObjName, deprecated)
                if symbol.startswith('G_DECLARE_DERIVABLE'):
                    forward_decls[ModuleObjName + 'Class'] = '\n%sClass\n%s\n' % (
                        ModuleObjName, deprecated)
                if symbol.startswith('G_DECLARE_INTERFACE'):
                    forward_decls[ModuleObjName + 'Interface'] = '\n%sInterface\n%s\n' % (
                        ModuleObjName, deprecated)
                in_declaration = ''

        if in_declaration == 'function':
            # Note that sometimes functions end in ') G_GNUC_PRINTF (2, 3);' or
            # ') __attribute__ (...);'.
            regex = r'\)\s*(G_GNUC_.*|.*DEPRECATED.*%s\s*|__attribute__\s*\(.*\)\s*)*;.*$' % ignore_decorators
            end_m = re.search(regex, decl, flags=re.MULTILINE)
            if end_m:
                logging.info('scrubbing:[%s]', decl.strip())
                decl = re.sub(regex, '', decl, flags=re.MULTILINE)
                logging.info('scrubbed:[%s]', decl.strip())
                if internal == 0:
                    decl = re.sub(r'/\*.*?\*/', '', decl, flags=re.MULTILINE)  # remove comments.
                    decl = re.sub(r'\s*\n\s*(?!$)', ' ', decl, flags=re.MULTILINE)  # remove newlines
                    # consolidate whitespace at start/end of lines.
                    decl = decl.strip()
                    ret_type = re.sub(r'/\*.*?\*/', '', ret_type).strip()  # remove comments in ret type.
                    if AddSymbolToList(slist, symbol):
                        decl_list.append('\n%s\n%s%s\n%s\n\n' %
                                         (symbol, deprecated, ret_type, decl))
                        if options.rebuild_types:
                            # check if this looks like a get_type function and if so remember
                            if symbol.endswith('_get_type') and 'GType' in ret_type and re.search(r'^(void|)$', decl):
                                logging.info(
                                    "Adding get-type: [%s] [%s] [%s]", ret_type, symbol, decl)
                                get_types.append(symbol)
                else:
                    internal = 0
                deprecated_conditional_nest = int(deprecated_conditional_nest)
                in_declaration = ''
                skip_block = False

        if in_declaration == 'user_function':
            if re.search(r'\).*$', decl):
                decl = re.sub(r'\).*$', '', decl)
                # TODO: same as above
                decl = re.sub(r'/\*.*?\*/', '', decl, flags=re.MULTILINE)  # remove comments.
                decl = re.sub(r'\s*\n\s*(?!$)', ' ', decl, flags=re.MULTILINE)  # remove newlines
                # TODO: don't stip here (it works above, but fails some test
                # consolidate whitespace at start/end of lines.
                # decl = decl.strip()
                if AddSymbolToList(slist, symbol):
                    decl_list.append('\n%s\n%s%s\n%s\n' %
                                     (symbol, deprecated, ret_type, decl))
                deprecated_conditional_nest = int(deprecated_conditional_nest)
                in_declaration = ''

        if in_declaration == 'macro':
            # A trailing backslash means the macro continues on the next line.
            if not re.search(r'\\\s*$', decl):
                if internal == 0:
                    if AddSymbolToList(slist, symbol):
                        decl_list.append('\n%s\n%s%s\n' % (symbol, deprecated, decl))
                else:
                    logging.info('skip internal macro: [%s]', symbol)
                    internal = 0
                deprecated_conditional_nest = int(deprecated_conditional_nest)
                in_declaration = ''
            else:
                logging.info('skip empty macro: [%s]', symbol)

        if in_declaration == 'enum':
            # Examples:
            # "};"
            # "} MyEnum;"
            # "} MyEnum DEPRECATED_FOR(NewEnum);"
            # "} DEPRECATED_FOR(NewEnum);"
            em = re.search(r'\n\s*\}\s*(?:(\w+)?%s)?;\s*$' % optional_decorators_regex, decl)
            if em:
                if symbol == '':
                    symbol = em.group(1)
                # Enums could contain deprecated values and that doesn't mean
                # the whole enum is deprecated, so they are ignored when setting
                # deprecated_conditional_nest above. Here we can check if the
                # _DEPRECATED is between '}' and ';' which would mean the enum
                # as a whole is deprecated.
                if re.search(r'\n\s*\}.*_DEPRECATED.*;\s*$', decl):
                    deprecated = '\n'
                if AddSymbolToList(slist, symbol):
                    stripped_decl = re.sub(optional_decorators_regex, '', decl)
                    decl_list.append('\n%s\n%s%s\n' % (symbol, deprecated, stripped_decl))
                deprecated_conditional_nest = int(deprecated_conditional_nest)
                in_declaration = ''

        # We try to handle nested structs/unions, but unmatched brackets in
        # comments will cause problems.
        if in_declaration == 'struct' or in_declaration == 'union':
            # Same regex as for enum
            sm = re.search(r'\n\}\s*(?:(\w+)?%s)?;\s*$' % optional_decorators_regex, decl)
            if level <= 1 and sm:
                if symbol == '':
                    symbol = sm.group(1)

                bm = re.search(r'^(\S+)(Class|Iface|Interface)\b', symbol)
                if bm:
                    objectname = bm.group(1)
                    logging.info('Found object: "%s"', objectname)
                    title = '%s' % objectname

                logging.info('Store struct: "%s"', symbol)
                # Structs could contain deprecated members and that doesn't
                # mean the whole struct is deprecated, so they are ignored when
                # setting deprecated_conditional_nest above. Here we can check
                # if the _DEPRECATED is between '}' and ';' which would mean
                # the struct as a whole is deprecated.
                if re.search(r'\n\s*\}.*_DEPRECATED.*;\s*$', decl):
                    deprecated = '\n'
                if AddSymbolToList(slist, symbol):
                    structsym = in_declaration.upper()
                    regex = r'(?:\s+(?:G_GNUC_\w+(?:\(\w*\))?%s))' % ignore_decorators
                    stripped_decl = re.sub(regex, '', decl)
                    decl_list.append('<%s>\n%s\n%s%s%s>\n' %
                                     (structsym, symbol, deprecated, stripped_decl, structsym))
                    # The real declaration replaces any forward declaration.
                    if symbol in forward_decls:
                        del forward_decls[symbol]
                deprecated_conditional_nest = int(deprecated_conditional_nest)
                in_declaration = ''
            else:
                # Count the brackets in the line, and adjust level accordingly.
                level += line.count('{')
                level -= line.count('}')
                logging.info('struct/union level : %d', level)

    # here we want in_declaration=='', otherwise we have a partial declaration
    if in_declaration != '':
        raise RuntimeError('partial declaration (%s) : %s ' % (in_declaration, decl))

    # print remaining forward declarations
    for symbol in sorted(forward_decls.keys()):
        if forward_decls[symbol]:
            AddSymbolToList(slist, symbol)
            decl_list.append(forward_decls[symbol])

    # add title
    if title:
        slist = [title] + slist
    return slist, doc_comments
def remove_braced_content(decl):
    """Strip complete '{...}' blocks from a declaration.

    Innermost brace pairs are removed repeatedly until none is left. If an
    opening brace remains afterwards the block is not complete yet and the
    caller has to keep skipping.

    Args:
      decl (str): the decl

    Returns:
      tuple: (skip_block, decl) - skip_block is True while an unmatched '{'
             remains; otherwise it is False and the stripped decl has been
             terminated with ';'.
    """
    innermost_pair = r'{[^{]*?}'
    while re.search(innermost_pair, decl):
        decl = re.sub(innermost_pair, '', decl)
        logging.info('decl=[%s]' % decl)

    # An opening brace without its partner: more input is needed.
    if re.search(r'(.*?){', decl):
        return True, decl

    # this is a hack to detect the end of declaration
    decl = decl.rstrip() + ';'
    logging.info('skip_block done')
    return False, decl
def is_inline_func(line):
    """Tell whether a line starts an inline function definition.

    Args:
      line (str): a source line

    Returns:
      bool: True if the stripped line starts with 'G_INLINE_FUNC' or contains
            'static inline', False otherwise.
    """
    text = line.strip()
    reason = None
    if text.startswith('G_INLINE_FUNC'):
        reason = 'skip block after G_INLINE_FUNC function'
    elif re.search(r'static\s+inline', text):
        reason = 'skip block after static inline function'

    if reason is None:
        return False
    logging.info(reason)
    return True
def format_ret_type(base_type, const, ptr):
    """Assemble a return type string from its matched parts.

    Args:
      base_type (str): the base type
      const (str): optional text appended verbatim (may be None or empty)
      ptr (str): optional pointer part; appended stripped, after one space
                 (may be None or empty)

    Returns:
      str: the combined return type
    """
    pieces = [base_type]
    if const:
        pieces.append(const)
    if ptr:
        pieces.append(' ' + ptr.strip())
    return ''.join(pieces)
def SeparateSubSections(slist, doc_comments):
    """Separate the standard macros and functions from the other symbols.

    Repeatedly looks for a GObject type in the symbol list (through its
    ``*_IS_*_CLASS`` macro, its ``*_IS_*`` macro or its ``*_get_type``
    function) and moves that type's boilerplate symbols, plus its private
    struct and any undocumented instance/class/interface structs, to the end
    of the list. This is done in a loop to catch objects, enums and flags.

    Args:
      slist (list): list of symbols; empty entries are ignored
      doc_comments (dict): comments for each symbol; looked up with
                           lower-case keys

    Returns:
      str: the remaining symbols, one per line, followed (when anything was
           separated) by an empty line and the sorted separated symbols
    """
    klass = lclass = prefix = lprefix = None
    standard_decl = []
    liststr = '\n'.join(s for s in slist if s) + '\n'

    while True:
        m = re.search(r'^(\S+)_IS_(\S*)_CLASS\n', liststr, flags=re.MULTILINE)
        m2 = re.search(r'^(\S+)_IS_(\S*)\n', liststr, flags=re.MULTILINE)
        m3 = re.search(r'^(\S+?)_(\S*)_get_type\n', liststr, flags=re.MULTILINE)
        if m:
            prefix = m.group(1)
            lprefix = prefix.lower()
            klass = m.group(2)
            lclass = klass.lower()
            logging.info("Found gobject type '%s_%s' from is_class macro", prefix, klass)
        elif m2:
            prefix = m2.group(1)
            lprefix = prefix.lower()
            klass = m2.group(2)
            lclass = klass.lower()
            logging.info("Found gobject type '%s_%s' from is_ macro", prefix, klass)
        elif m3:
            lprefix = m3.group(1)
            prefix = lprefix.upper()
            lclass = m3.group(2)
            klass = lclass.upper()
            logging.info("Found gobject type '%s_%s' from get_type function", prefix, klass)
        else:
            break

        # Lower-case type name without separators, e.g. 'gtk' + 'tree_view'
        # gives 'gtktreeview'. The struct patterns below are matched
        # case-insensitively by replace_once().
        mtype = lprefix + lclass.replace('_', '')

        # The names are captured with \S, so escape them before putting them
        # back into a pattern. For plain C identifiers this changes nothing.
        mtype_re = re.escape(mtype)
        klass_re = re.escape(klass)
        lclass_re = re.escape(lclass)

        liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sPrivate\n' % mtype_re)

        # We only leave XxYy* in the normal section if they have docs
        if mtype not in doc_comments:
            logging.info(" Hide instance docs for %s", mtype)
            liststr, standard_decl = replace_once(liststr, standard_decl, r'^%s\n' % mtype_re)

        if mtype + 'class' not in doc_comments:
            logging.info(" Hide class docs for %s", mtype)
            liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sClass\n' % mtype_re)

        # Anchored like the patterns above: without '^' the match could start
        # in the middle of a longer symbol and remove only its tail.
        if mtype + 'interface' not in doc_comments:
            logging.info(" Hide iface docs for %s", mtype)
            liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sInterface\n' % mtype_re)

        if mtype + 'iface' not in doc_comments:
            logging.info(" Hide iface docs for %s", mtype)
            liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sIface\n' % mtype_re)

        liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_IS_%s\n' % klass_re)
        liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_TYPE_%s\n' % klass_re)
        liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_get_type\n' % lclass_re)
        liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_IS_%s_CLASS\n' % klass_re)
        liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_CLASS\n' % klass_re)
        liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_CLASS\n' % klass_re)
        liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_IFACE\n' % klass_re)
        liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_INTERFACE\n' % klass_re)
        # We do this one last, otherwise it tends to be caught by the IS_$class macro
        liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s\n' % klass_re)

    logging.info('Decl:%s---', liststr)
    logging.info('Std :%s---', ''.join(sorted(standard_decl)))
    if standard_decl:
        # sort the symbols
        # NOTE(review): only an empty line separates the two parts here; if
        # consumers expect an explicit 'Standard' subsection marker, confirm
        # where it is added.
        liststr += '\n' + ''.join(sorted(standard_decl))
    return liststr
def replace_once(liststr, standard_decl, regex):
    """Move the first match of a pattern from the symbol list to a side list.

    The pattern is matched case-insensitively and in multi-line mode. Only
    the first match is cut out of liststr, and exactly that text is recorded,
    so any further matches stay in the list instead of being dropped
    unrecorded.

    Args:
      liststr (str): newline separated symbol list
      standard_decl (list): collected matches; appended to in place
      regex (str): pattern to look for

    Returns:
      tuple: (liststr, standard_decl) with the first match, if any, moved
             from the former to the latter
    """
    mre = re.search(regex, liststr, flags=re.IGNORECASE | re.MULTILINE)
    if mre:
        standard_decl.append(mre.group(0))
        # Remove exactly the recorded span and nothing else.
        liststr = liststr[:mre.start()] + liststr[mre.end():]
    return liststr, standard_decl
def replace_all(liststr, standard_decl, regex):
    """Move every match of a pattern from the symbol list to a side list.

    The pattern is matched case-sensitively and in multi-line mode. Matches
    are removed one at a time so that each removed symbol is also recorded;
    removing them all in one substitution would record only the first and
    lose the others.

    Args:
      liststr (str): newline separated symbol list
      standard_decl (list): collected matches; appended to in place
      regex (str): pattern to look for; it must not match the empty string,
                   otherwise the loop never ends

    Returns:
      tuple: (liststr, standard_decl) with all matches moved from the former
             to the latter, in order of appearance
    """
    pattern = re.compile(regex, flags=re.MULTILINE)
    mre = pattern.search(liststr)
    while mre:
        standard_decl.append(mre.group(0))
        # Remove exactly the recorded span, then look again.
        liststr = liststr[:mre.start()] + liststr[mre.end():]
        mre = pattern.search(liststr)
    return liststr, standard_decl
def AddSymbolToList(slist, symbol):
    """Append a symbol to the declaration list unless it is already there.

    Args:
      slist: The list of symbols; modified in place.
      symbol: The symbol to add to the list.

    Returns:
      bool: True if the symbol was appended, False if it was already present.
            Callers can use False to skip writing another entry to -decl.txt,
            which avoids redeclarations (e.g. in conditional sections).
    """
    is_new = symbol not in slist
    if is_new:
        slist.append(symbol)
    return is_new