summary refs log tree commit diff
path: root/update-error-constants.py
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2022-02-18 11:42:40 +0100
committer Stefan Behnel <stefan_ml@behnel.de> 2022-02-18 11:43:25 +0100
commit 9660889bbbc0c961452590e261420d7b603c122d (patch)
tree 88d4a28cffcf1c544963f2715faff8f61107cfd4 /update-error-constants.py
parent 064ff1f6298e96e292a398ccc1922aa05785fef0 (diff)
download python-lxml-9660889bbbc0c961452590e261420d7b603c122d.tar.gz
Parse libxml2 error constants from libxml2-api.xml instead of the HTML sources to avoid having to generate the documentation.
Also avoid actually writing the output files if there are no changes, to avoid useless rebuilds.
Diffstat (limited to 'update-error-constants.py')
-rw-r--r-- update-error-constants.py 215
1 files changed, 109 insertions, 106 deletions
diff --git a/update-error-constants.py b/update-error-constants.py
index 8a836856..02928400 100644
--- a/update-error-constants.py
+++ b/update-error-constants.py
@@ -2,23 +2,14 @@
from __future__ import print_function, absolute_import
-import sys, os, os.path, re, codecs
+import operator
+import os.path
+import sys
+import xml.etree.ElementTree as ET
BUILD_SOURCE_FILE = os.path.join("src", "lxml", "xmlerror.pxi")
BUILD_DEF_FILE = os.path.join("src", "lxml", "includes", "xmlerror.pxd")
-if len(sys.argv) < 2 or sys.argv[1].lower() in ('-h', '--help'):
- print("This script generates the constants in file %s" % BUILD_SOURCE_FILE)
- print("Call as")
- print(sys.argv[0], "/path/to/libxml2-doc-dir")
- sys.exit(len(sys.argv) > 1)
-
-HTML_DIR = os.path.join(sys.argv[1], 'html')
-os.stat(HTML_DIR) # raise an error if we can't find it
-
-sys.path.insert(0, 'src')
-from lxml import etree
-
# map enum name to Python variable name and alignment for constant name
ENUM_MAP = {
'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'),
@@ -42,6 +33,7 @@ COMMENT = """
""" % os.path.basename(sys.argv[0])
+
def split(lines):
lines = iter(lines)
pre = []
@@ -50,108 +42,119 @@ def split(lines):
if line.startswith('#') and "BEGIN: GENERATED CONSTANTS" in line:
break
pre.append('')
+ old = []
for line in lines:
if line.startswith('#') and "END: GENERATED CONSTANTS" in line:
break
+ old.append(line.rstrip('\n'))
post = ['', line]
post.extend(lines)
post.append('')
- return pre, post
+ return pre, old, post
+
def regenerate_file(filename, result):
+ new = COMMENT + '\n'.join(result)
+
# read .pxi source file
- f = codecs.open(filename, 'r', encoding="utf-8")
- pre, post = split(f)
- f.close()
+ with open(filename, 'r', encoding="utf-8") as f:
+ pre, old, post = split(f)
+
+ if new.strip() == '\n'.join(old).strip():
+ # no changes
+ return False
# write .pxi source file
- f = codecs.open(filename, 'w', encoding="utf-8")
- f.write(''.join(pre))
- f.write(COMMENT)
- f.write('\n'.join(result))
- f.write(''.join(post))
- f.close()
-
-collect_text = etree.XPath("string()")
-find_enums = etree.XPath(
- "//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]",
- namespaces = {'html' : 'http://www.w3.org/1999/xhtml'})
-
-def parse_enums(html_dir, html_filename, enum_dict):
- PARSE_ENUM_NAME = re.compile(r'\s*enum\s+(\w+)\s*{', re.I).match
- PARSE_ENUM_VALUE = re.compile(r'\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match
- tree = etree.parse(os.path.join(html_dir, html_filename))
- enums = find_enums(tree)
- for enum in enums:
- enum_name = PARSE_ENUM_NAME(collect_text(enum))
- if not enum_name:
- continue
- enum_name = enum_name.group(1)
- if enum_name not in ENUM_MAP:
+ with open(filename, 'w', encoding="utf-8") as f:
+ f.write(''.join(pre))
+ f.write(new)
+ f.write(''.join(post))
+
+ return True
+
+
+def parse_enums(doc_dir, api_filename, enum_dict):
+ tree = ET.parse(os.path.join(doc_dir, api_filename))
+ for enum in tree.iterfind('symbols/enum'):
+ enum_type = enum.get('type')
+ if enum_type not in ENUM_MAP:
continue
- print("Found enum", enum_name)
- entries = []
- for child in enum:
- name = child.text
- match = PARSE_ENUM_VALUE(child.tail)
- if not match:
- print("Ignoring enum %s (failed to parse field '%s')" % (
- enum_name, name))
- break
- value, descr = match.groups()
- entries.append((name, int(value), descr))
- else:
- enum_dict[enum_name] = entries
- return enum_dict
-
-enum_dict = {}
-parse_enums(HTML_DIR, 'libxml-xmlerror.html', enum_dict)
-#parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict)
-#parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict)
-parse_enums(HTML_DIR, 'libxml-relaxng.html', enum_dict)
-
-# regenerate source files
-pxi_result = []
-append_pxi = pxi_result.append
-pxd_result = []
-append_pxd = pxd_result.append
-
-append_pxd('cdef extern from "libxml/xmlerror.h":')
-
-ctypedef_indent = ' '*4
-constant_indent = ctypedef_indent*2
-
-for enum_name in ENUM_ORDER:
- constants = enum_dict[enum_name]
- pxi_name, prefix = ENUM_MAP[enum_name]
-
- append_pxd(ctypedef_indent + 'ctypedef enum %s:' % enum_name)
- append_pxi('cdef object %s = """\\' % pxi_name)
-
- prefix_len = len(prefix)
- length = 2 # each string ends with '\n\0'
- for name, val, descr in constants:
- if descr and descr != str(val):
- line = '%-50s = %7d # %s' % (name, val, descr)
- else:
- line = '%-50s = %7d' % (name, val)
- append_pxd(constant_indent + line)
-
- if name[:prefix_len] == prefix and len(name) > prefix_len:
- name = name[prefix_len:]
- line = '%s=%d' % (name, val)
- append_pxi(line)
- length += len(line) + 2 # + '\n\0'
-
- append_pxd('')
- append_pxi('"""')
- append_pxi('')
-
-# write source files
-print("Updating file %s" % BUILD_SOURCE_FILE)
-regenerate_file(BUILD_SOURCE_FILE, pxi_result)
-
-print("Updating file %s" % BUILD_DEF_FILE)
-regenerate_file(BUILD_DEF_FILE, pxd_result)
-
-print("Done")
+ entries = enum_dict.get(enum_type)
+ if not entries:
+ print("Found enum", enum_type)
+ entries = enum_dict[enum_type] = []
+ entries.append((
+ enum.get('name'),
+ int(enum.get('value')),
+ enum.get('info', '').strip(),
+ ))
+
+
+def main(doc_dir):
+ enum_dict = {}
+ parse_enums(doc_dir, 'libxml2-api.xml', enum_dict)
+ #parse_enums(doc_dir, 'libxml-xmlerror.html', enum_dict)
+ #parse_enums(doc_dir, 'libxml-xpath.html', enum_dict)
+ #parse_enums(doc_dir, 'libxml-xmlschemas.html', enum_dict)
+ #parse_enums(doc_dir, 'libxml-relaxng.html', enum_dict)
+
+ # regenerate source files
+ pxi_result = []
+ append_pxi = pxi_result.append
+ pxd_result = []
+ append_pxd = pxd_result.append
+
+ append_pxd('cdef extern from "libxml/xmlerror.h":')
+
+ ctypedef_indent = ' '*4
+ constant_indent = ctypedef_indent*2
+
+ for enum_name in ENUM_ORDER:
+ constants = enum_dict[enum_name]
+ constants.sort(key=operator.itemgetter(1))
+ pxi_name, prefix = ENUM_MAP[enum_name]
+
+ append_pxd(ctypedef_indent + 'ctypedef enum %s:' % enum_name)
+ append_pxi('cdef object %s = """\\' % pxi_name)
+
+ prefix_len = len(prefix)
+ length = 2 # each string ends with '\n\0'
+ for name, val, descr in constants:
+ if descr and descr != str(val):
+ line = '%-50s = %7d # %s' % (name, val, descr)
+ else:
+ line = '%-50s = %7d' % (name, val)
+ append_pxd(constant_indent + line)
+
+ if name[:prefix_len] == prefix and len(name) > prefix_len:
+ name = name[prefix_len:]
+ line = '%s=%d' % (name, val)
+ append_pxi(line)
+ length += len(line) + 2 # + '\n\0'
+
+ append_pxd('')
+ append_pxi('"""')
+ append_pxi('')
+
+ # write source files
+ print("Updating file %s" % BUILD_SOURCE_FILE)
+ updated = regenerate_file(BUILD_SOURCE_FILE, pxi_result)
+ if not updated:
+ print("No changes.")
+
+ print("Updating file %s" % BUILD_DEF_FILE)
+ updated = regenerate_file(BUILD_DEF_FILE, pxd_result)
+ if not updated:
+ print("No changes.")
+
+ print("Done")
+
+
+if __name__ == "__main__":
+ if len(sys.argv) < 2 or sys.argv[1].lower() in ('-h', '--help'):
+ print("This script generates the constants in file %s" % BUILD_SOURCE_FILE)
+ print("Call as")
+ print(sys.argv[0], "/path/to/libxml2-doc-dir")
+ sys.exit(len(sys.argv) > 1)
+
+ main(sys.argv[1])