1 files changed, 115 insertions, 0 deletions
diff --git a/ci/parse_relnotes.py b/ci/parse_relnotes.py
new file mode 100644
index 00000000..d19e6d60
--- /dev/null
+++ b/ci/parse_relnotes.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+Parse CHANGES.md into a JSON structure.
+
+Run with two arguments: the .md file to parse, and the JSON file to write:
+
+	python parse_relnotes.py CHANGES.md relnotes.json
+
+Every section that has something that looks like a version number in it will
+be recorded as the release notes for that version.
+
+"""
+
+import json
+import re
+import sys
+
+
+class TextChunkBuffer:
+    """Hold onto text chunks until needed."""
+    def __init__(self):
+        self.buffer = []
+
+    def append(self, text):
+        """Add `text` to the buffer."""
+        self.buffer.append(text)
+
+    def clear(self):
+        """Clear the buffer."""
+        self.buffer = []
+
+    def flush(self):
+        """Produce a ("text", text) tuple if there's anything here."""
+        buffered = "".join(self.buffer).strip()
+        if buffered:
+            yield ("text", buffered)
+        self.clear()
+
+
+def parse_md(lines):
+    """Parse markdown lines, producing (type, text) chunks."""
+    buffer = TextChunkBuffer()
+
+    for line in lines:
+        header_match = re.search(r"^(#+) (.+)$", line)
+        is_header = bool(header_match)
+        if is_header:
+            yield from buffer.flush()
+            hashes, text = header_match.groups()
+            yield (f"h{len(hashes)}", text)
+        else:
+            buffer.append(line)
+
+    yield from buffer.flush()
+
+
+def sections(parsed_data):
+    """Convert a stream of parsed tokens into sections with text and notes.
+
+    Yields a stream of:
+        ('h-level', 'header text', 'text')
+
+    """
+    header = None
+    text = []
+    for ttype, ttext in parsed_data:
+        if ttype.startswith('h'):
+            if header:
+                yield (*header, "\n".join(text))
+            text = []
+            header = (ttype, ttext)
+        elif ttype == "text":
+            text.append(ttext)
+        else:
+            raise Exception(f"Don't know ttype {ttype!r}")
+    yield (*header, "\n".join(text))
+
+
+def refind(regex, text):
+    """Find a regex in some text, and return the matched text, or None."""
+    m = re.search(regex, text)
+    if m:
+        return m.group()
+    else:
+        return None
+
+def relnotes(mdlines):
+    r"""Yield (version, text) pairs from markdown lines.
+
+    Each tuple is a separate version mentioned in the release notes.
+
+    A version is any section with \d\.\d in the header text.
+
+    """
+    for _, htext, text in sections(parse_md(mdlines)):
+        version = refind(r"\d+\.\d[^ ]*", htext)
+        if version:
+            prerelease = any(c in version for c in "abc")
+            when = refind(r"\d+-\d+-\d+", htext)
+            yield {
+                "version": version,
+                "text": text,
+                "prerelease": prerelease,
+                "when": when,
+            }
+
+def parse(md_filename, json_filename):
+    """Main function: parse markdown and write JSON."""
+    with open(md_filename) as mf:
+        markdown = mf.read()
+    with open(json_filename, "w") as jf:
+        json.dump(list(relnotes(markdown.splitlines(True))), jf, indent=4)
+
+if __name__ == "__main__":
+    parse(*sys.argv[1:])       # pylint: disable=no-value-for-parameter