Jörg Thalheim | 3e67e5c | 2017-05-01 02:26:56 +0200 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Zbigniew Jędrzejewski-Szmek | 35df744 | 2017-11-18 17:32:46 +0100 | [diff] [blame] | 2 | # SPDX-License-Identifier: LGPL-2.1+ |
Zbigniew Jędrzejewski-Szmek | e7098b6 | 2012-11-13 18:39:18 +0100 | [diff] [blame] | 3 | |
Zbigniew Jędrzejewski-Szmek | d9cfd69 | 2012-08-09 18:08:14 +0200 | [diff] [blame] | 4 | import sys |
| 5 | import collections |
Zbigniew Jędrzejewski-Szmek | ccc9a4f | 2013-01-26 10:47:16 -0500 | [diff] [blame] | 6 | import re |
Zbigniew Jędrzejewski-Szmek | 1c6c3ef | 2017-07-02 20:26:32 -0400 | [diff] [blame] | 7 | from xml_helper import xml_parse, xml_print, tree |
Zbigniew Jędrzejewski-Szmek | 827f70e | 2013-05-29 22:31:20 -0400 | [diff] [blame] | 8 | from copy import deepcopy |
Zbigniew Jędrzejewski-Szmek | d9cfd69 | 2012-08-09 18:08:14 +0200 | [diff] [blame] | 9 | |
# Template for the text of the index's colophon paragraph.  The {count},
# {sections} and {pages} placeholders are filled in with str.format() by
# _make_colophon().
COLOPHON = (
    'This index contains {count} entries in {sections} sections,\n'
    'referring to {pages} individual manual pages.\n'
)
| 14 | |
def _index_variablelists(t, directive_groups, formatting, ref):
    """Record the <varname> and <option> terms of every <variablelist> in t.

    ref is the (pagename, section) tuple appended for each term found.
    """
    storopt = directive_groups['options']
    for variablelist in t.iterfind('.//variablelist'):
        klass = variablelist.attrib.get('class')
        # An 'xpath' attribute on the list overrides where names are searched.
        searchpath = variablelist.attrib.get('xpath', './varlistentry/term/varname')
        storvar = directive_groups[klass or 'miscellaneous']
        # <option>s go in OPTIONS, unless class is specified
        option_stor = storvar if klass else storopt
        for xpath, stor in ((searchpath, storvar),
                            ('./varlistentry/term/option', option_stor)):
            for name in variablelist.iterfind(xpath):
                # Drop everything after the first '=' or ' '; a trailing '='
                # is kept, a trailing space is stripped.
                text = re.sub(r'([= ]).*', r'\1', name.text).rstrip()
                if text.startswith('-'):
                    # for options, merge options with and without mandatory arg
                    text = text.partition('=')[0]
                stor[text].append(ref)
                if text not in formatting:
                    # use element as formatted display
                    if name.text[-1] in "= '":
                        # clear() also drops children, attributes and tail
                        name.clear()
                    else:
                        name.tail = ''
                    name.text = text
                    formatting[text] = name
        extra = variablelist.attrib.get('extra-ref')
        if extra:
            # 'extra-ref' entries are filed in the same group as this list's
            # <option> terms.
            option_stor[extra].append(ref)
            if extra not in formatting:
                elt = tree.Element("varname")
                elt.text = extra
                formatting[extra] = elt


def _index_filenames(t, directive_groups, formatting, ref):
    """Record <filename>/<command> elements of t in the 'filenames' group.

    Elements inside <refsynopsisdiv> are always considered; any other
    <filename> only when its text starts with '/'.  Elements carrying
    index="false" are skipped.
    """
    storfile = directive_groups['filenames']
    for xpath, absolute_only in (('.//refsynopsisdiv//filename', False),
                                 ('.//refsynopsisdiv//command', False),
                                 ('.//filename', True)):
        for name in t.iterfind(xpath):
            if absolute_only and not (name.text and name.text.startswith('/')):
                continue
            if name.attrib.get('index') == 'false':
                continue
            name.tail = ''
            if name.text:
                if name.text.endswith('*'):
                    name.text = name.text[:-1]
                # names starting with '.' are not indexed
                if not name.text.startswith('.'):
                    # only the part before the first space is the entry
                    text = name.text.partition(' ')[0]
                    if text != name.text:
                        name.clear()
                        name.text = text
                    # the key loses a trailing '/', the displayed element keeps it
                    if text.endswith('/'):
                        text = text[:-1]
                    storfile[text].append(ref)
                    if text not in formatting:
                        # use element as formatted display
                        formatting[text] = name
            else:
                # no leading text: build the key from all nested text pieces
                text = ' '.join(name.itertext())
                storfile[text].append(ref)
                formatting[text] = name


def _index_constants(t, directive_groups, formatting, ref):
    """Record every <constant> of t (unless index="false") in 'constants'."""
    storfile = directive_groups['constants']
    for name in t.iterfind('.//constant'):
        if name.attrib.get('index') == 'false':
            continue
        name.tail = ''
        if name.text.startswith('('):  # a cast, strip it
            name.text = name.text.partition(' ')[2]
        storfile[name.text].append(ref)
        formatting[name.text] = name


def _index_specifiers(t, directive_groups, formatting, ref):
    """Record '%' specifiers from tables with class 'specifiers'.

    Only <literal> elements whose text starts with '%' and whose parent
    <entry> has no text before them are indexed.
    """
    storfile = directive_groups['specifiers']
    for name in t.iterfind(".//table[@class='specifiers']//entry/literal"):
        # NOTE(review): getparent() is not offered by xml.etree.ElementTree;
        # presumably xml_parse() returns an lxml tree -- confirm in xml_helper.
        if name.text[0] != '%' or name.getparent().text is not None:
            continue
        if name.attrib.get('index') == 'false':
            continue
        storfile[name.text].append(ref)
        formatting[name.text] = name


def _extract_directives(directive_groups, formatting, page):
    """Collect the index entries found in one manual page.

    page is handed to xml_parse(); the page title and volume are read from
    its <refmeta>.  For every indexed item a (pagename, section) tuple is
    appended to the matching group in directive_groups, and formatting maps
    the entry text to the element used to display it.  Both mappings are
    modified in place; elements of the parsed page are modified as well
    (tail/text trimmed) because they are reused for display.

    Nothing is returned.  Errors (a missing group key, missing <refmeta>
    children, elements without text) propagate to the caller.
    """
    t = xml_parse(page)
    section = t.find('./refmeta/manvolnum').text
    pagename = t.find('./refmeta/refentrytitle').text
    ref = (pagename, section)

    _index_variablelists(t, directive_groups, formatting, ref)
    _index_filenames(t, directive_groups, formatting, ref)
    _index_constants(t, directive_groups, formatting, ref)
    _index_specifiers(t, directive_groups, formatting, ref)
| 98 | |
def _make_section(template, name, directives, formatting):
    """Append one <varlistentry> per directive to the element with id *name*.

    directives maps an entry name to a list of (manpage, manvolume) tuples;
    formatting maps the same names to the element shown as the entry's term.
    Entries are emitted in sorted order, each followed by its de-duplicated,
    sorted list of page citations separated by ', '.
    """
    section_list = template.find(".//*[@id='{}']".format(name))
    for varname in sorted(directives):
        entry = tree.SubElement(section_list, 'varlistentry')

        # Copy the display element so the same element can be reused elsewhere.
        term = tree.SubElement(entry, 'term')
        term.append(deepcopy(formatting[varname]))

        listitem = tree.SubElement(entry, 'listitem')
        para = tree.SubElement(listitem, 'para')

        citations = []
        for manpage, manvolume in sorted(set(directives[varname])):
            citation = tree.SubElement(para, 'citerefentry')
            title = tree.SubElement(citation, 'refentrytitle')
            title.text = manpage
            title.attrib['target'] = varname
            volume = tree.SubElement(citation, 'manvolnum')
            volume.text = manvolume
            citations.append(citation)

        # Every citation except the last one is followed by a separator.
        for citation in citations[:-1]:
            citation.tail = ', '

        entry.tail = '\n\n'
| 120 | |
def _make_colophon(template, groups):
    """Fill the template's colophon paragraph with index statistics.

    groups is a sized collection of mappings from entry name to a list of
    page references.  The paragraph with id 'colophon' receives the COLOPHON
    text with the total number of entries, the number of groups and the
    number of distinct page references.
    """
    entry_total = 0
    referenced = set()
    for group in groups:
        entry_total += len(group)
        for page_refs in group.values():
            referenced.update(page_refs)

    colophon = template.find(".//para[@id='colophon']")
    colophon.text = COLOPHON.format(count=entry_total,
                                    sections=len(groups),
                                    pages=len(referenced))
| 133 | |
def _make_page(template, directive_groups, formatting):
    """Populate template with every group and the colophon, and return it.

    directive_groups has the shape

        {'class': {'variable': [('manpage', 'manvolume'), ...],
                   'variable2': ...},
         ...}

    Each key names a section to fill via _make_section(); formatting is
    passed through to it unchanged.  The template is modified in place and
    the same object is returned.
    """
    for group_name, group_entries in directive_groups.items():
        _make_section(template, group_name, group_entries, formatting)

    _make_colophon(template, directive_groups.values())
    return template
Zbigniew Jędrzejewski-Szmek | d9cfd69 | 2012-08-09 18:08:14 +0200 | [diff] [blame] | 149 | |
def make_page(template_path, xml_files):
    """Extract directives from xml_files and return XML index tree.

    template_path is parsed with xml_parse(); one group is created for the
    'id' of every <variablelist> found in the template.  Each page in
    xml_files is then scanned with _extract_directives() and the collected
    entries are written into the template.

    Raises ValueError naming the offending page if processing a page fails;
    the underlying exception is attached as __cause__.
    """
    template = xml_parse(template_path)
    names = [vl.get('id') for vl in template.iterfind('.//variablelist')]
    directive_groups = {name: collections.defaultdict(list)
                        for name in names}
    formatting = {}
    for page in xml_files:
        try:
            _extract_directives(directive_groups, formatting, page)
        except Exception as err:
            # Chain explicitly so the traceback shows the real cause, and
            # format the name so non-str path objects do not break the message.
            raise ValueError("failed to process {}".format(page)) from err

    return _make_page(template, directive_groups, formatting)
Zbigniew Jędrzejewski-Szmek | d9cfd69 | 2012-08-09 18:08:14 +0200 | [diff] [blame] | 164 | |
if __name__ == '__main__':
    # Arguments: OUTPUT TEMPLATE XML-FILE...
    output_path = sys.argv[1]
    template_path = sys.argv[2]
    xml_files = sys.argv[3:]

    # Build and serialise the whole index before opening the output, so that
    # a failure while processing the inputs does not leave a truncated
    # output file behind.
    xml = make_page(template_path, xml_files)
    data = xml_print(xml)

    # Opened in binary mode: xml_print() presumably returns bytes -- confirm
    # against xml_helper.
    with open(output_path, 'wb') as f:
        f.write(data)