#!/usr/bin/env python3
# SPDX-License-Identifier: LGPL-2.1-or-later

import collections
import re
import sys
from copy import deepcopy

from xml_helper import xml_parse, xml_print, tree

# Text template for the colophon paragraph of the generated index page.
# Placeholders: {count} (number of index entries), {sections} (number of
# groups) and {pages} (number of distinct manual pages referenced).
COLOPHON = '''\
This index contains {count} entries in {sections} sections,
referring to {pages} individual manual pages.
'''

def _index_variablelists(t, directive_groups, formatting, ref):
    """Index the <varname> and <option> terms of every <variablelist> in *t*."""
    storopt = directive_groups['options']
    for variablelist in t.iterfind('.//variablelist'):
        klass = variablelist.attrib.get('class')
        # A list may override where its indexable terms are found.
        searchpath = variablelist.attrib.get('xpath', './varlistentry/term/varname')
        storvar = directive_groups[klass or 'miscellaneous']
        # <option>s go in OPTIONS, unless class is specified
        storterm = storvar if klass else storopt
        for xpath, stor in ((searchpath, storvar),
                            ('./varlistentry/term/option', storterm)):
            for name in variablelist.iterfind(xpath):
                # Cut the term after its first '=' or ' ' (a kept trailing
                # space is then removed by rstrip), so "Foo=bar" -> "Foo=".
                text = re.sub(r'([= ]).*', r'\1', name.text).rstrip()
                if text.startswith('-'):
                    # for options, merge options with and without mandatory arg
                    text = text.partition('=')[0]
                stor[text].append(ref)
                if text not in formatting:
                    # use element as formatted display
                    if name.text[-1] in "= '":
                        # clear() also drops children, attributes and tail
                        name.clear()
                    else:
                        name.tail = ''
                    name.text = text
                    formatting[text] = name
        extra = variablelist.attrib.get('extra-ref')
        if extra:
            # NOTE(review): the extra reference is filed in the same group
            # as the <option> terms of this list (i.e. 'options' when the
            # list has no class) -- this is what the code always did via
            # the leftover loop variable; confirm it is intended.
            storterm[extra].append(ref)
            if extra not in formatting:
                elt = tree.Element("varname")
                elt.text = extra
                formatting[extra] = elt


def _index_filenames(t, storfile, formatting, ref):
    """Index file names and synopsis commands found in *t*."""
    for xpath, absolute_only in (('.//refsynopsisdiv//filename', False),
                                 ('.//refsynopsisdiv//command', False),
                                 ('.//filename', True)):
        for name in t.iterfind(xpath):
            # Outside of the synopsis only absolute paths are indexed.
            if absolute_only and not (name.text and name.text.startswith('/')):
                continue
            if name.attrib.get('index') == 'false':
                continue
            name.tail = ''
            if name.text:
                if name.text.endswith('*'):
                    name.text = name.text[:-1]
                # Names starting with '.' are not indexed.
                if not name.text.startswith('.'):
                    # Only the part before the first space is the name.
                    text = name.text.partition(' ')[0]
                    if text != name.text:
                        name.clear()
                        name.text = text
                    # The index key drops a trailing slash; the displayed
                    # element keeps it.
                    if text.endswith('/'):
                        text = text[:-1]
                    storfile[text].append(ref)
                    if text not in formatting:
                        # use element as formatted display
                        formatting[text] = name
            else:
                # No leading text: key on the text of the nested elements.
                text = ' '.join(name.itertext())
                storfile[text].append(ref)
                formatting[text] = name


def _index_constants(t, storfile, formatting, ref):
    """Index every <constant> element in *t* not marked index='false'."""
    for name in t.iterfind('.//constant'):
        if name.attrib.get('index') == 'false':
            continue
        name.tail = ''
        if name.text.startswith('('):  # a cast, strip it
            name.text = name.text.partition(' ')[2]
        storfile[name.text].append(ref)
        formatting[name.text] = name


def _index_specifiers(t, storfile, formatting, ref):
    """Index '%' specifiers from specifier tables and tagged <literal>s."""
    for name in t.iterfind(".//table[@class='specifiers']//entry/literal"):
        # Only literals starting with '%' whose parent entry has no text
        # before them are taken.
        if name.text[0] != '%' or name.getparent().text is not None:
            continue
        if name.attrib.get('index') == 'false':
            continue
        storfile[name.text].append(ref)
        formatting[name.text] = name
    for name in t.iterfind(".//literal[@class='specifiers']"):
        storfile[name.text].append(ref)
        formatting[name.text] = name


def _extract_directives(directive_groups, formatting, page):
    """Parse one manual page and add its indexable names to the index.

    directive_groups -- dict mapping a group name to a mapping of
        name -> list of (pagename, section) tuples; the groups 'options',
        'miscellaneous', 'filenames', 'constants' and 'specifiers' (plus any
        class named by a <variablelist>) are looked up and appended to.
    formatting -- dict mapping a name to the XML element used to display
        it; updated in place.
    page -- source of the page, passed as is to xml_parse().

    Elements of the parsed page are modified in place (text trimmed, tails
    removed) because they are stored in *formatting* for later display.
    Nothing is returned. Unexpected markup (e.g. an indexed element without
    leading text, or a missing group) is not handled here and surfaces as
    an exception.
    """
    t = xml_parse(page)
    section = t.find('./refmeta/manvolnum').text
    pagename = t.find('./refmeta/refentrytitle').text
    ref = (pagename, section)

    _index_variablelists(t, directive_groups, formatting, ref)
    _index_filenames(t, directive_groups['filenames'], formatting, ref)
    _index_constants(t, directive_groups['constants'], formatting, ref)
    _index_specifiers(t, directive_groups['specifiers'], formatting, ref)

def _make_section(template, name, directives, formatting):
    """Append one <varlistentry> per directive to a section of *template*.

    template -- XML tree containing an element whose id attribute is *name*.
    name -- id of the element that receives the entries.
    directives -- mapping of directive name -> iterable of
        (manpage, manvolume) tuples.
    formatting -- mapping of directive name -> element used to display it.

    Entries are added in sorted order of directive name; within an entry the
    page references are de-duplicated, sorted and separated by ', '.
    """
    varlist = template.find(".//*[@id='{}']".format(name))
    for varname, manpages in sorted(directives.items()):
        entry = tree.SubElement(varlist, 'varlistentry')
        term = tree.SubElement(entry, 'term')
        # Insert a copy so the element kept in *formatting* is left untouched.
        display = deepcopy(formatting[varname])
        term.append(display)

        para = tree.SubElement(tree.SubElement(entry, 'listitem'), 'para')

        b = None
        for manpage, manvolume in sorted(set(manpages)):
            # The separator goes on the previous reference, so the last
            # one is not followed by a comma.
            if b is not None:
                b.tail = ', '
            b = tree.SubElement(para, 'citerefentry')
            c = tree.SubElement(b, 'refentrytitle')
            c.text = manpage
            c.attrib['target'] = varname
            d = tree.SubElement(b, 'manvolnum')
            d.text = manvolume
        entry.tail = '\n\n'

def _make_colophon(template, groups):
    """Write the summary text into the 'colophon' paragraph of *template*.

    template -- XML tree containing a <para id='colophon'> element.
    groups -- sized collection of mappings name -> list of
        (manpage, manvolume) tuples, one mapping per index section.

    The text reports the total number of names, the number of groups and
    the number of distinct (manpage, manvolume) pairs referenced.
    """
    count = 0
    pages = set()
    for group in groups:
        count += len(group)
        for pagelist in group.values():
            pages.update(pagelist)

    para = template.find(".//para[@id='colophon']")
    para.text = COLOPHON.format(count=count,
                                sections=len(groups),
                                pages=len(pages))

def _make_page(template, directive_groups, formatting):
    """Create an XML tree from directive_groups.

    directive_groups = {
       'class': {'variable': [('manpage', 'manvolume'), ...],
                 'variable2': ...},
       ...
    }

    Each group is rendered into the element of *template* whose id is the
    group name, using *formatting* (name -> display element) for the terms;
    then the colophon is filled in. *template* is modified in place and
    returned.
    """
    for name, directives in directive_groups.items():
        _make_section(template, name, directives, formatting)

    _make_colophon(template, directive_groups.values())

    return template

def make_page(template_path, xml_files):
    """Extract directives from xml_files and return XML index tree.

    template_path -- source of the index template, passed to xml_parse();
        the id of each of its <variablelist> elements names one index group.
    xml_files -- iterable of manual page sources to index.

    Raises ValueError naming the offending page if processing a page fails;
    the original exception is attached as the cause.
    """
    template = xml_parse(template_path)
    names = [vl.get('id') for vl in template.iterfind('.//variablelist')]
    directive_groups = {name: collections.defaultdict(list)
                        for name in names}
    formatting = {}
    for page in xml_files:
        try:
            _extract_directives(directive_groups, formatting, page)
        except Exception as e:
            raise ValueError("failed to process " + page) from e

    return _make_page(template, directive_groups, formatting)

if __name__ == '__main__':
    # Arguments: <output file> <template> <page>...
    output_path = sys.argv[1]
    template_path = sys.argv[2]
    xml_files = sys.argv[3:]
    # Build and serialise the index before opening the output, so that a
    # failure does not leave a truncated output file behind.
    xml = make_page(template_path, xml_files)
    data = xml_print(xml)
    # Written in binary mode; xml_print() presumably returns bytes.
    with open(output_path, 'wb') as f:
        f.write(data)