blob: cc3cd5b90a6808fb4d16f2b4fea2339ae5e92f61 [file] [log] [blame]
Ken Russell67f63222016-12-18 17:50:32 -07001#!/usr/bin/python
2
3import sys
4import html5lib
5
# Parse the HTML file named by the first command-line argument with
# html5lib's "dom" treebuilder. The with-block closes the file as soon as
# parsing finishes, whether it succeeds or raises.
htmlfilename = sys.argv[1]
with open(htmlfilename) as htmlfile:
    doc = html5lib.parse(htmlfile, treebuilder="dom")
12
def elementHasClass(el, classArg):
    """
    Return true if and only if classArg is one of the classes of el.

    el is an element exposing getAttribute(); classArg is the class name to
    look for. The value of the "class" attribute is treated as a
    whitespace-separated list of tokens, so class names separated by tabs or
    newlines are recognised as well as those separated by spaces.
    """
    # split() with no argument splits on any run of whitespace and never
    # yields empty strings, so no separate filtering of "" is required.
    return classArg in el.getAttribute("class").split()
19
def elementTextContent(el):
    """
    Implementation of DOM Core's .textContent

    Walks the children of el in document order and returns the
    concatenation of the data of every text node found, descending
    recursively into child elements. Nodes of any other type contribute
    nothing.
    """
    pieces = []
    for node in el.childNodes:
        kind = node.nodeType
        if kind == 1:  # Node.ELEMENT_NODE
            pieces.append(elementTextContent(node))
        elif kind == 3:  # Node.TEXT_NODE
            pieces.append(node.data)
        # Any other node type is skipped.
    return "".join(pieces)
34
# Gather every <pre> element of the parsed document, keep those carrying the
# "idl" class, and print their text content separated by blank lines.
preList = doc.getElementsByTagName("pre")
idlList = [elementTextContent(p) for p in preList if elementHasClass(p, "idl")]
# The parenthesised single-argument form is valid both as a Python 2 print
# statement and as a Python 3 print() call, and emits the same output.
print("\n\n".join(idlList))