#! /usr/bin/python -u
#
# Copyright (C) 2020 Jake Kyle <jake@compassbraille.org>
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved. This file is offered as-is,
# without any warranty.

"""Convert an HTML file to Braille

This is a tiny example that illustrates how you can use lxml to read
HTML and then translate only the text contained in the HTML nodes.

Similar scripts can be written for XML in general using lxml.etree.
Also there is much fine-grained control possible by selecting
different elements and attributes more specifically, using xpath and
other methods available within lxml, to do different things.
"""

import textwrap

import louis
from lxml import html

# Translation table(s) passed to liblouis for every call below.
tableList = ["en-ueb-g2.ctb"]
# Maximum width, in characters, of each wrapped output line.
lineLength = 38
fileIn = input("Please enter the input file name: ")
fileOut = input("Please enter the output file name: ")

# Parse the input BEFORE opening the output. Opening with mode "w" truncates
# the target immediately, so a missing or unparsable input file (or the same
# path entered at both prompts) would otherwise wipe data before anything
# had been read.
html_root = html.parse(fileIn).getroot()

# An explicit encoding keeps the output independent of the platform locale.
with open(fileOut, "w", encoding="utf-8") as outputFile:
    # Walk two levels down from the root: each child of the root, then each
    # of that child's direct children. Every such grandchild is translated
    # as one unit of text.
    for head_or_body in html_root:
        for elem in head_or_body:
            # XPath string() yields the concatenated text of the element and
            # all of its descendants; evaluate it once and reuse the result.
            line = elem.xpath("string()")
            if not line.strip():
                # Nothing but whitespace: emit no output for this element.
                continue
            translation = louis.translateString(tableList, line, 0, 0)
            outputFile.write(textwrap.fill(translation, lineLength))
            outputFile.write("\n")

print("Done.")