Christian Egli | c9cd88f | 2021-02-15 14:31:42 +0100 | [diff] [blame] | 1 | #! /usr/bin/python -u |
| 2 | # |
| 3 | # Copyright (C) 2020 Jake Kyle <jake@compassbraille.org> |
| 4 | # |
| 5 | # Copying and distribution of this file, with or without modification, |
| 6 | # are permitted in any medium without royalty provided the copyright |
| 7 | # notice and this notice are preserved. This file is offered as-is, |
| 8 | # without any warranty. |
| 9 | |
| 10 | """Convert an HTML file to Braille |
| 11 | |
| 12 | This is a tiny example that illustrates how you can use lxml to read |
| 13 | HTML and then translate only the text contained in the HTML nodes. |
| 14 | |
| 15 | Similar scripts can be written for XML in general using lxml.etree. |
| 16 | Much finer-grained control is also possible by selecting |
| 17 | different elements and attributes more specifically, using XPath and |
| 18 | other methods available within lxml, to do different things. |
| 19 | """ |
| 20 | |
| 21 | import textwrap |
| 22 | import louis |
| 23 | from lxml import html |
| 24 | |
# Liblouis translation table(s) passed to louis.translateString.
tableList = ["en-ueb-g2.ctb"]
# Maximum number of characters per wrapped line in the output file.
lineLength = 38
fileIn = input("Please enter the input file name: ")
fileOut = input("Please enter the output file name: ")

# Parse the input BEFORE opening the output file: opening with "w"
# truncates, so a missing or unparsable input would otherwise leave an
# empty (or clobbered) output file behind.
html_root = html.parse(fileIn).getroot()

# Explicit encoding so the written file does not depend on the platform's
# locale default.
with open(fileOut, "w", encoding="utf-8") as outputFile:
    # The root's children are expected to be <head> and <body>; each of
    # their direct children is translated as one unit of text.
    for head_or_body in html_root:
        for elem in head_or_body:
            # XPath string() gives the concatenated text of the element and
            # all of its descendants; evaluate it once and reuse it.
            line = elem.xpath("string()")
            if not line.strip():
                # Whitespace-only elements produce no output.
                continue
            translation = louis.translateString(tableList, line, 0, 0)
            # Wrap the translated text to the configured line length and
            # terminate each element's output with a newline.
            outputFile.write(textwrap.fill(translation, lineLength))
            outputFile.write("\n")

print("Done.")