python/examples/translate-html.py - chromium.googlesource.com/external/liblouis-github - Gitiles

 #! /usr/bin/python -u
 #
 # Copyright (C) 2020 Jake Kyle <jake@compassbraille.org>
 #
 # Copying and distribution of this file, with or without modification,
 # are permitted in any medium without royalty provided the copyright
 # notice and this notice are preserved. This file is offered as-is,
 # without any warranty.

 """Convert an HTML file to Braille

 This is a tiny example that illustrates how you can use lxml to read
 HTML and then translate only the text contained in the HTML nodes.

 Similar scripts can be written for XML in general using lxml.etree.
 Also there is much fine grained control possible by selecting
 different elements and attributes more specifically, using xpath and
 other methods available within lxml, to do different things
 """

 import textwrap
 import louis
 from lxml import html

 tableList = ["en-ueb-g2.ctb"]
 lineLength = 38
 fileIn = input("Please enter the input file name: ")
 fileOut = input("Please enter the output file name: ")

 with open(fileOut, "w") as outputFile:
     html_root = html.parse(fileIn).getroot()
     for head_or_body in html_root:
         for elem in head_or_body:
             if elem.xpath("string()").strip() != "":
                 line = elem.xpath("string()")
                 translation = louis.translateString(tableList, line, 0, 0)
                 outputFile.write(textwrap.fill(translation, lineLength))
                 outputFile.write("\n")

 print ("Done.")
	#! /usr/bin/python -u
	#
	# Copyright (C) 2020 Jake Kyle <jake@compassbraille.org>
	#
	# Copying and distribution of this file, with or without modification,
	# are permitted in any medium without royalty provided the copyright
	# notice and this notice are preserved. This file is offered as-is,
	# without any warranty.

	"""Convert an HTML file to Braille

	This is a tiny example that illustrates how you can use lxml to read
	HTML and then translate only the text contained in the HTML nodes.

	Similar scripts can be written for XML in general using lxml.etree.
	Also there is much fine grained control possible by selecting
	different elements and attributes more specifically, using xpath and
	other methods available within lxml, to do different things
	"""

	import textwrap
	import louis
	from lxml import html

	tableList = ["en-ueb-g2.ctb"]
	lineLength = 38
	fileIn = input("Please enter the input file name: ")
	fileOut = input("Please enter the output file name: ")

	with open(fileOut, "w") as outputFile:
	html_root = html.parse(fileIn).getroot()
	for head_or_body in html_root:
	for elem in head_or_body:
	if elem.xpath("string()").strip() != "":
	line = elem.xpath("string()")
	translation = louis.translateString(tableList, line, 0, 0)
	outputFile.write(textwrap.fill(translation, lineLength))
	outputFile.write("\n")

	print ("Done.")