blob: c320d5e90b5f4c3ad40ce2ce6e47473d1c330fba [file] [log] [blame]
Mike Frysinger587bbd02022-02-28 01:52:16 -05001#!/usr/bin/env python3
Kevin Baileyb53f0de2016-03-22 10:44:59 -07002# Copyright 2015 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A simple script for downloading latest dictionaries."""
7
8import glob
9import os
Mike Frysinger587bbd02022-02-28 01:52:16 -050010from pathlib import Path
Mike Frysinger1699d072022-03-01 23:42:30 -050011import shutil
Kevin Baileyb53f0de2016-03-22 10:44:59 -070012import sys
Mike Frysinger587bbd02022-02-28 01:52:16 -050013import urllib.request
14import zipfile
Kevin Baileyb53f0de2016-03-22 10:44:59 -070015
16
# Absolute directory that contains this script.  main() changes into it
# before extracting, so the dictionaries are unpacked next to the script.
DIR = Path(__file__).resolve().parents[0]

# Downloaded archives are stored here; main() only downloads an archive when
# its file is not already present in this directory.
CACHE_DIR = DIR.joinpath("cache")
Chris Nardia9bac572017-10-14 16:47:23 -040019
Mike Frysinger587bbd02022-02-28 01:52:16 -050020
# Release of the SourceForge "wordlist" hunspell archives.  The same value
# appears in both the directory and the file name of every English archive.
_WORDLIST_RELEASE = "2020.12.07"
_WORDLIST_URL = (
    "https://sourceforge.net/projects/wordlist/files/speller/"
    f"{_WORDLIST_RELEASE}/"
)
# English variants published in that release, in download order.
_WORDLIST_VARIANTS = ("en_US", "en_CA", "en_GB-ise", "en_GB-ize", "en_AU")

# URLs of the zip archives that main() downloads and extracts, in order.
# The last path component of each URL is used as the cache file name.
DICTIONARIES = tuple(
    f"{_WORDLIST_URL}hunspell-{variant}-{_WORDLIST_RELEASE}.zip"
    for variant in _WORDLIST_VARIANTS
) + (
    "https://github.com/b00f/lilak/releases/latest/download/fa-IR.zip",
    # NOTE(review): GitHub's documented asset URL for a tagged release is
    # ".../releases/download/<tag>/<asset>"; this entry has the tag and
    # "download" the other way round -- confirm that it still resolves.
    "https://github.com/brown-uk/dict_uk/releases/v5.6.0/download/"
    "hunspell-uk_UA_5.6.0.zip",
)
36
37
def _ensure_cached(url):
    """Return the cache path for *url*, downloading the archive if missing.

    The cache file is named after the last path component of the URL.
    """
    cache = CACHE_DIR / url.rsplit("/", 1)[1]
    if not cache.exists():
        print(f"Downloading {url} to cache {cache}")
        # Write under a temporary name first so that an interrupted download
        # never leaves a truncated archive under the final cache name.
        tmp = cache.with_suffix(".tmp")
        with urllib.request.urlopen(url) as response:
            tmp.write_bytes(response.read())
        tmp.replace(cache)
    return cache


def _rename_extracted_files():
    """Give the files extracted into the current directory their final names.

    os.replace() is used instead of os.rename() so that files left over from
    a previous run are overwritten on every platform (os.rename() refuses to
    overwrite an existing file on Windows).
    """
    # The "-ise" British variant becomes plain en_GB.
    for name in glob.glob("*en_GB-ise*"):
        os.replace(name, name.replace("-ise", ""))
    # The "-ize" British variant becomes en_GB_oxendict.
    for name in glob.glob("*en_GB-ize*"):
        os.replace(name, name.replace("-ize", "_oxendict"))
    # The Persian files are moved out of their fa-IR/ sub-directory and
    # renamed to fa_IR.*; whatever is left in that directory is discarded.
    for name in glob.glob("fa-IR/*fa-IR.*"):
        os.replace(name, os.path.basename(name.replace("-", "_")))
    shutil.rmtree("fa-IR")


def _strip_ignore_directives(aff_path):
    """Remove every ``IGNORE`` directive line from the affix file *aff_path*.

    Lines are matched on their first whitespace-separated token, so the match
    depends neither on the line ending (LF or CRLF) nor on the exact
    characters that follow the keyword.

    Raises:
        ValueError: if the file contains no IGNORE directive, which means the
            upstream file no longer looks the way this script expects.
    """
    lines = aff_path.read_bytes().splitlines(keepends=True)
    kept = [line for line in lines if line.split(None, 1)[:1] != [b"IGNORE"]]
    if len(kept) == len(lines):
        raise ValueError(f"{aff_path}: no IGNORE directive found to remove")
    aff_path.write_bytes(b"".join(kept))


def main(argv):
    """Download (if not cached), extract and post-process all dictionaries.

    Args:
        argv: Command line arguments without the program name; must be empty.

    Returns:
        0 on success.

    Raises:
        SystemExit: if any argument is supplied.
        ValueError: if uk_UA.aff has no IGNORE directive to strip.
    """
    if argv:
        sys.exit(f"{__file__}: script takes no args")
    # Everything below works with paths relative to the script's directory.
    os.chdir(DIR)

    CACHE_DIR.mkdir(exist_ok=True)

    for url in DICTIONARIES:
        cache = _ensure_cached(url)
        print(f"Extracting {cache.name}")
        # The context manager closes the archive's file handle promptly.
        with zipfile.ZipFile(cache) as archive:
            archive.extractall()

    _rename_extracted_files()

    # Need to remove IGNORE as our tools don't support it.
    _strip_ignore_directives(DIR / "uk_UA.aff")

    return 0
72
Kevin Baileyb53f0de2016-03-22 10:44:59 -070073
# Script entry point: forward the arguments (minus the program name) to
# main() and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)