#!/usr/bin/env python3
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A simple script for downloading latest dictionaries."""
| 7 | |
| 8 | import glob |
| 9 | import os |
Mike Frysinger | 587bbd0 | 2022-02-28 01:52:16 -0500 | [diff] [blame] | 10 | from pathlib import Path |
Mike Frysinger | 1699d07 | 2022-03-01 23:42:30 -0500 | [diff] [blame] | 11 | import shutil |
Kevin Bailey | b53f0de | 2016-03-22 10:44:59 -0700 | [diff] [blame] | 12 | import sys |
Mike Frysinger | 587bbd0 | 2022-02-28 01:52:16 -0500 | [diff] [blame] | 13 | import urllib.request |
| 14 | import zipfile |
Kevin Bailey | b53f0de | 2016-03-22 10:44:59 -0700 | [diff] [blame] | 15 | |
| 16 | |
# Directory that contains this script.
DIR = Path(__file__).resolve().parent
# Sub-directory of DIR where main() keeps the downloaded archives.
CACHE_DIR = DIR.joinpath("cache")


# The English archives all live in one SourceForge release directory and
# differ only in the locale embedded in the file name.
_EN_RELEASE = "2020.12.07"
_EN_BASE_URL = (
    "https://sourceforge.net/projects/wordlist/files/speller/"
    f"{_EN_RELEASE}/"
)
_EN_LOCALES = ("en_US", "en_CA", "en_GB-ise", "en_GB-ize", "en_AU")

# URLs of every dictionary archive to fetch.  The last path component of
# each URL is the archive's file name.
DICTIONARIES = (
    *(
        f"{_EN_BASE_URL}hunspell-{locale}-{_EN_RELEASE}.zip"
        for locale in _EN_LOCALES
    ),
    "https://github.com/b00f/lilak/releases/latest/download/fa-IR.zip",
    "https://github.com/brown-uk/dict_uk/releases/v5.6.0/download/"
    "hunspell-uk_UA_5.6.0.zip",
)
| 36 | |
| 37 | |
def main(argv):
    """Download, extract and rename the dictionaries in DICTIONARIES.

    Every archive is fetched into CACHE_DIR unless a file with the same name
    is already cached, then extracted into DIR.  Afterwards the extracted
    en_GB and fa-IR files are renamed and the IGNORE line is dropped from
    uk_UA.aff.

    Args:
        argv: Command line arguments without the program name.  Must be
            empty.

    Returns:
        0 on success.

    Raises:
        SystemExit: If any argument is passed.
        ValueError: If uk_UA.aff does not contain the expected IGNORE line.
    """
    if argv:
        sys.exit(f"{__file__}: script takes no args")
    # The extraction and the glob patterns below use relative paths, so run
    # from the directory that holds this script.
    os.chdir(DIR)

    CACHE_DIR.mkdir(exist_ok=True)

    for url in DICTIONARIES:
        # The cached file is named after the last component of the URL.
        cache = CACHE_DIR / url.rsplit("/", 1)[1]
        if not cache.exists():
            print(f"Downloading {url} to cache {cache}")
            # Write under a temporary name and rename once complete, so a
            # failed download never leaves a file under the final name.
            tmp = cache.with_suffix(".tmp")
            with urllib.request.urlopen(url) as response:
                tmp.write_bytes(response.read())
            tmp.rename(cache)

        print(f"Extracting {cache.name}")
        # Use a context manager so the archive's file handle is closed as
        # soon as extraction finishes instead of lingering until GC.
        with zipfile.ZipFile(cache) as archive:
            archive.extractall()

    # The "-ise" spelling variant becomes the plain en_GB dictionary.
    for name in glob.glob("*en_GB-ise*"):
        os.rename(name, name.replace("-ise", ""))
    # The "-ize" spelling variant is stored with an "_oxendict" suffix.
    for name in glob.glob("*en_GB-ize*"):
        os.rename(name, name.replace("-ize", "_oxendict"))
    # Move the fa-IR files out of their sub-directory, switching "-" to "_"
    # in the file name, then discard whatever is left of that directory.
    for name in glob.glob("fa-IR/*fa-IR.*"):
        os.rename(name, os.path.basename(name.replace("-", "_")))
    shutil.rmtree("fa-IR")

    # Need to remove IGNORE as our tools don't support it.
    aff_path = DIR / "uk_UA.aff"
    lines = aff_path.read_bytes().splitlines(keepends=True)
    # list.remove() raises ValueError when the line is missing, which makes
    # a change in the upstream file fail loudly rather than pass silently.
    lines.remove("IGNORE ́\n".encode("utf-8"))
    aff_path.write_bytes(b"".join(lines))

    return 0
| 72 | |
Kevin Bailey | b53f0de | 2016-03-22 10:44:59 -0700 | [diff] [blame] | 73 | |
if __name__ == "__main__":
    # Pass everything after the program name to main() and use its return
    # value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)