#!/usr/bin/env python3
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A simple script for downloading the latest dictionaries."""
| 7 | |
| 8 | import glob |
| 9 | import os |
Mike Frysinger | 587bbd0 | 2022-02-28 01:52:16 -0500 | [diff] [blame] | 10 | from pathlib import Path |
Mike Frysinger | 1699d07 | 2022-03-01 23:42:30 -0500 | [diff] [blame] | 11 | import shutil |
Kevin Bailey | b53f0de | 2016-03-22 10:44:59 -0700 | [diff] [blame] | 12 | import sys |
Mike Frysinger | 587bbd0 | 2022-02-28 01:52:16 -0500 | [diff] [blame] | 13 | import urllib.request |
| 14 | import zipfile |
Kevin Bailey | b53f0de | 2016-03-22 10:44:59 -0700 | [diff] [blame] | 15 | |
| 16 | |
# Directory holding this script; main() changes into it before extracting.
DIR = Path(__file__).resolve().parent
# Downloaded archives are kept here so later runs can skip the fetch.
CACHE_DIR = DIR / "cache"
Chris Nardi | a9bac57 | 2017-10-14 16:47:23 -0400 | [diff] [blame] | 19 | |
Mike Frysinger | 587bbd0 | 2022-02-28 01:52:16 -0500 | [diff] [blame] | 20 | |
# Release of the sourceforge "wordlist" speller archives to fetch.  The
# version appears both in the directory and in each archive's file name, so
# keep it in one place to make bumping it a one-line change.
_WORDLIST_VERSION = "2020.12.07"
_WORDLIST_URL = (
    "https://sourceforge.net/projects/wordlist/files/speller/"
    f"{_WORDLIST_VERSION}/"
)
# Locale component of each hunspell archive name in that release.
_WORDLIST_LOCALES = ("en_US", "en_CA", "en_GB-ise", "en_GB-ize", "en_AU")

# URLs of every zip archive to download and extract, in processing order.
DICTIONARIES = (
    *(
        f"{_WORDLIST_URL}hunspell-{locale}-{_WORDLIST_VERSION}.zip"
        for locale in _WORDLIST_LOCALES
    ),
    "https://github.com/b00f/lilak/releases/latest/download/fa-IR.zip",
    # NOTE: need to remove IGNORE from uk_UA.aff
    # TODO: This archive no longer exists.
    # "https://github.com/brown-uk/dict_uk/releases/latest/download/"
    # "hunspell-uk_UA.zip",
)
| 38 | |
| 39 | |
def main(argv):
    """Download every dictionary archive and unpack it next to this script.

    Each URL in DICTIONARIES is fetched into CACHE_DIR unless a file of the
    same name is already cached, then extracted into DIR.  Afterwards the
    en_GB "-ise"/"-ize" files and the fa-IR files are renamed and the
    extracted fa-IR directory is removed.

    Args:
        argv: Command line arguments without the program name; must be empty.

    Returns:
        0 on success.

    Raises:
        SystemExit: If any argument is supplied.
    """
    if argv:
        sys.exit(f"{__file__}: script takes no args")
    # All relative paths below (extraction, globbing) are relative to DIR.
    os.chdir(DIR)

    CACHE_DIR.mkdir(exist_ok=True)

    for url in DICTIONARIES:
        cache = CACHE_DIR / url.rsplit("/", 1)[1]
        if not cache.exists():
            print(f"Downloading {url} to cache {cache}")
            # Write to a temporary name and rename only once the transfer has
            # finished, so an interrupted download is never mistaken for a
            # complete cached archive on the next run.
            tmp = cache.with_suffix(".tmp")
            with urllib.request.urlopen(url) as response, tmp.open("wb") as fp:
                # Stream in chunks instead of holding the whole zip in memory.
                shutil.copyfileobj(response, fp)
            tmp.rename(cache)

        print(f"Extracting {cache.name}")
        # Use a context manager so the archive's file handle is closed.
        with zipfile.ZipFile(cache) as archive:
            archive.extractall()

    # os.replace() overwrites files left by a previous run on every platform;
    # os.rename() raises on Windows when the destination already exists.
    for name in glob.glob("*en_GB-ise*"):
        os.replace(name, name.replace("-ise", ""))
    for name in glob.glob("*en_GB-ize*"):
        os.replace(name, name.replace("-ize", "_oxendict"))
    # Move the fa-IR files up into DIR as fa_IR.* and drop the leftover dir.
    for name in glob.glob("fa-IR/*fa-IR.*"):
        os.replace(name, os.path.basename(name.replace("-", "_")))
    shutil.rmtree("fa-IR")

    return 0
| 68 | |
Kevin Bailey | b53f0de | 2016-03-22 10:44:59 -0700 | [diff] [blame] | 69 | |
if __name__ == "__main__":
    # Pass only the user-supplied arguments; main() exits if it gets any.
    sys.exit(main(sys.argv[1:]))