Rami Al-Rfou | b66aba9 | 2020-07-24 20:28:04 -0700 | [diff] [blame] | 1 | """Setup utility for gcld3.""" |
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 2 | |
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 3 | import os |
RAMI ALRFOU | 9ce971d | 2020-08-01 23:38:49 -0700 | [diff] [blame] | 4 | import platform |
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 5 | import shutil |
| 6 | import subprocess |
| 7 | import setuptools |
Rami Al-Rfou | b66aba9 | 2020-07-24 20:28:04 -0700 | [diff] [blame] | 8 | from setuptools.command import build_ext |
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 9 | |
# Version of the Python package; passed to `setuptools.setup` as `version`.
__version__ = '3.0.10'
# Distribution name; passed to `setuptools.setup` as `name`.
_NAME = 'gcld3'

# Requirement specifiers handed to `setuptools.setup` as `setup_requires`.
REQUIREMENTS = [
    'pybind11 >= 2.5.0',
    'wheel >= 0.34.2',
]
| 14 | |
# Protocol buffer definitions that `protoc` turns into C++ sources during the
# build. Paths are relative to the directory the build is run from.
PROTO_FILES = [
    'src/feature_extractor.proto',
    'src/sentence.proto',
    'src/task_spec.proto',
]
| 20 | |
# Hand-written C++ sources directly under `src/`.
_CORE_SRCS = [
    'src/base.cc',
    'src/embedding_feature_extractor.cc',
    'src/embedding_network.cc',
    'src/feature_extractor.cc',
    'src/feature_types.cc',
    'src/fml_parser.cc',
    'src/lang_id_nn_params.cc',
    'src/language_identifier_features.cc',
    'src/language_identifier_main.cc',
    'src/nnet_language_identifier.cc',
    'src/registry.cc',
    'src/relevant_script_feature.cc',
    'src/sentence_features.cc',
    'src/task_context.cc',
    'src/task_context_params.cc',
    'src/unicodetext.cc',
    'src/utils.cc',
    'src/workspace.cc',
]

# C++ sources under `src/script_span/`.
_SCRIPT_SPAN_SRCS = [
    'src/script_span/fixunicodevalue.cc',
    'src/script_span/generated_entities.cc',
    'src/script_span/generated_ulscript.cc',
    'src/script_span/getonescriptspan.cc',
    'src/script_span/offsetmap.cc',
    'src/script_span/text_processing.cc',
    'src/script_span/utf8statetable.cc',
]

# These CC files have to be generated by the proto buffer compiler 'protoc'.
_GENERATED_PROTO_SRCS = [
    'src/cld_3/protos/feature_extractor.pb.cc',
    'src/cld_3/protos/sentence.pb.cc',
    'src/cld_3/protos/task_spec.pb.cc',
]

# pybind11 bindings.
_BINDING_SRCS = [
    'gcld3/pybind_ext.cc',
]

# Every C++ translation unit compiled into the extension module, in the same
# order as the groups above.
SRCS = _CORE_SRCS + _SCRIPT_SPAN_SRCS + _GENERATED_PROTO_SRCS + _BINDING_SRCS
| 54 | |
| 55 | |
class CompileProtos(build_ext.build_ext):
  """`build_ext` command that generates the protobuf C++ sources first.

  Runs `protoc` over `PROTO_FILES`, writing the generated files under
  `src/cld_3/protos/`, and then continues with the regular `build_ext` step.
  """

  def run(self):
    """Generates the proto sources, then runs the standard extension build.

    Raises:
      RuntimeError: If no `protoc` executable can be found.
      subprocess.CalledProcessError: If `protoc` exits with a non-zero status.
    """
    protoc_missing = shutil.which('protoc') is None
    if protoc_missing:
      raise RuntimeError('Please install the proto buffer compiler.')

    # The C++ code expects the compiled protos under this directory, so make
    # sure it exists before protoc writes into it.
    out_dir = 'src/cld_3/protos/'
    os.makedirs(out_dir, exist_ok=True)

    protoc_cmd = [
        'protoc',
        f'--cpp_out={out_dir}',
        '--proto_path=src',
        *PROTO_FILES,
    ]
    subprocess.run(protoc_cmd, check=True, cwd='./')

    super().run()
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 71 | |
| 72 | |
class PyBindIncludes:
  """Lazily resolves the pybind11 include directory.

  The call to `pybind11.get_include()` is deferred until the object is
  converted to a string or to a filesystem path. This lazy evaluation allows
  pybind11 to be installed first and imported later to determine the correct
  include path.

  Besides `str()`, the object implements the `os.PathLike` protocol, so code
  that passes it to `os.fspath()`, `os.path` functions or `pathlib` gets the
  same directory instead of a `TypeError`.
  """

  def __str__(self):
    """Returns the pybind11 include directory as a string.

    Raises:
      ImportError: If pybind11 is not importable at the time of the call.
    """
    import pybind11  # pylint: disable=g-import-not-at-top
    return pybind11.get_include()

  def __fspath__(self):
    """Returns the same directory as `__str__`, for `os.fspath()` callers."""
    return str(self)
| 84 | |
Rami Al-Rfou | d86ea2d | 2020-08-06 23:12:32 -0700 | [diff] [blame^] | 85 | |
# Whether the build is running on macOS (`platform.system()` reports 'Darwin').
MACOS = platform.system() == 'Darwin'

# On macOS the extension is compiled as C++11 and both compiled and linked
# with `-stdlib=libc++`; on other platforms no extra flags are passed.
if MACOS:
  _compile_flags = ['-std=c++11', '-stdlib=libc++']
  _link_flags = ['-stdlib=libc++']
else:
  _compile_flags = []
  _link_flags = []

# The single C++ extension, built from `SRCS` as `gcld3.pybind_ext` and linked
# against the `protobuf` library.
ext_modules = [
    setuptools.Extension(
        'gcld3.pybind_ext',
        sorted(SRCS),
        include_dirs=[PyBindIncludes()],
        libraries=['protobuf'],
        extra_compile_args=_compile_flags,
        extra_link_args=_link_flags,
        language='c++',
    ),
]
| 99 | |
# Long description passed to `setuptools.setup` as `long_description`.
DESCRIPTION = (
    'CLD3 is a neural network model for language identification.\n'
    'This package contains the inference code and a trained model. See\n'
    'https://github.com/google/cld3 for more details.\n'
)
| 104 | |
# Register the package with setuptools. Keyword arguments are grouped as
# metadata, contents, then build configuration.
setuptools.setup(
    # Package metadata.
    name=_NAME,
    version=__version__,
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    url='https://github.com/google/cld3',
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    # Package contents.
    packages=setuptools.find_packages(),
    ext_modules=ext_modules,
    zip_safe=False,
    # Build configuration: `build_ext` is replaced by the command that runs
    # `protoc` before building the extension.
    setup_requires=REQUIREMENTS,
    cmdclass={'build_ext': CompileProtos},
)