"""Setup utility for gCLD3."""
# Blame: Rami Al-Rfou, commit 37fa8fd, 2020-07-24 12:39:38 -0700.
| 2 | |
| 3 | from distutils.command import build |
| 4 | import os |
| 5 | import shutil |
| 6 | import subprocess |
| 7 | import setuptools |
| 8 | |
# Package version and the name shared by the distribution and the extension.
__version__ = '3.0.0'
_NAME = 'gcld3'

# Protocol buffer definitions that are passed to `protoc` at build time.
PROTO_FILES = [
    'src/feature_extractor.proto',
    'src/sentence.proto',
    'src/task_spec.proto',
]

# C++ translation units compiled into the extension module.
SRCS = [
    'src/base.cc',
    'src/embedding_feature_extractor.cc',
    'src/embedding_network.cc',
    'src/feature_extractor.cc',
    'src/feature_types.cc',
    'src/fml_parser.cc',
    'src/lang_id_nn_params.cc',
    'src/language_identifier_features.cc',
    'src/language_identifier_main.cc',
    'src/nnet_language_identifier.cc',
    'src/registry.cc',
    'src/relevant_script_feature.cc',
    'src/sentence_features.cc',
    'src/task_context.cc',
    'src/task_context_params.cc',
    'src/unicodetext.cc',
    'src/utils.cc',
    'src/workspace.cc',
    'src/script_span/fixunicodevalue.cc',
    'src/script_span/generated_entities.cc',
    'src/script_span/generated_ulscript.cc',
    'src/script_span/getonescriptspan.cc',
    'src/script_span/offsetmap.cc',
    'src/script_span/text_processing.cc',
    'src/script_span/utf8statetable.cc',
    # These .cc files are not checked in as sources here; they have to be
    # generated by the protocol buffer compiler `protoc` before compilation.
    'src/cld_3/protos/feature_extractor.pb.cc',
    'src/cld_3/protos/sentence.pb.cc',
    'src/cld_3/protos/task_spec.pb.cc',
    # pybind11 bindings.
    'src/python/gcld3.cc',
]
| 51 | |
| 52 | |
class CompileProtos(build.build):
  """Build command that runs `protoc` before the regular build.

  The C++ sources generated from PROTO_FILES are written to
  `src/cld_3/protos/`, after which the standard `build` command runs.
  """

  def run(self):
    """Generate the protobuf C++ sources, then run the normal build.

    Raises:
      RuntimeError: If no `protoc` executable is found on the PATH.
      subprocess.CalledProcessError: If `protoc` exits with a non-zero status.
    """
    if shutil.which('protoc') is None:
      raise RuntimeError('Please install the proto buffer compiler.')

    # The C++ code expects the compiled protos under this directory, so
    # create it if necessary.
    compiled_protos_dir = 'src/cld_3/protos/'
    os.makedirs(compiled_protos_dir, exist_ok=True)

    command = [
        'protoc',
        f'--cpp_out={compiled_protos_dir}',
        '--proto_path=src',
        *PROTO_FILES,
    ]
    # Paths above are relative, so protoc is run from the current directory.
    subprocess.run(command, check=True, cwd='./')

    super().run()
| 68 | |
| 69 | |
class PyBindIncludes:
  """Lazily resolves the pybind11 include path.

  The call to `pybind11.get_include()` is deferred until the object is
  converted to a string. This allows pybind11 to be installed first and
  imported later, when the include path is actually needed.
  """

  def __str__(self):
    """Return the include directory reported by pybind11."""
    # Imported here on purpose: pybind11 may not be installed yet when this
    # module is first loaded.
    import pybind11  # pylint: disable=g-import-not-at-top
    return pybind11.get_include()
| 81 | |
| 82 | |
# The single C++ extension module, built from SRCS and linked against
# libprotobuf.
ext_modules = [
    setuptools.Extension(
        _NAME,
        sorted(SRCS),
        include_dirs=[
            # Resolved only when converted to a string, so pybind11 does not
            # have to be importable while this module is being loaded.
            PyBindIncludes(),
        ],
        libraries=['protobuf'],
        language='c++'),
]

DESCRIPTION = """CLD3 is a neural network model for language identification.
This package contains the inference code and a trained model. See
https://github.com/google/cld3 for more details.
"""

setuptools.setup(
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    # Replace the stock `build` command so the protos are compiled first.
    cmdclass={
        'build': CompileProtos,
    },
    ext_modules=ext_modules,
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    name=_NAME,
    setup_requires=['pybind11>=2.5.0'],
    url='https://github.com/google/cld3',
    version=__version__,
    zip_safe=False,
)