Rami Al-Rfou | b66aba9 | 2020-07-24 20:28:04 -0700 | [diff] [blame] | 1 | """Setup utility for gcld3.""" |
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 2 | |
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 3 | import os |
RAMI ALRFOU | 9ce971d | 2020-08-01 23:38:49 -0700 | [diff] [blame] | 4 | import platform |
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 5 | import shutil |
| 6 | import subprocess |
| 7 | import setuptools |
Rami Al-Rfou | b66aba9 | 2020-07-24 20:28:04 -0700 | [diff] [blame] | 8 | from setuptools.command import build_ext |
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 9 | |
# Package version and distribution name passed to `setuptools.setup`.
__version__ = '3.0.10'
_NAME = 'gcld3'

# Build-time requirements, handed to `setup_requires`.
REQUIREMENTS = [
    'pybind11 >= 2.5.0',
    'wheel >= 0.34.2',
]

# Protocol buffer definitions that `protoc` compiles during the build.
PROTO_FILES = [
    'src/' + proto_name
    for proto_name in (
        'feature_extractor.proto',
        'sentence.proto',
        'task_spec.proto',
    )
]
| 20 | |
# Hand-written sources that live directly under src/.
_CORE_SRCS = [
    'src/base.cc',
    'src/embedding_feature_extractor.cc',
    'src/embedding_network.cc',
    'src/feature_extractor.cc',
    'src/feature_types.cc',
    'src/fml_parser.cc',
    'src/lang_id_nn_params.cc',
    'src/language_identifier_features.cc',
    'src/language_identifier_main.cc',
    'src/nnet_language_identifier.cc',
    'src/registry.cc',
    'src/relevant_script_feature.cc',
    'src/sentence_features.cc',
    'src/task_context.cc',
    'src/task_context_params.cc',
    'src/unicodetext.cc',
    'src/utils.cc',
    'src/workspace.cc',
]

# Sources under src/script_span/.
_SCRIPT_SPAN_SRCS = [
    'src/script_span/fixunicodevalue.cc',
    'src/script_span/generated_entities.cc',
    'src/script_span/generated_ulscript.cc',
    'src/script_span/getonescriptspan.cc',
    'src/script_span/offsetmap.cc',
    'src/script_span/text_processing.cc',
    'src/script_span/utf8statetable.cc',
]

# These CC files have to be generated by the proto buffer compiler 'protoc'.
_GENERATED_PROTO_SRCS = [
    'src/cld_3/protos/feature_extractor.pb.cc',
    'src/cld_3/protos/sentence.pb.cc',
    'src/cld_3/protos/task_spec.pb.cc',
]

# pybind11 bindings.
_BINDING_SRCS = ['src/python/gcld3.cc']

# Every C++ translation unit compiled into the extension module.
SRCS = (
    _CORE_SRCS + _SCRIPT_SPAN_SRCS + _GENERATED_PROTO_SRCS + _BINDING_SRCS
)
| 54 | |
| 55 | |
class CompileProtos(build_ext.build_ext):
  """`build_ext` command that runs `protoc` before building the extension."""

  def run(self):
    """Compiles PROTO_FILES to C++ and then runs the regular `build_ext`.

    Raises:
      RuntimeError: If no `protoc` executable can be located.
      subprocess.CalledProcessError: If `protoc` exits with a non-zero status.
    """
    if shutil.which('protoc') is None:
      raise RuntimeError('Please install the proto buffer compiler.')

    # The C++ code expects the compiled protos to live under this directory,
    # so make sure it exists before invoking the compiler.
    out_dir = 'src/cld_3/protos/'
    os.makedirs(out_dir, exist_ok=True)

    protoc_args = [
        'protoc',
        f'--cpp_out={out_dir}',
        '--proto_path=src',
        *PROTO_FILES,
    ]
    subprocess.run(protoc_args, check=True, cwd='./')

    # Hand over to the standard extension build.
    super().run()
Rami Al-Rfou | 37fa8fd | 2020-07-24 12:39:38 -0700 | [diff] [blame] | 71 | |
| 72 | |
class PyBindIncludes(object):
  """Lazily resolves the pybind11 include directory.

  The lookup is deferred until the object is converted to a string, so
  pybind11 does not have to be importable when this module is loaded. This
  allows pybind11 to be installed first and imported only later, once the
  include path is actually needed.
  """

  def __str__(self):
    """Returns the path reported by `pybind11.get_include()`."""
    # Imported here on purpose: pybind11 may not be available at load time.
    from pybind11 import get_include  # pylint: disable=g-import-not-at-top
    return get_include()
| 84 | |
MACOS = platform.system() == "Darwin"

# Extra compiler/linker flags are only passed when building on macOS; on
# every other platform both lists are empty.
if MACOS:
  _EXTRA_COMPILE_ARGS = ["-std=c++11", "-stdlib=libc++"]
  _EXTRA_LINK_ARGS = ["-stdlib=libc++"]
else:
  _EXTRA_COMPILE_ARGS = []
  _EXTRA_LINK_ARGS = []

# The single C++ extension module, linked against the protobuf library.
ext_modules = [
    setuptools.Extension(
        _NAME,
        sorted(SRCS),
        # Resolved lazily: see PyBindIncludes.
        include_dirs=[PyBindIncludes()],
        libraries=['protobuf'],
        extra_compile_args=_EXTRA_COMPILE_ARGS,
        extra_link_args=_EXTRA_LINK_ARGS,
        language='c++',
    ),
]
| 98 | |
# Long description passed to `setuptools.setup`.
DESCRIPTION = (
    'CLD3 is a neural network model for language identification.\n'
    'This package contains the inference code and a trained model. See\n'
    'https://github.com/google/cld3 for more details.\n'
)
| 103 | |
# Package metadata and build configuration. The `build_ext` command is
# replaced by CompileProtos so the extension build goes through that class.
setuptools.setup(
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    cmdclass={
        'build_ext': CompileProtos,
    },
    ext_modules=ext_modules,
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    name=_NAME,
    # This script uses f-strings, so it cannot run on interpreters older than
    # Python 3.6; declare that so installers can reject unsupported versions.
    python_requires='>=3.6',
    setup_requires=REQUIREMENTS,
    url='https://github.com/google/cld3',
    version=__version__,
    zip_safe=False,
)