blob: 29fb2003b949e33d4cbea9e9eb470ee27dc974a0 [file] [log] [blame]
"""Setup utility for gcld3."""
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -07002
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -07003import os
4import shutil
5import subprocess
6import setuptools
Rami Al-Rfoub66aba92020-07-24 20:28:04 -07007from setuptools.command import build_ext
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -07008
# Version string passed to setuptools.setup() as the package version.
__version__ = '3.0.1'
# Name shared by the distribution and the compiled extension module.
_NAME = 'gcld3'

# Packages passed to setuptools.setup() as `setup_requires`.
REQUIREMENTS = ['pybind11 >= 2.5.0', 'wheel >= 0.34.2']
13
# Protocol buffer definitions handed to `protoc` to generate C++ sources.
PROTO_FILES = [
    'src/feature_extractor.proto',
    'src/sentence.proto',
    'src/task_spec.proto',
]
19
# C++ translation units compiled into the extension module.
SRCS = [
    'src/base.cc',
    'src/embedding_feature_extractor.cc',
    'src/embedding_network.cc',
    'src/feature_extractor.cc',
    'src/feature_types.cc',
    'src/fml_parser.cc',
    'src/lang_id_nn_params.cc',
    'src/language_identifier_features.cc',
    'src/language_identifier_main.cc',
    'src/nnet_language_identifier.cc',
    'src/registry.cc',
    'src/relevant_script_feature.cc',
    'src/sentence_features.cc',
    'src/task_context.cc',
    'src/task_context_params.cc',
    'src/unicodetext.cc',
    'src/utils.cc',
    'src/workspace.cc',
    'src/script_span/fixunicodevalue.cc',
    'src/script_span/generated_entities.cc',
    'src/script_span/generated_ulscript.cc',
    'src/script_span/getonescriptspan.cc',
    'src/script_span/offsetmap.cc',
    'src/script_span/text_processing.cc',
    'src/script_span/utf8statetable.cc',
    # These CC files have to be generated by the proto buffer compiler 'protoc'
    'src/cld_3/protos/feature_extractor.pb.cc',
    'src/cld_3/protos/sentence.pb.cc',
    'src/cld_3/protos/task_spec.pb.cc',
    # pybind11 bindings
    'src/python/gcld3.cc',
]
53
54
class CompileProtos(build_ext.build_ext):
    """Compile protocol buffers via `protoc` compiler.

    Extends the standard `build_ext` command so that the C++ protobuf
    sources are generated before the extension itself is built.
    """

    def run(self):
        """Generate the C++ protobuf sources, then run the regular build.

        Raises:
          RuntimeError: If no `protoc` executable can be found.
          subprocess.CalledProcessError: If `protoc` exits with a non-zero
            status.
        """
        protoc = shutil.which('protoc')
        if protoc is None:
            raise RuntimeError('Please install the proto buffer compiler.')

        # The C++ code expects the protos to be compiled under the following
        # directory, therefore, create it if necessary.
        compiled_protos_dir = 'src/cld_3/protos/'
        os.makedirs(compiled_protos_dir, exist_ok=True)

        # Every path here is relative, so the command is resolved against
        # the current working directory.
        command = [
            protoc,
            f'--cpp_out={compiled_protos_dir}',
            '--proto_path=src',
            *PROTO_FILES,
        ]
        subprocess.run(command, check=True)
        super().run()
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -070070
71
class PyBindIncludes:
    """Returns the include paths for pybind11 when needed.

    To delay the invocation of "pybind11.get_include()" until it is available
    in the environment. This lazy evaluation allows us to install it first,
    then import it later to determine the correct include paths.
    """

    def __str__(self):
        """Return the value of `pybind11.get_include()`."""
        # Imported here rather than at module level so that merely creating
        # an instance does not require pybind11 to be installed yet.
        import pybind11  # pylint: disable=g-import-not-at-top
        return pybind11.get_include()
83
84
# The single C++ extension module: every file in SRCS (in sorted order),
# linked against the `protobuf` library.
ext_modules = [
    setuptools.Extension(
        _NAME,
        sorted(SRCS),
        include_dirs=[
            # Placeholder object; pybind11 is only imported when this is
            # converted to a string.
            PyBindIncludes(),
        ],
        libraries=['protobuf'],
        language='c++',
    ),
]
95
# Long description passed to setuptools.setup().
DESCRIPTION = """CLD3 is a neural network model for language identification.
This package contains the inference code and a trained model. See
https://github.com/google/cld3 for more details.
"""
100
# Package metadata and build configuration.
setuptools.setup(
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    cmdclass={
        # Replace the stock `build_ext` command with the one that runs
        # `protoc` first.
        'build_ext': CompileProtos,
    },
    ext_modules=ext_modules,
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    name=_NAME,
    setup_requires=REQUIREMENTS,
    url='https://github.com/google/cld3',
    version=__version__,
    zip_safe=False,
)
115)