# blob: 8312db56574dc3cd94180c587c4f83677d5d40dc [file] [log] [blame]
# Rami Al-Rfou 37fa8fd 2020-07-24 12:39:38 -0700
"""Setup utility for gCLD3."""
2
3from distutils.command import build
4import os
5import shutil
6import subprocess
7import setuptools
8
# Release version handed to setuptools.setup() as `version`.
__version__ = '3.0.0'
# Distribution name; also used as the name of the compiled extension module.
_NAME = 'gcld3'
11
# Protocol buffer definitions handed to `protoc`; every path is relative to
# the repository root and lives directly under `src/`.
PROTO_FILES = [
    'src/' + proto_name
    for proto_name in (
        'feature_extractor.proto',
        'sentence.proto',
        'task_spec.proto',
    )
]
17
# C++ translation units compiled into the extension module, listed by group.
SRCS = (
    # Hand-written library sources.
    [
        'src/base.cc',
        'src/embedding_feature_extractor.cc',
        'src/embedding_network.cc',
        'src/feature_extractor.cc',
        'src/feature_types.cc',
        'src/fml_parser.cc',
        'src/lang_id_nn_params.cc',
        'src/language_identifier_features.cc',
        'src/language_identifier_main.cc',
        'src/nnet_language_identifier.cc',
        'src/registry.cc',
        'src/relevant_script_feature.cc',
        'src/sentence_features.cc',
        'src/task_context.cc',
        'src/task_context_params.cc',
        'src/unicodetext.cc',
        'src/utils.cc',
        'src/workspace.cc',
    ]
    # Sources located in the `script_span` sub-directory.
    + [
        'src/script_span/fixunicodevalue.cc',
        'src/script_span/generated_entities.cc',
        'src/script_span/generated_ulscript.cc',
        'src/script_span/getonescriptspan.cc',
        'src/script_span/offsetmap.cc',
        'src/script_span/text_processing.cc',
        'src/script_span/utf8statetable.cc',
    ]
    # These CC files have to be generated by the proto buffer compiler
    # 'protoc'; they do not exist until the protos have been compiled.
    + [
        'src/cld_3/protos/feature_extractor.pb.cc',
        'src/cld_3/protos/sentence.pb.cc',
        'src/cld_3/protos/task_spec.pb.cc',
    ]
    # pybind11 bindings.
    + [
        'src/python/gcld3.cc',
    ]
)
51
52
class CompileProtos(build.build):
    """Build command that runs `protoc` before the regular build steps."""

    def run(self):
        """Generate the C++ protobuf sources, then run the standard build.

        Raises:
            RuntimeError: If no `protoc` executable can be found on the PATH.
            subprocess.CalledProcessError: If `protoc` exits with a non-zero
                status.
        """
        protoc_path = shutil.which('protoc')
        if protoc_path is None:
            raise RuntimeError('Please install the proto buffer compiler.')

        # The C++ sources include the generated headers from this exact
        # location, so make sure the directory exists before protoc writes
        # into it.
        output_dir = 'src/cld_3/protos/'
        os.makedirs(output_dir, exist_ok=True)

        protoc_args = [
            'protoc',
            f'--cpp_out={output_dir}',
            '--proto_path=src',
            *PROTO_FILES,
        ]
        # All paths above are relative, so protoc is run from the current
        # working directory.
        subprocess.run(protoc_args, check=True, cwd='./')

        super().run()
68
69
class PyBindIncludes:
    """Lazy placeholder for the pybind11 include directory.

    Creating an instance does not touch pybind11 at all. The package is only
    imported when the instance is converted to a string, so pybind11 can be
    installed first and queried for its include path afterwards.
    """

    def __str__(self):
        """Return the include path reported by `pybind11.get_include()`."""
        # Deliberately imported here rather than at module level: pybind11
        # may not be installed yet when this file is first loaded.
        from pybind11 import get_include  # pylint: disable=g-import-not-at-top
        return get_include()
81
82
# The single C++ extension module shipped by this package. Sources are
# sorted before being handed to the build, and the pybind11 include
# directory is supplied through a PyBindIncludes instance so that it is only
# looked up when converted to a string.
ext_modules = [
    setuptools.Extension(
        name=_NAME,
        sources=sorted(SRCS),
        language='c++',
        libraries=['protobuf'],
        include_dirs=[PyBindIncludes()],
    ),
]
93
# Long description passed to setuptools.setup(); the text ends with a newline.
DESCRIPTION = (
    'CLD3 is a neural network model for language identification.\n'
    'This package contains the inference code and a trained model. See\n'
    'https://github.com/google/cld3 for more details.\n'
)
98
# Package metadata and build configuration.
setuptools.setup(
    # Identity.
    name=_NAME,
    version=__version__,
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    url='https://github.com/google/cld3',
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    # pybind11 is requested as a setup-time requirement because its include
    # directory is needed to compile the extension.
    setup_requires=['pybind11>=2.5.0'],
    ext_modules=ext_modules,
    # Replace the stock `build` command so the protos are compiled first.
    cmdclass={'build': CompileProtos},
    zip_safe=False,
)