blob: 641bac8c1838413aca5af1ce9fdc225467e8628a [file] [log] [blame]
"""Setup utility for gcld3."""
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -07002
import os
import platform
import shutil
import subprocess

import setuptools
from setuptools.command import build_ext
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -07008
__version__ = '3.0.1'
_NAME = 'gcld3'

# Build-time requirements, handed to setup() as `setup_requires`.
REQUIREMENTS = ['pybind11 >= 2.5.0', 'wheel >= 0.34.2']

# Base names of the protocol buffer definitions. Both the `.proto` inputs and
# the generated `.pb.cc` outputs are derived from this single tuple so the two
# lists cannot drift apart.
_PROTO_NAMES = ('feature_extractor', 'sentence', 'task_spec')

PROTO_FILES = [f'src/{proto_name}.proto' for proto_name in _PROTO_NAMES]

# Hand-written C++ sources located directly under src/.
_CORE_SRCS = [
    'src/base.cc',
    'src/embedding_feature_extractor.cc',
    'src/embedding_network.cc',
    'src/feature_extractor.cc',
    'src/feature_types.cc',
    'src/fml_parser.cc',
    'src/lang_id_nn_params.cc',
    'src/language_identifier_features.cc',
    'src/language_identifier_main.cc',
    'src/nnet_language_identifier.cc',
    'src/registry.cc',
    'src/relevant_script_feature.cc',
    'src/sentence_features.cc',
    'src/task_context.cc',
    'src/task_context_params.cc',
    'src/unicodetext.cc',
    'src/utils.cc',
    'src/workspace.cc',
]

# C++ sources located under src/script_span/.
_SCRIPT_SPAN_SRCS = [
    'src/script_span/fixunicodevalue.cc',
    'src/script_span/generated_entities.cc',
    'src/script_span/generated_ulscript.cc',
    'src/script_span/getonescriptspan.cc',
    'src/script_span/offsetmap.cc',
    'src/script_span/text_processing.cc',
    'src/script_span/utf8statetable.cc',
]

# These CC files have to be generated by the proto buffer compiler 'protoc'.
_GENERATED_PROTO_SRCS = [
    f'src/cld_3/protos/{proto_name}.pb.cc' for proto_name in _PROTO_NAMES
]

# pybind11 bindings.
_BINDING_SRCS = ['src/python/gcld3.cc']

SRCS = (
    _CORE_SRCS + _SCRIPT_SPAN_SRCS + _GENERATED_PROTO_SRCS + _BINDING_SRCS
)
53
54
class CompileProtos(build_ext.build_ext):
  """`build_ext` command that runs `protoc` before building the extension.

  Every file listed in PROTO_FILES is compiled to C++ under
  `src/cld_3/protos/`, after which the regular `build_ext` logic runs.
  """

  def run(self):
    """Generates the protobuf C++ sources, then builds the extension.

    Raises:
      RuntimeError: If no `protoc` executable can be located.
      subprocess.CalledProcessError: If `protoc` exits with a non-zero status.
    """
    protoc_missing = shutil.which('protoc') is None
    if protoc_missing:
      raise RuntimeError('Please install the proto buffer compiler.')

    # The C++ code expects the compiled protos under this directory, so make
    # sure it exists before protoc writes into it.
    output_dir = 'src/cld_3/protos/'
    os.makedirs(output_dir, exist_ok=True)

    protoc_invocation = [
        'protoc',
        f'--cpp_out={output_dir}',
        '--proto_path=src',
        *PROTO_FILES,
    ]
    # check=True aborts the build when proto compilation fails.
    subprocess.run(protoc_invocation, check=True, cwd='./')

    super().run()
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -070070
71
class PyBindIncludes:
  """Lazy placeholder for the pybind11 include directory.

  pybind11 is imported only when an instance is converted to a string, not
  when it is created. This allows pybind11 to be installed first and queried
  for its include path afterwards.
  """

  def __str__(self):
    """Returns the include path reported by `pybind11.get_include()`."""
    from pybind11 import get_include  # pylint: disable=g-import-not-at-top
    return get_include()
83
# True when building on macOS, where libc++ is selected explicitly.
MACOS = platform.system() == "Darwin"

# Only add the libc++ flag where it applies. On other platforms the list is
# empty, so no empty-string argument (which a toolchain may interpret as an
# input file name) is ever handed to the compiler or linker.
_STDLIB_FLAGS = ["-stdlib=libc++"] if MACOS else []

ext_modules = [
    setuptools.Extension(
        _NAME,
        # Sorted so the list of compiled sources has a deterministic order.
        sorted(SRCS),
        include_dirs=[
            # Resolved lazily: pybind11 is only imported when the path is
            # converted to a string.
            PyBindIncludes(),
        ],
        libraries=['protobuf'],
        extra_compile_args=["-std=c++11"] + _STDLIB_FLAGS,
        extra_link_args=list(_STDLIB_FLAGS),
        language='c++'),
]
97
# Long description shown on the package index page.
DESCRIPTION = (
    'CLD3 is a neural network model for language identification.\n'
    'This package contains the inference code and a trained model. See\n'
    'https://github.com/google/cld3 for more details.\n'
)
102
# Package metadata and build configuration for the gcld3 extension.
setuptools.setup(
    name=_NAME,
    version=__version__,
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    url='https://github.com/google/cld3',
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    setup_requires=REQUIREMENTS,
    ext_modules=ext_modules,
    # Route `build_ext` through CompileProtos so the protobuf sources are
    # generated before the extension is compiled.
    cmdclass={'build_ext': CompileProtos},
    zip_safe=False,
)