"""Setup utility for gcld3."""

import os
import platform
import shutil
import subprocess

import setuptools
from setuptools.command import build_ext

# Version string passed to setuptools.setup() as `version`.
__version__ = '3.0.13'
# Distribution name passed to setuptools.setup() as `name`.
_NAME = 'gcld3'

# Requirement specifiers passed to setuptools.setup() as `setup_requires`.
REQUIREMENTS = ['pybind11 >= 2.5.0', 'wheel >= 0.34.2']

# Protocol buffer definitions handed to `protoc` by the CompileProtos command.
# Paths are relative to the directory the build is run from.
PROTO_FILES = [
    'src/feature_extractor.proto',
    'src/sentence.proto',
    'src/task_spec.proto',
]

# C++ translation units compiled into the extension module.
SRCS = [
    'src/base.cc',
    'src/embedding_feature_extractor.cc',
    'src/embedding_network.cc',
    'src/feature_extractor.cc',
    'src/feature_types.cc',
    'src/fml_parser.cc',
    'src/lang_id_nn_params.cc',
    'src/language_identifier_features.cc',
    'src/language_identifier_main.cc',
    'src/nnet_language_identifier.cc',
    'src/registry.cc',
    'src/relevant_script_feature.cc',
    'src/sentence_features.cc',
    'src/task_context.cc',
    'src/task_context_params.cc',
    'src/unicodetext.cc',
    'src/utils.cc',
    'src/workspace.cc',
    'src/script_span/fixunicodevalue.cc',
    'src/script_span/generated_entities.cc',
    'src/script_span/generated_ulscript.cc',
    'src/script_span/getonescriptspan.cc',
    'src/script_span/offsetmap.cc',
    'src/script_span/text_processing.cc',
    'src/script_span/utf8statetable.cc',
    # These CC files have to be generated by the proto buffer compiler 'protoc'
    'src/cld_3/protos/feature_extractor.pb.cc',
    'src/cld_3/protos/sentence.pb.cc',
    'src/cld_3/protos/task_spec.pb.cc',
    # pybind11 bindings
    'gcld3/pybind_ext.cc',
]


class CompileProtos(build_ext.build_ext):
    """Compile protocol buffers via `protoc` compiler.

    Extends the regular `build_ext` command: the C++ sources for the files
    listed in PROTO_FILES are generated first, then the normal extension
    build runs.
    """

    def run(self):
        """Generate the protobuf C++ sources, then build the extensions.

        Raises:
            RuntimeError: If no `protoc` executable is found on the PATH.
            subprocess.CalledProcessError: If `protoc` exits with a non-zero
                status.
        """
        if shutil.which('protoc') is None:
            raise RuntimeError('Please install the proto buffer compiler.')

        # The C++ code expects the protos to be compiled under the following
        # directory, therefore, create it if necessary.
        compiled_protos_dir = 'src/cld_3/protos/'
        os.makedirs(compiled_protos_dir, exist_ok=True)
        command = [
            'protoc',
            f'--cpp_out={compiled_protos_dir}',
            '--proto_path=src',
        ]
        command.extend(PROTO_FILES)
        # check=True aborts the build if code generation fails, instead of
        # failing later on missing *.pb.cc files.
        subprocess.run(command, check=True, cwd='./')
        build_ext.build_ext.run(self)


class PyBindIncludes:
    """Returns the include paths for pybind11 when needed.

    To delay the invocation of "pybind11.get_include()" until it is available
    in the environment. This lazy evaluation allows us to install it first,
    then import it later to determine the correct include paths.
    """

    def __str__(self):
        """Import pybind11 on demand and return `pybind11.get_include()`."""
        # Imported here rather than at module level so that merely creating
        # an instance does not require pybind11 to be importable.
        import pybind11  # pylint: disable=g-import-not-at-top
        return pybind11.get_include()


# True when building on macOS; the libc++ flags below are only passed there.
MACOS = platform.system() == 'Darwin'
ext_modules = [
    setuptools.Extension(
        'gcld3.pybind_ext',
        sorted(SRCS),
        include_dirs=[
            # Converted to a string only when the include path is needed.
            PyBindIncludes(),
        ],
        libraries=['protobuf'],
        extra_compile_args=['-std=c++11', '-stdlib=libc++'] if MACOS else [],
        extra_link_args=['-stdlib=libc++'] if MACOS else [],
        language='c++',
    ),
]

# Long description passed to setuptools.setup() as `long_description`.
DESCRIPTION = """CLD3 is a neural network model for language identification.
This package contains the inference code and a trained model. See
https://github.com/google/cld3 for more details.
"""

# Package definition. `build_ext` is overridden with CompileProtos so that the
# protobuf C++ sources are generated before the extension is compiled.
setuptools.setup(
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    cmdclass={
        'build_ext': CompileProtos,
    },
    ext_modules=ext_modules,
    packages=setuptools.find_packages(),
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    name=_NAME,
    setup_requires=REQUIREMENTS,
    url='https://github.com/google/cld3',
    version=__version__,
    zip_safe=False,
)