blob: 9bdf67a6ff400df7ac5bf8d3c92d0c5d987cd934 [file] [log] [blame]
Rami Al-Rfoub66aba92020-07-24 20:28:04 -07001"""Setup utility for gcld3."""
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -07002
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -07003import os
RAMI ALRFOU9ce971d2020-08-01 23:38:49 -07004import platform
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -07005import shutil
6import subprocess
7import setuptools
Rami Al-Rfoub66aba92020-07-24 20:28:04 -07008from setuptools.command import build_ext
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -07009
__version__ = '3.0.10'
_NAME = 'gcld3'

# Packages needed while building; passed to setuptools as `setup_requires`.
REQUIREMENTS = [
    'pybind11 >= 2.5.0',
    'wheel >= 0.34.2',
]
14
# Protocol buffer definitions that are fed to `protoc` at build time.
PROTO_FILES = [
    'src/feature_extractor.proto',
    'src/sentence.proto',
    'src/task_spec.proto',
]
20
# C++ sources located directly under src/.
_LIBRARY_SRCS = [
    'src/base.cc',
    'src/embedding_feature_extractor.cc',
    'src/embedding_network.cc',
    'src/feature_extractor.cc',
    'src/feature_types.cc',
    'src/fml_parser.cc',
    'src/lang_id_nn_params.cc',
    'src/language_identifier_features.cc',
    'src/language_identifier_main.cc',
    'src/nnet_language_identifier.cc',
    'src/registry.cc',
    'src/relevant_script_feature.cc',
    'src/sentence_features.cc',
    'src/task_context.cc',
    'src/task_context_params.cc',
    'src/unicodetext.cc',
    'src/utils.cc',
    'src/workspace.cc',
]

# C++ sources located under src/script_span/.
_SCRIPT_SPAN_SRCS = [
    'src/script_span/fixunicodevalue.cc',
    'src/script_span/generated_entities.cc',
    'src/script_span/generated_ulscript.cc',
    'src/script_span/getonescriptspan.cc',
    'src/script_span/offsetmap.cc',
    'src/script_span/text_processing.cc',
    'src/script_span/utf8statetable.cc',
]

# These files are not checked in as-is: they have to be generated by the
# protocol buffer compiler `protoc` before the extension can be compiled.
_GENERATED_PROTO_SRCS = [
    'src/cld_3/protos/feature_extractor.pb.cc',
    'src/cld_3/protos/sentence.pb.cc',
    'src/cld_3/protos/task_spec.pb.cc',
]

# pybind11 bindings.
_BINDING_SRCS = [
    'src/python/gcld3.cc',
]

# Every C++ translation unit that is compiled into the extension module.
SRCS = (
    _LIBRARY_SRCS
    + _SCRIPT_SPAN_SRCS
    + _GENERATED_PROTO_SRCS
    + _BINDING_SRCS
)
54
55
class CompileProtos(build_ext.build_ext):
    """`build_ext` command that first compiles the protos with `protoc`."""

    def run(self):
        """Generate the C++ proto sources, then run the standard `build_ext`.

        Raises:
            RuntimeError: If no `protoc` executable can be found.
            subprocess.CalledProcessError: If `protoc` exits with a non-zero
                status.
        """
        protoc_missing = shutil.which('protoc') is None
        if protoc_missing:
            raise RuntimeError('Please install the proto buffer compiler.')

        # The C++ code expects the generated proto sources under this
        # directory, so make sure it exists before invoking the compiler.
        output_dir = 'src/cld_3/protos/'
        os.makedirs(output_dir, exist_ok=True)

        protoc_command = [
            'protoc',
            f'--cpp_out={output_dir}',
            '--proto_path=src',
            *PROTO_FILES,
        ]
        subprocess.run(protoc_command, check=True, cwd='./')

        # With the generated sources in place, continue with the normal build.
        super().run()
Rami Al-Rfou37fa8fd2020-07-24 12:39:38 -070071
72
class PyBindIncludes(object):
    """Lazy stand-in for the pybind11 include directory.

    `pybind11.get_include()` can only be called once pybind11 is present in
    the environment. Deferring the import until the object is converted to a
    string lets pybind11 be installed first and queried for its include path
    afterwards.
    """

    def __str__(self):
        """Import pybind11 on demand and return its include path."""
        import pybind11  # pylint: disable=g-import-not-at-top
        include_path = pybind11.get_include()
        return include_path
84
MACOS = platform.system() == "Darwin"

# The C++ standard and libc++ flags are only passed on macOS; on every other
# platform no extra compiler or linker flags are added.
if MACOS:
    _extra_compile_args = ["-std=c++11", "-stdlib=libc++"]
    _extra_link_args = ["-stdlib=libc++"]
else:
    _extra_compile_args = []
    _extra_link_args = []

# The single native extension: the C++ sources plus the pybind11 bindings,
# linked against the protobuf library.
_gcld3_extension = setuptools.Extension(
    _NAME,
    sorted(SRCS),
    # Resolved lazily so pybind11 does not have to be importable yet.
    include_dirs=[PyBindIncludes()],
    libraries=['protobuf'],
    extra_compile_args=_extra_compile_args,
    extra_link_args=_extra_link_args,
    language='c++',
)

ext_modules = [_gcld3_extension]
98
# Long description shown on the package index page.
DESCRIPTION = (
    'CLD3 is a neural network model for language identification.\n'
    'This package contains the inference code and a trained model. See\n'
    'https://github.com/google/cld3 for more details.\n'
)
103
setuptools.setup(
    # Package identity.
    name=_NAME,
    version=__version__,
    url='https://github.com/google/cld3',
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    # Build configuration: the custom `build_ext` command generates the proto
    # sources before the extension is compiled.
    setup_requires=REQUIREMENTS,
    ext_modules=ext_modules,
    cmdclass={'build_ext': CompileProtos},
    zip_safe=False,
)
118)