Add an LLVM IR library for more builtin functions (#715)
* The IR library is generated from libclc (see README.md for
instructions to rebuild it)
* it is included as a header in the compiler and linked with the
source
* Added the LICENSE text to LICENSE due to the addition of LLVM IR
library
* Added a new compiler option `-cl-native-math` for the fastest math
implementations
* no precision guarantees
* Causes a new pass to strip some of the library functions when a
lower precision, faster alternative is available (e.g. for fma)
* this option implies `-cl-fast-relaxed-math`
* Updated docs and readme
* Updated LLVM
* Updated tests
diff --git a/lib/Compiler.cpp b/lib/Compiler.cpp
index 3af1044..592b6f9 100644
--- a/lib/Compiler.cpp
+++ b/lib/Compiler.cpp
@@ -19,12 +19,15 @@
#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"
#include "clang/Lex/PreprocessorOptions.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/IRReader/IRReader.h"
#include "llvm/InitializePasses.h"
#include "llvm/LinkAllPasses.h"
+#include "llvm/Linker/Linker.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorOr.h"
@@ -38,8 +41,10 @@
#include "clspv/Option.h"
#include "clspv/Passes.h"
#include "clspv/Sampler.h"
+#include "clspv/clspv_builtin_library.h"
#include "clspv/opencl_builtins_header.h"
+#include "Builtins.h"
#include "FrontendPlugin.h"
#include "Passes.h"
@@ -164,6 +169,21 @@
"Emit LLVM IR to the given file after parsing and stop compilation."),
llvm::cl::value_desc("filename"));
+namespace {
+// A llvm::MemoryBuffer view over a caller-provided byte range. The destructor
+// releases nothing, so the bytes must outlive the buffer.
+struct OpenCLBuiltinMemoryBuffer final : public llvm::MemoryBuffer {
+  // Exposes the |data_length| bytes starting at |data| as the buffer contents.
+  OpenCLBuiltinMemoryBuffer(const void *data, uint64_t data_length) {
+    const auto *begin = static_cast<const char *>(data);
+    const auto *end = begin + data_length;
+    // The buffer is registered as null-terminated: the byte at |end| is
+    // expected to be the terminator.
+    init(begin, end, /*RequiresNullTerminator=*/true);
+  }
+
+  // The buffer identifies itself as the malloc-backed kind.
+  llvm::MemoryBuffer::BufferKind getBufferKind() const override {
+    return llvm::MemoryBuffer::MemoryBuffer_Malloc;
+  }
+
+  ~OpenCLBuiltinMemoryBuffer() override = default;
+};
+} // namespace
+
// Populates |SamplerMapEntries| with data from the input sampler map. Returns 0
// if successful.
int ParseSamplerMap(const std::string &sampler_map,
@@ -428,17 +448,20 @@
instance.getCodeGenOpts().LessPreciseFPMAD =
cl_mad_enable || cl_unsafe_math_optimizations;
// cl_no_signed_zeros ignored for now!
- instance.getLangOpts().UnsafeFPMath =
- cl_unsafe_math_optimizations || cl_fast_relaxed_math;
- instance.getLangOpts().FiniteMathOnly =
- cl_finite_math_only || cl_fast_relaxed_math;
- instance.getLangOpts().FastRelaxedMath = cl_fast_relaxed_math;
+ instance.getLangOpts().UnsafeFPMath = cl_unsafe_math_optimizations ||
+ cl_fast_relaxed_math ||
+ clspv::Option::NativeMath();
+ instance.getLangOpts().FiniteMathOnly = cl_finite_math_only ||
+ cl_fast_relaxed_math ||
+ clspv::Option::NativeMath();
+ instance.getLangOpts().FastRelaxedMath =
+ cl_fast_relaxed_math || clspv::Option::NativeMath();
// Preprocessor options
if (!clspv::Option::ImageSupport()) {
instance.getPreprocessorOpts().addMacroUndef("__IMAGE_SUPPORT__");
}
- if (cl_fast_relaxed_math) {
+ if (cl_fast_relaxed_math || clspv::Option::NativeMath()) {
instance.getPreprocessorOpts().addMacroDef("__FAST_RELAXED_MATH__");
}
@@ -494,19 +517,6 @@
instance.getPreprocessorOpts().addRemappedFile(overiddenInputFilename,
memory_buffer.release());
- struct OpenCLBuiltinMemoryBuffer final : public llvm::MemoryBuffer {
- OpenCLBuiltinMemoryBuffer(const void *data, uint64_t data_length) {
- const char *dataCasted = reinterpret_cast<const char *>(data);
- init(dataCasted, dataCasted + data_length, true);
- }
-
- virtual llvm::MemoryBuffer::BufferKind getBufferKind() const override {
- return llvm::MemoryBuffer::MemoryBuffer_Malloc;
- }
-
- virtual ~OpenCLBuiltinMemoryBuffer() override {}
- };
-
std::unique_ptr<llvm::MemoryBuffer> openCLBuiltinMemoryBuffer(
new OpenCLBuiltinMemoryBuffer(opencl_builtins_header_data,
opencl_builtins_header_size - 1));
@@ -599,6 +609,7 @@
break;
}
+ pm->add(clspv::createNativeMathPass());
pm->add(clspv::createZeroInitializeAllocasPass());
pm->add(clspv::createAddFunctionAttributesPass());
pm->add(clspv::createAutoPodArgsPass());
@@ -844,6 +855,28 @@
return 0;
}
+// Parses the embedded clspv builtin library (LLVM IR) in |module|'s context
+// and links it into |module|.
+//
+// Returns true on success. If the library cannot be parsed or linked, a
+// message is written to llvm::errs() and false is returned.
+bool LinkBuiltinLibrary(llvm::Module *module) {
+  // Wrap the embedded library bytes in a MemoryBuffer. The last byte is left
+  // out of the buffer range; presumably it is the terminator that the
+  // null-terminated buffer expects just past its end.
+  std::unique_ptr<llvm::MemoryBuffer> buffer(new OpenCLBuiltinMemoryBuffer(
+      clspv_builtin_library_data, clspv_builtin_library_size - 1));
+
+  llvm::SMDiagnostic Err;
+  auto library = llvm::parseIR(*buffer, Err, module->getContext());
+  if (!library) {
+    llvm::errs() << "Failed to parse builtins library\n";
+    return false;
+  }
+
+  // TODO: when clang generates builtins using the generic address space,
+  // different builtins are used for pointer-based builtins. Need to do some
+  // work to ensure they are kept around.
+  // Affects: modf, remquo, lgamma_r, frexp
+
+  // Linker::linkInModule() returns true when linking fails, so the result
+  // must be checked rather than dropped.
+  llvm::Linker L(*module);
+  if (L.linkInModule(std::move(library), 0)) {
+    llvm::errs() << "Failed to link builtins library\n";
+    return false;
+  }
+
+  return true;
+}
+
} // namespace
namespace clspv {
@@ -935,6 +968,10 @@
return GenerateIRFile(&pm, *module, IROutputFile);
}
+ if (!LinkBuiltinLibrary(module.get())) {
+ return -1;
+ }
+
// Otherwise, populate the pass manager and run the regular passes.
if (auto error = PopulatePassManager(&pm, &binaryStream, &SamplerMapEntries))
return error;
@@ -1032,6 +1069,10 @@
std::unique_ptr<llvm::Module> module(action.takeModule());
+ if (!LinkBuiltinLibrary(module.get())) {
+ return -1;
+ }
+
// Optimize.
// Create a memory buffer for temporarily writing the result.
SmallVector<char, 10000> binary;