// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "LLVMReactor.hpp"

#include "Debug.hpp"
#include "ExecutableMemory.hpp"
#include "Routine.hpp"

#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which will be
// treated as an error in SwiftShader targets.
#	pragma clang diagnostic push
#	pragma clang diagnostic ignored "-Wextra-semi"
#endif  // defined(__clang__)

// TODO(b/143539525): Eliminate when warning has been fixed.
#ifdef _MSC_VER
__pragma(warning(push))
    __pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
#endif

#include "llvm/Analysis/LoopPass.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Coroutines.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"

#if defined(__clang__)
#	pragma clang diagnostic pop
#endif  // defined(__clang__)

#ifdef _MSC_VER
__pragma(warning(pop))
#endif

#include <unordered_map>

#if defined(_WIN64)
extern "C" void __chkstk();
#elif defined(_WIN32)
extern "C" void _chkstk();
#endif

#if __has_feature(memory_sanitizer)
#	include <sanitizer/msan_interface.h>
#endif

namespace {

// Cache provides a simple, thread-safe key-value store.
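//
// Usage sketch (hypothetical key/value types, for illustration only):
//   Cache<int, std::string> cache;
//   std::string v = cache.getOrCreate(1, [] { return std::string("one"); });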
template<typename KEY, typename VALUE>
class Cache
{
public:
	Cache() = default;
	Cache(const Cache &other);
	VALUE getOrCreate(KEY key, std::function<VALUE()> create);

private:
	mutable std::mutex mutex;  // mutable required for copy constructor.
	std::unordered_map<KEY, VALUE> map;
};

template<typename KEY, typename VALUE>
Cache<KEY, VALUE>::Cache(const Cache &other)
{
	std::unique_lock<std::mutex> lock(other.mutex);
	map = other.map;
}

template<typename KEY, typename VALUE>
VALUE Cache<KEY, VALUE>::getOrCreate(KEY key, std::function<VALUE()> create)
{
	std::unique_lock<std::mutex> lock(mutex);
	auto it = map.find(key);
	if(it != map.end())
	{
		return it->second;
	}
	auto value = create();
	map.emplace(key, value);
	return value;
}

// JITGlobals is a singleton that holds all the immutable machine-specific
// information for the host device.
class JITGlobals
{
public:
	using TargetMachineSPtr = std::shared_ptr<llvm::TargetMachine>;

	static JITGlobals *get();

	const std::string mcpu;
	const std::vector<std::string> mattrs;
	const char *const march;
	const llvm::TargetOptions targetOptions;
	const llvm::DataLayout dataLayout;

	TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);

private:
	static JITGlobals create();
	static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);
	JITGlobals(const char *mcpu,
	           const std::vector<std::string> &mattrs,
	           const char *march,
	           const llvm::TargetOptions &targetOptions,
	           const llvm::DataLayout &dataLayout);
	JITGlobals(const JITGlobals &) = default;

	Cache<rr::Optimization::Level, TargetMachineSPtr> targetMachines;
};

JITGlobals *JITGlobals::get()
{
	static JITGlobals instance = create();
	return &instance;
}

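// Returns a cached TargetMachine for the given optimization level, creating it
// on first use. When ENABLE_RR_DEBUG_INFO is defined, code generation
// optimizations are disabled so the emitted code maps cleanly back to the
// Reactor source.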
JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
{
#ifdef ENABLE_RR_DEBUG_INFO
	auto llvmOptLevel = toLLVM(rr::Optimization::Level::None);
#else   // ENABLE_RR_DEBUG_INFO
	auto llvmOptLevel = toLLVM(optlevel);
#endif  // ENABLE_RR_DEBUG_INFO

	return targetMachines.getOrCreate(optlevel, [&]() {
		return TargetMachineSPtr(llvm::EngineBuilder()
		                             .setOptLevel(llvmOptLevel)
		                             .setMCPU(mcpu)
		                             .setMArch(march)
		                             .setMAttrs(mattrs)
		                             .setTargetOptions(targetOptions)
		                             .selectTarget());
	});
}

JITGlobals JITGlobals::create()
{
	struct LLVMInitializer
	{
		LLVMInitializer()
		{
			llvm::InitializeNativeTarget();
			llvm::InitializeNativeTargetAsmPrinter();
			llvm::InitializeNativeTargetAsmParser();
		}
	};
	static LLVMInitializer initializeLLVM;

	auto mcpu = llvm::sys::getHostCPUName();

	llvm::StringMap<bool> features;
	bool ok = llvm::sys::getHostCPUFeatures(features);

#if defined(__i386__) || defined(__x86_64__) || \
    (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
	ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
#else
	(void)ok;  // getHostCPUFeatures always returns false on other platforms
#endif

	std::vector<std::string> mattrs;
	for(auto &feature : features)
	{
		if(feature.second) { mattrs.push_back(feature.first().str()); }
	}

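	// Select the LLVM architecture name matching the build target, so the
	// EngineBuilder below targets the host we were compiled for.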
	const char *march = nullptr;
#if defined(__x86_64__)
	march = "x86-64";
#elif defined(__i386__)
	march = "x86";
#elif defined(__aarch64__)
	march = "arm64";
#elif defined(__arm__)
	march = "arm";
#elif defined(__mips__)
#	if defined(__mips64)
	march = "mips64el";
#	else
	march = "mipsel";
#	endif
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	march = "ppc64le";
#else
#	error "unknown architecture"
#endif

	llvm::TargetOptions targetOptions;
	targetOptions.UnsafeFPMath = false;

	auto targetMachine = std::unique_ptr<llvm::TargetMachine>(
	    llvm::EngineBuilder()
	        .setOptLevel(llvm::CodeGenOpt::None)
	        .setMCPU(mcpu)
	        .setMArch(march)
	        .setMAttrs(mattrs)
	        .setTargetOptions(targetOptions)
	        .selectTarget());

	auto dataLayout = targetMachine->createDataLayout();

	return JITGlobals(mcpu.data(), mattrs, march, targetOptions, dataLayout);
}

llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
{
	switch(level)
	{
		case rr::Optimization::Level::None: return ::llvm::CodeGenOpt::None;
		case rr::Optimization::Level::Less: return ::llvm::CodeGenOpt::Less;
		case rr::Optimization::Level::Default: return ::llvm::CodeGenOpt::Default;
		case rr::Optimization::Level::Aggressive: return ::llvm::CodeGenOpt::Aggressive;
		default: UNREACHABLE("Unknown Optimization Level %d", int(level));
	}
	return ::llvm::CodeGenOpt::Default;
}

JITGlobals::JITGlobals(const char *mcpu,
                       const std::vector<std::string> &mattrs,
                       const char *march,
                       const llvm::TargetOptions &targetOptions,
                       const llvm::DataLayout &dataLayout)
    : mcpu(mcpu)
    , mattrs(mattrs)
    , march(march)
    , targetOptions(targetOptions)
    , dataLayout(dataLayout)
{
}

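// MemoryMapper plugs Reactor's ExecutableMemory page allocator into LLVM's
// SectionMemoryManager, so JIT'd code and data sections are allocated,
// protected and released through rr::allocateMemoryPages() and friends.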
class MemoryMapper : public llvm::SectionMemoryManager::MemoryMapper
{
public:
	MemoryMapper() {}
	~MemoryMapper() final {}

	llvm::sys::MemoryBlock allocateMappedMemory(
	    llvm::SectionMemoryManager::AllocationPurpose purpose,
	    size_t numBytes, const llvm::sys::MemoryBlock *const nearBlock,
	    unsigned flags, std::error_code &errorCode) final
	{
		errorCode = std::error_code();

		// Round up numBytes to page size.
		size_t pageSize = rr::memoryPageSize();
		numBytes = (numBytes + pageSize - 1) & ~(pageSize - 1);

		bool need_exec =
		    purpose == llvm::SectionMemoryManager::AllocationPurpose::Code;
		void *addr = rr::allocateMemoryPages(
		    numBytes, flagsToPermissions(flags), need_exec);
		if(!addr)
			return llvm::sys::MemoryBlock();
		return llvm::sys::MemoryBlock(addr, numBytes);
	}

	std::error_code protectMappedMemory(const llvm::sys::MemoryBlock &block,
	                                    unsigned flags)
	{
		// Round down base address to align with a page boundary. This matches
		// DefaultMMapper behavior.
		void *addr = block.base();
#if LLVM_VERSION_MAJOR >= 9
		size_t size = block.allocatedSize();
#else
		size_t size = block.size();
#endif
		size_t pageSize = rr::memoryPageSize();
		addr = reinterpret_cast<void *>(
		    reinterpret_cast<uintptr_t>(addr) & ~(pageSize - 1));
		size += reinterpret_cast<uintptr_t>(block.base()) -
		        reinterpret_cast<uintptr_t>(addr);

		rr::protectMemoryPages(addr, size, flagsToPermissions(flags));
		return std::error_code();
	}

	std::error_code releaseMappedMemory(llvm::sys::MemoryBlock &block)
	{
#if LLVM_VERSION_MAJOR >= 9
		size_t size = block.allocatedSize();
#else
		size_t size = block.size();
#endif

		rr::deallocateMemoryPages(block.base(), size);
		return std::error_code();
	}

private:
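	// Translates LLVM's MF_READ/MF_WRITE/MF_EXEC flags into the corresponding
	// Reactor permission bits.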
	int flagsToPermissions(unsigned flags)
	{
		int result = 0;
		if(flags & llvm::sys::Memory::MF_READ)
		{
			result |= rr::PERMISSION_READ;
		}
		if(flags & llvm::sys::Memory::MF_WRITE)
		{
			result |= rr::PERMISSION_WRITE;
		}
		if(flags & llvm::sys::Memory::MF_EXEC)
		{
			result |= rr::PERMISSION_EXECUTE;
		}
		return result;
	}
};

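// Rounds val up to the nearest multiple of alignment.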
template<typename T>
T alignUp(T val, T alignment)
{
	return alignment * ((val + alignment - 1) / alignment);
}

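// Allocates size bytes with the requested alignment (which must be < 256). The
// byte immediately preceding the returned pointer stores the offset back to the
// start of the underlying allocation, which alignedFree() uses to delete it.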
void *alignedAlloc(size_t size, size_t alignment)
{
	ASSERT(alignment < 256);
	auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
	auto aligned = allocation;
	aligned += sizeof(uint8_t);  // Make space for the base-address offset.
	aligned = reinterpret_cast<uint8_t *>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment));  // align
	auto offset = static_cast<uint8_t>(aligned - allocation);
	aligned[-1] = offset;
	return aligned;
}

void alignedFree(void *ptr)
{
	auto aligned = reinterpret_cast<uint8_t *>(ptr);
	auto offset = aligned[-1];
	auto allocation = aligned - offset;
	delete[] allocation;
}

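// atomicLoad/atomicStore back the "atomic_load" and "atomic_store" helper
// symbols resolved below, performing the access through std::atomic with the
// memory ordering requested by the generated code.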
template<typename T>
static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
{
	*reinterpret_cast<T *>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), rr::atomicOrdering(ordering));
}

template<typename T>
static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
{
	std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), *reinterpret_cast<T *>(val), rr::atomicOrdering(ordering));
}

#ifdef __ANDROID__
template<typename F>
static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
{
	// Build an arbitrary op out of looped CAS
	for(;;)
	{
		uint32_t expected = *ptr;
		uint32_t desired = f(expected, val);

		if(expected == __sync_val_compare_and_swap_4(ptr, expected, desired))
		{
			return expected;
		}
	}
}
#endif

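// Maps an external symbol name referenced by the JIT'd module to the address of
// a host function. Missing entries trigger the ASSERT_MSG at the bottom of this
// function.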
void *resolveExternalSymbol(const char *name)
{
	struct Atomic
	{
		static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
				case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
				case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
				case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
				case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
				default:
					UNIMPLEMENTED_NO_BUG("Atomic::load(size: %d)", int(size));
			}
		}
		static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
				case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
				case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
				case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
				case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
				default:
					UNIMPLEMENTED_NO_BUG("Atomic::store(size: %d)", int(size));
			}
		}
	};

	struct F
	{
		static void nop() {}
		static void neverCalled() { UNREACHABLE("Should never be called"); }

		static void *coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
		static void coroutine_free_frame(void *ptr) { alignedFree(ptr); }

#ifdef __ANDROID__
		// Forwarders, since we can't take the address of builtins.
		static void sync_synchronize() { __sync_synchronize(); }
		static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
		static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
		static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
		static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
		static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
		static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
		static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }

		static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a, b); });
		}
		static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a, b); });
		}
		static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a, b); });
		}
		static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); });
		}
#endif
	};

	class Resolver
	{
	public:
		using FunctionMap = std::unordered_map<std::string, void *>;

		FunctionMap functions;

		Resolver()
		{
			functions.emplace("nop", reinterpret_cast<void *>(F::nop));
			functions.emplace("floorf", reinterpret_cast<void *>(floorf));
			functions.emplace("nearbyintf", reinterpret_cast<void *>(nearbyintf));
			functions.emplace("truncf", reinterpret_cast<void *>(truncf));
			functions.emplace("printf", reinterpret_cast<void *>(printf));
			functions.emplace("puts", reinterpret_cast<void *>(puts));
			functions.emplace("fmodf", reinterpret_cast<void *>(fmodf));

			functions.emplace("sinf", reinterpret_cast<void *>(sinf));
			functions.emplace("cosf", reinterpret_cast<void *>(cosf));
			functions.emplace("asinf", reinterpret_cast<void *>(asinf));
			functions.emplace("acosf", reinterpret_cast<void *>(acosf));
			functions.emplace("atanf", reinterpret_cast<void *>(atanf));
			functions.emplace("sinhf", reinterpret_cast<void *>(sinhf));
			functions.emplace("coshf", reinterpret_cast<void *>(coshf));
			functions.emplace("tanhf", reinterpret_cast<void *>(tanhf));
			functions.emplace("asinhf", reinterpret_cast<void *>(asinhf));
			functions.emplace("acoshf", reinterpret_cast<void *>(acoshf));
			functions.emplace("atanhf", reinterpret_cast<void *>(atanhf));
			functions.emplace("atan2f", reinterpret_cast<void *>(atan2f));
			functions.emplace("powf", reinterpret_cast<void *>(powf));
			functions.emplace("expf", reinterpret_cast<void *>(expf));
			functions.emplace("logf", reinterpret_cast<void *>(logf));
			functions.emplace("exp2f", reinterpret_cast<void *>(exp2f));
			functions.emplace("log2f", reinterpret_cast<void *>(log2f));

			functions.emplace("sin", reinterpret_cast<void *>(static_cast<double (*)(double)>(sin)));
			functions.emplace("cos", reinterpret_cast<void *>(static_cast<double (*)(double)>(cos)));
			functions.emplace("asin", reinterpret_cast<void *>(static_cast<double (*)(double)>(asin)));
			functions.emplace("acos", reinterpret_cast<void *>(static_cast<double (*)(double)>(acos)));
			functions.emplace("atan", reinterpret_cast<void *>(static_cast<double (*)(double)>(atan)));
			functions.emplace("sinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(sinh)));
			functions.emplace("cosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(cosh)));
			functions.emplace("tanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(tanh)));
			functions.emplace("asinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(asinh)));
			functions.emplace("acosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(acosh)));
			functions.emplace("atanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(atanh)));
			functions.emplace("atan2", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(atan2)));
			functions.emplace("pow", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(pow)));
			functions.emplace("exp", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp)));
			functions.emplace("log", reinterpret_cast<void *>(static_cast<double (*)(double)>(log)));
			functions.emplace("exp2", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp2)));
			functions.emplace("log2", reinterpret_cast<void *>(static_cast<double (*)(double)>(log2)));

			functions.emplace("atomic_load", reinterpret_cast<void *>(Atomic::load));
			functions.emplace("atomic_store", reinterpret_cast<void *>(Atomic::store));

			// FIXME(b/119409619): use an allocator here so we can control all memory allocations
			functions.emplace("coroutine_alloc_frame", reinterpret_cast<void *>(F::coroutine_alloc_frame));
			functions.emplace("coroutine_free_frame", reinterpret_cast<void *>(F::coroutine_free_frame));

#ifdef __APPLE__
			functions.emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
#elif defined(__linux__)
			functions.emplace("sincosf", reinterpret_cast<void *>(sincosf));
#elif defined(_WIN64)
			functions.emplace("chkstk", reinterpret_cast<void *>(__chkstk));
#elif defined(_WIN32)
			functions.emplace("chkstk", reinterpret_cast<void *>(_chkstk));
#endif

#ifdef __ANDROID__
			functions.emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void *>(F::neverCalled));
			functions.emplace("sync_synchronize", reinterpret_cast<void *>(F::sync_synchronize));
			functions.emplace("sync_fetch_and_add_4", reinterpret_cast<void *>(F::sync_fetch_and_add_4));
			functions.emplace("sync_fetch_and_and_4", reinterpret_cast<void *>(F::sync_fetch_and_and_4));
			functions.emplace("sync_fetch_and_or_4", reinterpret_cast<void *>(F::sync_fetch_and_or_4));
			functions.emplace("sync_fetch_and_xor_4", reinterpret_cast<void *>(F::sync_fetch_and_xor_4));
			functions.emplace("sync_fetch_and_sub_4", reinterpret_cast<void *>(F::sync_fetch_and_sub_4));
			functions.emplace("sync_lock_test_and_set_4", reinterpret_cast<void *>(F::sync_lock_test_and_set_4));
			functions.emplace("sync_val_compare_and_swap_4", reinterpret_cast<void *>(F::sync_val_compare_and_swap_4));
			functions.emplace("sync_fetch_and_max_4", reinterpret_cast<void *>(F::sync_fetch_and_max_4));
			functions.emplace("sync_fetch_and_min_4", reinterpret_cast<void *>(F::sync_fetch_and_min_4));
			functions.emplace("sync_fetch_and_umax_4", reinterpret_cast<void *>(F::sync_fetch_and_umax_4));
			functions.emplace("sync_fetch_and_umin_4", reinterpret_cast<void *>(F::sync_fetch_and_umin_4));
#endif
#if __has_feature(memory_sanitizer)
			functions.emplace("msan_unpoison", reinterpret_cast<void *>(__msan_unpoison));
#endif
		}
	};

	static Resolver resolver;

	// Trim off any underscores from the start of the symbol. LLVM likes
	// to prepend these on macOS.
	const char *trimmed = name;
	while(trimmed[0] == '_') { trimmed++; }

	auto it = resolver.functions.find(trimmed);
	// Missing functions will likely make the module fail in exciting non-obvious ways.
	ASSERT_MSG(it != resolver.functions.end(), "Missing external function: '%s'", name);
	return it->second;
}


// JITRoutine is a rr::Routine that holds an LLVM JIT session, compiler and
// object layer, as each routine may require different target machine
// settings and no Reactor routine directly links against another.
class JITRoutine : public rr::Routine
{
#if LLVM_VERSION_MAJOR >= 8
	using ObjLayer = llvm::orc::LegacyRTDyldObjectLinkingLayer;
	using CompileLayer = llvm::orc::LegacyIRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
#else
	using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
	using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
#endif

public:
	JITRoutine(
	    std::unique_ptr<llvm::Module> module,
	    llvm::Function **funcs,
	    size_t count,
	    const rr::Config &config)
	    : resolver(createLegacyLookupResolver(
	          session,
	          [&](const llvm::StringRef &name) {
		          void *func = resolveExternalSymbol(name.str().c_str());
		          if(func != nullptr)
		          {
			          return llvm::JITSymbol(
			              reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
		          }
		          return objLayer.findSymbol(name, true);
	          },
	          [](llvm::Error err) {
		          if(err)
		          {
			          // TODO: Log the symbol resolution errors.
			          return;
		          }
	          }))
	    , targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel()))
	    , compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine))
	    , objLayer(
	          session,
	          [this](llvm::orc::VModuleKey) {
		          return ObjLayer::Resources{ std::make_shared<llvm::SectionMemoryManager>(&memoryMapper), resolver };
	          },
	          ObjLayer::NotifyLoadedFtor(),
	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
#ifdef ENABLE_RR_DEBUG_INFO
		          rr::DebugInfo::NotifyObjectEmitted(Obj, L);
#endif  // ENABLE_RR_DEBUG_INFO
	          },
	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
#ifdef ENABLE_RR_DEBUG_INFO
		          rr::DebugInfo::NotifyFreeingObject(Obj);
#endif  // ENABLE_RR_DEBUG_INFO
	          })
	    , addresses(count)
	{
		std::vector<std::string> mangledNames(count);
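		// Give each function a unique external name ("f0", "f1", ...) and record
		// its mangled form so it can be looked up in the compiled object below.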
		for(size_t i = 0; i < count; i++)
		{
			auto func = funcs[i];
			static size_t numEmittedFunctions = 0;
			std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
			func->setName(name);
			func->setLinkage(llvm::GlobalValue::ExternalLinkage);
			func->setDoesNotThrow();

			llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
			llvm::Mangler::getNameWithPrefix(mangledNameStream, name, JITGlobals::get()->dataLayout);
		}

		auto moduleKey = session.allocateVModule();

		// Once the module is passed to the compileLayer, the
		// llvm::Functions are freed. Make sure funcs are not referenced
		// after this point.
		funcs = nullptr;

		llvm::cantFail(compileLayer.addModule(moduleKey, std::move(module)));

		// Resolve the function addresses.
		for(size_t i = 0; i < count; i++)
		{
			auto symbol = compileLayer.findSymbolIn(moduleKey, mangledNames[i], false);
			if(auto address = symbol.getAddress())
			{
				addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(address.get()));
			}
		}
	}

	const void *getEntry(int index) const override
	{
		return addresses[index];
	}

private:
	std::shared_ptr<llvm::orc::SymbolResolver> resolver;
	std::shared_ptr<llvm::TargetMachine> targetMachine;
	llvm::orc::ExecutionSession session;
	CompileLayer compileLayer;
	MemoryMapper memoryMapper;
	ObjLayer objLayer;
	std::vector<const void *> addresses;
};

}  // anonymous namespace

namespace rr {

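// Typical flow (a sketch; the actual call sites live in the Reactor LLVM
// backend): construct a JITBuilder, emit IR into `module` through `builder`,
// optionally call optimize(), then acquireRoutine() to compile the module and
// obtain executable entry points.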
JITBuilder::JITBuilder(const rr::Config &config)
    : config(config)
    , module(new llvm::Module("", context))
    , builder(new llvm::IRBuilder<>(context))
{
	module->setDataLayout(JITGlobals::get()->dataLayout);
}

void JITBuilder::optimize(const rr::Config &cfg)
{
#ifdef ENABLE_RR_DEBUG_INFO
	if(debugInfo != nullptr)
	{
		return;  // Don't optimize if we're generating debug info.
	}
#endif  // ENABLE_RR_DEBUG_INFO

	std::unique_ptr<llvm::legacy::PassManager> passManager(
	    new llvm::legacy::PassManager());

	for(auto pass : cfg.getOptimization().getPasses())
	{
		switch(pass)
		{
			case rr::Optimization::Pass::Disabled: break;
			case rr::Optimization::Pass::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
			case rr::Optimization::Pass::LICM: passManager->add(llvm::createLICMPass()); break;
			case rr::Optimization::Pass::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
			case rr::Optimization::Pass::GVN: passManager->add(llvm::createGVNPass()); break;
			case rr::Optimization::Pass::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
			case rr::Optimization::Pass::Reassociate: passManager->add(llvm::createReassociatePass()); break;
			case rr::Optimization::Pass::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
			case rr::Optimization::Pass::SCCP: passManager->add(llvm::createSCCPPass()); break;
			case rr::Optimization::Pass::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
			case rr::Optimization::Pass::EarlyCSEPass: passManager->add(llvm::createEarlyCSEPass()); break;
			default:
				UNREACHABLE("pass: %d", int(pass));
		}
	}

	passManager->run(*module);
}

std::shared_ptr<rr::Routine> JITBuilder::acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
{
	ASSERT(module);
	return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
}

}  // namespace rr