// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "LLVMReactor.hpp"

#include "Debug.hpp"
#include "ExecutableMemory.hpp"
#include "Routine.hpp"

#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which will be
// treated as an error in SwiftShader targets.
#	pragma clang diagnostic push
#	pragma clang diagnostic ignored "-Wextra-semi"
#endif  // defined(__clang__)

// TODO(b/143539525): Eliminate when warning has been fixed.
#ifdef _MSC_VER
__pragma(warning(push))
__pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
#endif

#include "llvm/Analysis/LoopPass.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Coroutines.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"

#if defined(__clang__)
#	pragma clang diagnostic pop
#endif  // defined(__clang__)

#ifdef _MSC_VER
__pragma(warning(pop))
#endif

#include <algorithm>
#include <atomic>
#include <cmath>
#include <cstdio>
#include <functional>
#include <memory>
#include <mutex>
#include <unordered_map>

#if defined(_WIN64)
extern "C" void __chkstk();
#elif defined(_WIN32)
extern "C" void _chkstk();
#endif

#if __has_feature(memory_sanitizer)
#	include <sanitizer/msan_interface.h>
#endif

namespace {

// Cache provides a simple, thread-safe key-value store.
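// getOrCreate() holds the lock while the create function runs, so concurrent
// callers asking for the same key block until the value exists. See
// JITGlobals::getTargetMachine() below for the typical usage.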
template<typename KEY, typename VALUE>
class Cache
{
public:
	Cache() = default;
	Cache(const Cache &other);
	VALUE getOrCreate(KEY key, std::function<VALUE()> create);

private:
	mutable std::mutex mutex;  // mutable required for copy constructor.
	std::unordered_map<KEY, VALUE> map;
};

template<typename KEY, typename VALUE>
Cache<KEY, VALUE>::Cache(const Cache &other)
{
	std::unique_lock<std::mutex> lock(other.mutex);
	map = other.map;
}

template<typename KEY, typename VALUE>
VALUE Cache<KEY, VALUE>::getOrCreate(KEY key, std::function<VALUE()> create)
{
	std::unique_lock<std::mutex> lock(mutex);
	auto it = map.find(key);
	if(it != map.end())
	{
		return it->second;
	}
	auto value = create();
	map.emplace(key, value);
	return value;
}

// JITGlobals is a singleton that holds all the immutable machine specific
// information for the host device.
class JITGlobals
{
public:
	using TargetMachineSPtr = std::shared_ptr<llvm::TargetMachine>;

	static JITGlobals *get();

	const std::string mcpu;
	const std::vector<std::string> mattrs;
	const char *const march;
	const llvm::TargetOptions targetOptions;
	const llvm::DataLayout dataLayout;

	TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);

private:
	static JITGlobals create();
	static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);
	JITGlobals(const char *mcpu,
	           const std::vector<std::string> &mattrs,
	           const char *march,
	           const llvm::TargetOptions &targetOptions,
	           const llvm::DataLayout &dataLayout);
	JITGlobals(const JITGlobals &) = default;

	Cache<rr::Optimization::Level, TargetMachineSPtr> targetMachines;
};

JITGlobals *JITGlobals::get()
{
	static JITGlobals instance = create();
	return &instance;
}

JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
{
#ifdef ENABLE_RR_DEBUG_INFO
	auto llvmOptLevel = toLLVM(rr::Optimization::Level::None);
#else   // ENABLE_RR_DEBUG_INFO
	auto llvmOptLevel = toLLVM(optlevel);
#endif  // ENABLE_RR_DEBUG_INFO

	return targetMachines.getOrCreate(optlevel, [&]() {
		return TargetMachineSPtr(llvm::EngineBuilder()
		                             .setOptLevel(llvmOptLevel)
		                             .setMCPU(mcpu)
		                             .setMArch(march)
		                             .setMAttrs(mattrs)
		                             .setTargetOptions(targetOptions)
		                             .selectTarget());
	});
}

JITGlobals JITGlobals::create()
{
	struct LLVMInitializer
	{
		LLVMInitializer()
		{
			llvm::InitializeNativeTarget();
			llvm::InitializeNativeTargetAsmPrinter();
			llvm::InitializeNativeTargetAsmParser();
		}
	};
	static LLVMInitializer initializeLLVM;

	auto mcpu = llvm::sys::getHostCPUName();

	llvm::StringMap<bool> features;
	bool ok = llvm::sys::getHostCPUFeatures(features);

#if defined(__i386__) || defined(__x86_64__) || \
    (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
	ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
#else
	(void)ok;  // getHostCPUFeatures always returns false on other platforms
#endif

	std::vector<std::string> mattrs;
	for(auto &feature : features)
	{
		if(feature.second) { mattrs.push_back(feature.first().str()); }
	}

	const char *march = nullptr;
#if defined(__x86_64__)
	march = "x86-64";
#elif defined(__i386__)
	march = "x86";
#elif defined(__aarch64__)
	march = "arm64";
#elif defined(__arm__)
	march = "arm";
#elif defined(__mips__)
#	if defined(__mips64)
	march = "mips64el";
#	else
	march = "mipsel";
#	endif
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	march = "ppc64le";
#else
#	error "unknown architecture"
#endif

	llvm::TargetOptions targetOptions;
	targetOptions.UnsafeFPMath = false;

	auto targetMachine = std::unique_ptr<llvm::TargetMachine>(
	    llvm::EngineBuilder()
	        .setOptLevel(llvm::CodeGenOpt::None)
	        .setMCPU(mcpu)
	        .setMArch(march)
	        .setMAttrs(mattrs)
	        .setTargetOptions(targetOptions)
	        .selectTarget());

	auto dataLayout = targetMachine->createDataLayout();

	return JITGlobals(mcpu.data(), mattrs, march, targetOptions, dataLayout);
}

llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
{
	switch(level)
	{
		case rr::Optimization::Level::None: return ::llvm::CodeGenOpt::None;
		case rr::Optimization::Level::Less: return ::llvm::CodeGenOpt::Less;
		case rr::Optimization::Level::Default: return ::llvm::CodeGenOpt::Default;
		case rr::Optimization::Level::Aggressive: return ::llvm::CodeGenOpt::Aggressive;
		default: UNREACHABLE("Unknown Optimization Level %d", int(level));
	}
	return ::llvm::CodeGenOpt::Default;
}

JITGlobals::JITGlobals(const char *mcpu,
                       const std::vector<std::string> &mattrs,
                       const char *march,
                       const llvm::TargetOptions &targetOptions,
                       const llvm::DataLayout &dataLayout)
    : mcpu(mcpu)
    , mattrs(mattrs)
    , march(march)
    , targetOptions(targetOptions)
    , dataLayout(dataLayout)
{
}

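// MemoryMapper plugs Reactor's page allocation primitives (rr::allocateMemoryPages
// and friends from ExecutableMemory.hpp) into LLVM's SectionMemoryManager, so the
// memory backing JIT'd code sections is allocated, protected and freed by Reactor.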
class MemoryMapper : public llvm::SectionMemoryManager::MemoryMapper
{
public:
	MemoryMapper() {}
	~MemoryMapper() final {}

	llvm::sys::MemoryBlock allocateMappedMemory(
	    llvm::SectionMemoryManager::AllocationPurpose purpose,
	    size_t numBytes, const llvm::sys::MemoryBlock *const nearBlock,
	    unsigned flags, std::error_code &errorCode) final
	{
		errorCode = std::error_code();

		// Round up numBytes to page size.
		size_t pageSize = rr::memoryPageSize();
		numBytes = (numBytes + pageSize - 1) & ~(pageSize - 1);
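		// (pageSize is assumed to be a power of two, so the mask rounds up to the
		// next page multiple; e.g. with 4096-byte pages, 5000 bytes becomes 8192.)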

		bool need_exec =
		    purpose == llvm::SectionMemoryManager::AllocationPurpose::Code;
		void *addr = rr::allocateMemoryPages(
		    numBytes, flagsToPermissions(flags), need_exec);
		if(!addr)
			return llvm::sys::MemoryBlock();
		return llvm::sys::MemoryBlock(addr, numBytes);
	}

	std::error_code protectMappedMemory(const llvm::sys::MemoryBlock &block,
	                                    unsigned flags)
	{
		// Round down base address to align with a page boundary. This matches
		// DefaultMMapper behavior.
		void *addr = block.base();
#if LLVM_VERSION_MAJOR >= 9
		size_t size = block.allocatedSize();
#else
		size_t size = block.size();
#endif
		size_t pageSize = rr::memoryPageSize();
		addr = reinterpret_cast<void *>(
		    reinterpret_cast<uintptr_t>(addr) & ~(pageSize - 1));
		size += reinterpret_cast<uintptr_t>(block.base()) -
		        reinterpret_cast<uintptr_t>(addr);

		rr::protectMemoryPages(addr, size, flagsToPermissions(flags));
		return std::error_code();
	}

	std::error_code releaseMappedMemory(llvm::sys::MemoryBlock &block)
	{
#if LLVM_VERSION_MAJOR >= 9
		size_t size = block.allocatedSize();
#else
		size_t size = block.size();
#endif

		rr::deallocateMemoryPages(block.base(), size);
		return std::error_code();
	}

private:
	int flagsToPermissions(unsigned flags)
	{
		int result = 0;
		if(flags & llvm::sys::Memory::MF_READ)
		{
			result |= rr::PERMISSION_READ;
		}
		if(flags & llvm::sys::Memory::MF_WRITE)
		{
			result |= rr::PERMISSION_WRITE;
		}
		if(flags & llvm::sys::Memory::MF_EXEC)
		{
			result |= rr::PERMISSION_EXECUTE;
		}
		return result;
	}
};

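// Rounds val up to the nearest multiple of alignment; e.g. alignUp(13, 8) == 16.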
template<typename T>
T alignUp(T val, T alignment)
{
	return alignment * ((val + alignment - 1) / alignment);
}

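// Allocates 'size' bytes aligned to 'alignment' (which must be less than 256).
// The byte immediately before the returned pointer stores the offset back to
// the underlying new[] allocation, which alignedFree() uses to delete it.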
void *alignedAlloc(size_t size, size_t alignment)
{
	ASSERT(alignment < 256);
	auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
	auto aligned = allocation;
	aligned += sizeof(uint8_t);  // Make space for the base-address offset.
	aligned = reinterpret_cast<uint8_t *>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment));  // align
	auto offset = static_cast<uint8_t>(aligned - allocation);
	aligned[-1] = offset;
	return aligned;
}

void alignedFree(void *ptr)
{
	auto aligned = reinterpret_cast<uint8_t *>(ptr);
	auto offset = aligned[-1];
	auto allocation = aligned - offset;
	delete[] allocation;
}

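// Helpers backing the "atomic_load" / "atomic_store" symbols registered in
// Resolver below. The raw pointer is treated as a std::atomic<T>, and
// rr::atomicOrdering() translates LLVM's ordering enum into a std::memory_order.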
template<typename T>
static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
{
	*reinterpret_cast<T *>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), rr::atomicOrdering(ordering));
}

template<typename T>
static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
{
	std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), *reinterpret_cast<T *>(val), rr::atomicOrdering(ordering));
}

#ifdef __ANDROID__
template<typename F>
static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
{
	// Build an arbitrary op out of looped CAS
	for(;;)
	{
		uint32_t expected = *ptr;
		uint32_t desired = f(expected, val);

		if(expected == __sync_val_compare_and_swap_4(ptr, expected, desired))
		{
			return expected;
		}
	}
}
#endif

void *resolveExternalSymbol(const char *name)
{
	struct Atomic
	{
		static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
				case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
				case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
				case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
				case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
				default:
					UNIMPLEMENTED_NO_BUG("Atomic::load(size: %d)", int(size));
			}
		}
		static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
				case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
				case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
				case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
				case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
				default:
					UNIMPLEMENTED_NO_BUG("Atomic::store(size: %d)", int(size));
			}
		}
	};

	struct F
	{
		static void nop() {}
		static void neverCalled() { UNREACHABLE("Should never be called"); }

		static void *coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
		static void coroutine_free_frame(void *ptr) { alignedFree(ptr); }

#ifdef __ANDROID__
		// forwarders since we can't take address of builtins
		static void sync_synchronize() { __sync_synchronize(); }
		static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
		static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
		static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
		static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
		static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
		static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
		static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }

		static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a, b); });
		}
		static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a, b); });
		}
		static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a, b); });
		}
		static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); });
		}
#endif
	};

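	// Resolver maps the external symbol names referenced by generated modules to
	// host function pointers; anything not registered here trips the ASSERT_MSG
	// at the bottom of resolveExternalSymbol().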
	class Resolver
	{
	public:
		using FunctionMap = std::unordered_map<std::string, void *>;

		FunctionMap functions;

		Resolver()
		{
			functions.emplace("nop", reinterpret_cast<void *>(F::nop));
			functions.emplace("floorf", reinterpret_cast<void *>(floorf));
			functions.emplace("nearbyintf", reinterpret_cast<void *>(nearbyintf));
			functions.emplace("truncf", reinterpret_cast<void *>(truncf));
			functions.emplace("printf", reinterpret_cast<void *>(printf));
			functions.emplace("puts", reinterpret_cast<void *>(puts));
			functions.emplace("fmodf", reinterpret_cast<void *>(fmodf));

			functions.emplace("sinf", reinterpret_cast<void *>(sinf));
			functions.emplace("cosf", reinterpret_cast<void *>(cosf));
			functions.emplace("asinf", reinterpret_cast<void *>(asinf));
			functions.emplace("acosf", reinterpret_cast<void *>(acosf));
			functions.emplace("atanf", reinterpret_cast<void *>(atanf));
			functions.emplace("sinhf", reinterpret_cast<void *>(sinhf));
			functions.emplace("coshf", reinterpret_cast<void *>(coshf));
			functions.emplace("tanhf", reinterpret_cast<void *>(tanhf));
			functions.emplace("asinhf", reinterpret_cast<void *>(asinhf));
			functions.emplace("acoshf", reinterpret_cast<void *>(acoshf));
			functions.emplace("atanhf", reinterpret_cast<void *>(atanhf));
			functions.emplace("atan2f", reinterpret_cast<void *>(atan2f));
			functions.emplace("powf", reinterpret_cast<void *>(powf));
			functions.emplace("expf", reinterpret_cast<void *>(expf));
			functions.emplace("logf", reinterpret_cast<void *>(logf));
			functions.emplace("exp2f", reinterpret_cast<void *>(exp2f));
			functions.emplace("log2f", reinterpret_cast<void *>(log2f));

			functions.emplace("sin", reinterpret_cast<void *>(static_cast<double (*)(double)>(sin)));
			functions.emplace("cos", reinterpret_cast<void *>(static_cast<double (*)(double)>(cos)));
			functions.emplace("asin", reinterpret_cast<void *>(static_cast<double (*)(double)>(asin)));
			functions.emplace("acos", reinterpret_cast<void *>(static_cast<double (*)(double)>(acos)));
			functions.emplace("atan", reinterpret_cast<void *>(static_cast<double (*)(double)>(atan)));
			functions.emplace("sinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(sinh)));
			functions.emplace("cosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(cosh)));
			functions.emplace("tanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(tanh)));
			functions.emplace("asinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(asinh)));
			functions.emplace("acosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(acosh)));
			functions.emplace("atanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(atanh)));
			functions.emplace("atan2", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(atan2)));
			functions.emplace("pow", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(pow)));
			functions.emplace("exp", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp)));
			functions.emplace("log", reinterpret_cast<void *>(static_cast<double (*)(double)>(log)));
			functions.emplace("exp2", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp2)));
			functions.emplace("log2", reinterpret_cast<void *>(static_cast<double (*)(double)>(log2)));

			functions.emplace("atomic_load", reinterpret_cast<void *>(Atomic::load));
			functions.emplace("atomic_store", reinterpret_cast<void *>(Atomic::store));

			// FIXME(b/119409619): use an allocator here so we can control all memory allocations
			functions.emplace("coroutine_alloc_frame", reinterpret_cast<void *>(F::coroutine_alloc_frame));
			functions.emplace("coroutine_free_frame", reinterpret_cast<void *>(F::coroutine_free_frame));

#ifdef __APPLE__
			functions.emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
#elif defined(__linux__)
			functions.emplace("sincosf", reinterpret_cast<void *>(sincosf));
#elif defined(_WIN64)
			functions.emplace("chkstk", reinterpret_cast<void *>(__chkstk));
#elif defined(_WIN32)
			functions.emplace("chkstk", reinterpret_cast<void *>(_chkstk));
#endif

#ifdef __ANDROID__
			functions.emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void *>(F::neverCalled));
			functions.emplace("sync_synchronize", reinterpret_cast<void *>(F::sync_synchronize));
			functions.emplace("sync_fetch_and_add_4", reinterpret_cast<void *>(F::sync_fetch_and_add_4));
			functions.emplace("sync_fetch_and_and_4", reinterpret_cast<void *>(F::sync_fetch_and_and_4));
			functions.emplace("sync_fetch_and_or_4", reinterpret_cast<void *>(F::sync_fetch_and_or_4));
			functions.emplace("sync_fetch_and_xor_4", reinterpret_cast<void *>(F::sync_fetch_and_xor_4));
			functions.emplace("sync_fetch_and_sub_4", reinterpret_cast<void *>(F::sync_fetch_and_sub_4));
			functions.emplace("sync_lock_test_and_set_4", reinterpret_cast<void *>(F::sync_lock_test_and_set_4));
			functions.emplace("sync_val_compare_and_swap_4", reinterpret_cast<void *>(F::sync_val_compare_and_swap_4));
			functions.emplace("sync_fetch_and_max_4", reinterpret_cast<void *>(F::sync_fetch_and_max_4));
			functions.emplace("sync_fetch_and_min_4", reinterpret_cast<void *>(F::sync_fetch_and_min_4));
			functions.emplace("sync_fetch_and_umax_4", reinterpret_cast<void *>(F::sync_fetch_and_umax_4));
			functions.emplace("sync_fetch_and_umin_4", reinterpret_cast<void *>(F::sync_fetch_and_umin_4));
#endif
#if __has_feature(memory_sanitizer)
			functions.emplace("msan_unpoison", reinterpret_cast<void *>(__msan_unpoison));
#endif
		}
	};

	static Resolver resolver;

	// Trim off any underscores from the start of the symbol. LLVM likes
	// to prepend these on macOS.
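	// For example, a lookup for "_sinf" finds the entry registered above as "sinf".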
	const char *trimmed = name;
	while(trimmed[0] == '_') { trimmed++; }

	auto it = resolver.functions.find(trimmed);
	// Missing functions will likely make the module fail in exciting non-obvious ways.
	ASSERT_MSG(it != resolver.functions.end(), "Missing external function: '%s'", name);
	return it->second;
}

// JITRoutine is an rr::Routine that holds an LLVM JIT session, compiler and
// object layer, as each routine may require different target machine
// settings and no Reactor routine directly links against another.
class JITRoutine : public rr::Routine
{
#if LLVM_VERSION_MAJOR >= 8
	using ObjLayer = llvm::orc::LegacyRTDyldObjectLinkingLayer;
	using CompileLayer = llvm::orc::LegacyIRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
#else
	using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
	using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
#endif

public:
	JITRoutine(
	    std::unique_ptr<llvm::Module> module,
	    llvm::Function **funcs,
	    size_t count,
	    const rr::Config &config)
	    : resolver(createLegacyLookupResolver(
	          session,
	          [&](const llvm::StringRef &name) {
		          void *func = resolveExternalSymbol(name.str().c_str());
		          if(func != nullptr)
		          {
			          return llvm::JITSymbol(
			              reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
		          }
		          return objLayer.findSymbol(name, true);
	          },
	          [](llvm::Error err) {
		          if(err)
		          {
			          // TODO: Log the symbol resolution errors.
			          return;
		          }
	          }))
	    , targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel()))
	    , compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine))
	    , objLayer(
	          session,
	          [this](llvm::orc::VModuleKey) {
		          return ObjLayer::Resources{ std::make_shared<llvm::SectionMemoryManager>(&memoryMapper), resolver };
	          },
	          ObjLayer::NotifyLoadedFtor(),
	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
#ifdef ENABLE_RR_DEBUG_INFO
		          rr::DebugInfo::NotifyObjectEmitted(Obj, L);
#endif  // ENABLE_RR_DEBUG_INFO
	          },
	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
#ifdef ENABLE_RR_DEBUG_INFO
		          rr::DebugInfo::NotifyFreeingObject(Obj);
#endif  // ENABLE_RR_DEBUG_INFO
	          })
	    , addresses(count)
	{
		std::vector<std::string> mangledNames(count);
		for(size_t i = 0; i < count; i++)
		{
			auto func = funcs[i];
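			// Assign each function a name that is unique across the whole process
			// (numEmittedFunctions is static), so symbols from different modules
			// cannot collide within the JIT session.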
			static size_t numEmittedFunctions = 0;
			std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
			func->setName(name);
			func->setLinkage(llvm::GlobalValue::ExternalLinkage);
			func->setDoesNotThrow();

			llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
			llvm::Mangler::getNameWithPrefix(mangledNameStream, name, JITGlobals::get()->dataLayout);
		}

		auto moduleKey = session.allocateVModule();

		// Once the module is passed to the compileLayer, the
		// llvm::Functions are freed. Make sure funcs are not referenced
		// after this point.
		funcs = nullptr;

		llvm::cantFail(compileLayer.addModule(moduleKey, std::move(module)));

		// Resolve the function addresses.
		for(size_t i = 0; i < count; i++)
		{
			auto symbol = compileLayer.findSymbolIn(moduleKey, mangledNames[i], false);
			if(auto address = symbol.getAddress())
			{
				addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(address.get()));
			}
		}
	}

	const void *getEntry(int index) const override
	{
		return addresses[index];
	}

private:
	std::shared_ptr<llvm::orc::SymbolResolver> resolver;
	std::shared_ptr<llvm::TargetMachine> targetMachine;
	llvm::orc::ExecutionSession session;
	CompileLayer compileLayer;
	MemoryMapper memoryMapper;
	ObjLayer objLayer;
	std::vector<const void *> addresses;
};

}  // anonymous namespace

namespace rr {

JITBuilder::JITBuilder(const rr::Config &config)
    : config(config)
    , module(new llvm::Module("", context))
    , builder(new llvm::IRBuilder<>(context))
{
	module->setDataLayout(JITGlobals::get()->dataLayout);
}

void JITBuilder::optimize(const rr::Config &cfg)
{
#ifdef ENABLE_RR_DEBUG_INFO
	if(debugInfo != nullptr)
	{
		return;  // Don't optimize if we're generating debug info.
	}
#endif  // ENABLE_RR_DEBUG_INFO

	std::unique_ptr<llvm::legacy::PassManager> passManager(
	    new llvm::legacy::PassManager());

	for(auto pass : cfg.getOptimization().getPasses())
	{
		switch(pass)
		{
			case rr::Optimization::Pass::Disabled: break;
			case rr::Optimization::Pass::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
			case rr::Optimization::Pass::LICM: passManager->add(llvm::createLICMPass()); break;
			case rr::Optimization::Pass::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
			case rr::Optimization::Pass::GVN: passManager->add(llvm::createGVNPass()); break;
			case rr::Optimization::Pass::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
			case rr::Optimization::Pass::Reassociate: passManager->add(llvm::createReassociatePass()); break;
			case rr::Optimization::Pass::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
			case rr::Optimization::Pass::SCCP: passManager->add(llvm::createSCCPPass()); break;
			case rr::Optimization::Pass::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
			case rr::Optimization::Pass::EarlyCSEPass: passManager->add(llvm::createEarlyCSEPass()); break;
			default:
				UNREACHABLE("pass: %d", int(pass));
		}
	}

	passManager->run(*module);
}

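// Note: a JITBuilder's module can be acquired only once. std::move() leaves
// 'module' null (hence the ASSERT below), and the JITRoutine constructor
// compiles it and resolves the entry point addresses.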
std::shared_ptr<rr::Routine> JITBuilder::acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
{
	ASSERT(module);
	return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
}

}  // namespace rr