// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Debug.hpp"
#include "EmulatedIntrinsics.hpp"
#include "Print.hpp"
#include "Reactor.hpp"
#include "ReactorDebugInfo.hpp"

#include "ExecutableMemory.hpp"
#include "Optimizer.hpp"

#include "src/IceCfg.h"
#include "src/IceCfgNode.h"
#include "src/IceELFObjectWriter.h"
#include "src/IceELFStreamer.h"
#include "src/IceGlobalContext.h"
#include "src/IceGlobalInits.h"
#include "src/IceTypes.h"

#include "llvm/Support/Compiler.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_os_ostream.h"

#include "marl/event.h"

#if __has_feature(memory_sanitizer)
#	include <sanitizer/msan_interface.h>
#endif

#if defined(_WIN32)
#	ifndef WIN32_LEAN_AND_MEAN
#		define WIN32_LEAN_AND_MEAN
#	endif  // !WIN32_LEAN_AND_MEAN
#	ifndef NOMINMAX
#		define NOMINMAX
#	endif  // !NOMINMAX
#	include <Windows.h>
#endif

#include <array>
#include <cstring>
#include <iostream>
#include <limits>
#include <mutex>

Antonio Maiorano02a39532020-01-21 15:15:34 -050058// Subzero utility functions
59// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050060namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050061namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050062
63Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
64{
65 uint32_t sequenceNumber = 0;
Nicolas Capensff010f92021-02-01 12:22:53 -050066 auto *function = Ice::Cfg::create(context, sequenceNumber).release();
67
68 function->setStackSizeLimit(512 * 1024); // 512 KiB
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050069
70 Ice::CfgLocalAllocatorScope allocScope{ function };
71
72 for(auto type : paramTypes)
73 {
74 Ice::Variable *arg = function->makeVariable(type);
75 function->addArg(arg);
76 }
77
78 Ice::CfgNode *node = function->makeNode();
79 function->setEntryNode(node);
80
81 return function;
82}
83
84Ice::Type getPointerType(Ice::Type elementType)
85{
86 if(sizeof(void *) == 8)
87 {
88 return Ice::IceType_i64;
89 }
90 else
91 {
92 return Ice::IceType_i32;
93 }
94}
95
96Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
97{
98 int typeSize = Ice::typeWidthInBytes(type);
99 int totalSize = typeSize * (arraySize ? arraySize : 1);
100
101 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
102 auto address = function->makeVariable(getPointerType(type));
Nicolas Capens0cfc0432021-02-05 15:18:42 -0500103 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize); // SRoA depends on the alignment to match the type size.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500104 function->getEntryNode()->getInsts().push_front(alloca);
105
106 return address;
107}
108
109Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500110{
111 if(sizeof(void *) == 8)
112 {
113 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
114 }
115 else
116 {
117 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
118 }
119}
120
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400121// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
122Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500123
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400124// Wrapper for calls on C functions with Ice types
125Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
126{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500127 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400128
129 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
130 // then truncate result to bool.
131 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
132 const bool returningBool = (retTy == Ice::IceType_i1);
133 if(returningBool)
134 {
135 ret = function->makeVariable(Ice::IceType_i32);
136 }
137 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500138 {
139 ret = function->makeVariable(retTy);
140 }
141
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400142 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500143 for(auto arg : iceArgs)
144 {
145 call->addArg(arg);
146 }
147
148 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400149
150 if(returningBool)
151 {
152 // Truncate result to bool so that if any (lsb) bits were set, result will be true
153 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
154 }
155
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500156 return ret;
157}
158
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400159Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
160{
161 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
162 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
163}
164
Antonio Maiorano62427e02020-02-13 09:18:05 -0500165// Wrapper for calls on C functions with Ice types
166template<typename Return, typename... CArgs, typename... RArgs>
Nicolas Capens629bf952022-01-18 15:08:14 -0500167Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&...args)
Antonio Maiorano62427e02020-02-13 09:18:05 -0500168{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400169 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
170
Nicolas Capens519cf222020-05-08 15:27:19 -0400171 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500172 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500173 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500174}
175
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400176Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
177{
178 Ice::Variable *to = function->makeVariable(toType);
179 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
180 basicBlock->appendInst(cast);
181 return to;
182}
183
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500184Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500185{
Antonio Maiorano02a39532020-01-21 15:15:34 -0500186 Ice::Variable *result = function->makeVariable(type);
187 auto load = Ice::InstLoad::create(function, result, ptr, align);
188 basicBlock->appendInst(load);
189
190 return result;
191}
192
193} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500194} // namespace
195
// Forward declarations of types defined later in this file, needed by the
// file-local globals below.
namespace rr {
class ELFMemoryStreamer;
class CoroutineGenerator;
}  // namespace rr

201namespace {
202
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -0500203// Used to automatically invoke llvm_shutdown() when driver is unloaded
204llvm::llvm_shutdown_obj llvmShutdownObj;
205
Nicolas Capens157ba262019-12-10 17:49:14 -0500206// Default configuration settings. Must be accessed under mutex lock.
207std::mutex defaultConfigLock;
208rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000209{
Nicolas Capens157ba262019-12-10 17:49:14 -0500210 // This uses a static in a function to avoid the cost of a global static
211 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
212 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000213 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500214 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000215}
216
Nicolas Capens157ba262019-12-10 17:49:14 -0500217Ice::GlobalContext *context = nullptr;
218Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400219Ice::CfgNode *entryBlock = nullptr;
220Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500221Ice::CfgNode *basicBlock = nullptr;
222Ice::CfgLocalAllocatorScope *allocator = nullptr;
223rr::ELFMemoryStreamer *routine = nullptr;
224
225std::mutex codegenMutex;
226
227Ice::ELFFileStreamer *elfFile = nullptr;
228Ice::Fdstream *out = nullptr;
229
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500230// Coroutine globals
231rr::Type *coroYieldType = nullptr;
232std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500233marl::Scheduler &getOrCreateScheduler()
234{
235 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100236 marl::Scheduler::Config cfg;
237 cfg.setWorkerThreadCount(8);
238 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500239 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500240
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500241 return *scheduler;
242}
Nicolas Capens54313fb2021-02-19 14:26:27 -0500243
244rr::Nucleus::OptimizerCallback *optimizerCallback = nullptr;
245
Nicolas Capens157ba262019-12-10 17:49:14 -0500246} // Anonymous namespace
247
248namespace {
249
250#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +0000251# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500252#endif
253
Ben Clayton713b8d32019-12-17 20:37:56 +0000254#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
255# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500256#endif
257
Antonio Maiorano370cba52019-12-31 11:36:07 -0500258Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400259{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500260 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100261 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500262 // Note that Opt_0 and Opt_1 are not implemented by Subzero
263 case rr::Optimization::Level::None: return Ice::Opt_m1;
264 case rr::Optimization::Level::Less: return Ice::Opt_m1;
265 case rr::Optimization::Level::Default: return Ice::Opt_2;
266 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
267 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100268 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500269 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400270}
271
Antonio Maiorano370cba52019-12-31 11:36:07 -0500272Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
273{
274 switch(memoryOrder)
275 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500276 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
277 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
278 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
279 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
280 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
281 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
Antonio Maiorano370cba52019-12-31 11:36:07 -0500282 }
283 return Ice::Intrinsics::MemoryOrderInvalid;
284}
285
Nicolas Capens157ba262019-12-10 17:49:14 -0500286class CPUID
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500287{
Nicolas Capens157ba262019-12-10 17:49:14 -0500288public:
289 const static bool ARM;
290 const static bool SSE4_1;
Nicolas Capens47dc8672017-04-25 12:54:39 -0400291
Nicolas Capens157ba262019-12-10 17:49:14 -0500292private:
293 static void cpuid(int registers[4], int info)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100294 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000295#if defined(__i386__) || defined(__x86_64__)
296# if defined(_WIN32)
297 __cpuid(registers, info);
298# else
299 __asm volatile("cpuid"
300 : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
301 : "a"(info));
302# endif
303#else
304 registers[0] = 0;
305 registers[1] = 0;
306 registers[2] = 0;
307 registers[3] = 0;
308#endif
Ben Clayton55bc37a2019-07-04 12:17:12 +0100309 }
310
Sean Risser46a649d2021-08-30 15:44:33 -0400311 constexpr static bool detectARM()
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500312 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000313#if defined(__arm__) || defined(__aarch64__)
314 return true;
315#elif defined(__i386__) || defined(__x86_64__)
316 return false;
317#elif defined(__mips__)
318 return false;
319#else
320# error "Unknown architecture"
321#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500322 }
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500323
Nicolas Capens157ba262019-12-10 17:49:14 -0500324 static bool detectSSE4_1()
325 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000326#if defined(__i386__) || defined(__x86_64__)
327 int registers[4];
328 cpuid(registers, 1);
329 return (registers[2] & 0x00080000) != 0;
330#else
331 return false;
332#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500333 }
334};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500335
Sean Risser46a649d2021-08-30 15:44:33 -0400336constexpr bool CPUID::ARM = CPUID::detectARM();
Nicolas Capens157ba262019-12-10 17:49:14 -0500337const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
Sean Risser46a649d2021-08-30 15:44:33 -0400338constexpr bool emulateIntrinsics = false;
339constexpr bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400340
Nicolas Capens157ba262019-12-10 17:49:14 -0500341constexpr bool subzeroDumpEnabled = false;
342constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500343
344#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500345static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
346static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500347#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500348
349} // anonymous namespace
350
351namespace rr {
352
Nicolas Capens70505b42022-01-31 22:29:48 -0500353std::string Caps::backendName()
Antonio Maioranoab210f92019-12-13 16:26:24 -0500354{
355 return "Subzero";
356}
357
Nicolas Capens70505b42022-01-31 22:29:48 -0500358bool Caps::coroutinesSupported()
359{
360 return true;
361}
362
363bool Caps::fmaIsFast()
364{
365 // TODO(b/214591655): Subzero currently never emits FMA instructions. std::fma() is called instead.
366 return false;
367}
Nicolas Capens157ba262019-12-10 17:49:14 -0500368
369enum EmulatedType
370{
371 EmulatedShift = 16,
372 EmulatedV2 = 2 << EmulatedShift,
373 EmulatedV4 = 4 << EmulatedShift,
374 EmulatedV8 = 8 << EmulatedShift,
375 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
376
377 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
378 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
379 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000380 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
381 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500382 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
383};
384
Ben Clayton713b8d32019-12-17 20:37:56 +0000385class Value : public Ice::Operand
386{};
387class SwitchCases : public Ice::InstSwitch
388{};
389class BasicBlock : public Ice::CfgNode
390{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500391
392Ice::Type T(Type *t)
393{
394 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
395 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500396}
397
Nicolas Capens157ba262019-12-10 17:49:14 -0500398Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400399{
Ben Clayton713b8d32019-12-17 20:37:56 +0000400 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500401}
402
403Type *T(EmulatedType t)
404{
Ben Clayton713b8d32019-12-17 20:37:56 +0000405 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500406}
407
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500408std::vector<Ice::Type> T(const std::vector<Type *> &types)
409{
410 std::vector<Ice::Type> result;
411 result.reserve(types.size());
412 for(auto &t : types)
413 {
414 result.push_back(T(t));
415 }
416 return result;
417}
418
Nicolas Capens157ba262019-12-10 17:49:14 -0500419Value *V(Ice::Operand *v)
420{
Ben Clayton713b8d32019-12-17 20:37:56 +0000421 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500422}
423
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500424Ice::Operand *V(Value *v)
425{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500426 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500427}
428
Antonio Maiorano62427e02020-02-13 09:18:05 -0500429std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
430{
431 std::vector<Ice::Operand *> result;
432 result.reserve(values.size());
433 for(auto &v : values)
434 {
435 result.push_back(V(v));
436 }
437 return result;
438}
439
Nicolas Capens157ba262019-12-10 17:49:14 -0500440BasicBlock *B(Ice::CfgNode *b)
441{
Ben Clayton713b8d32019-12-17 20:37:56 +0000442 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500443}
444
445static size_t typeSize(Type *type)
446{
447 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400448 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500449 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800450 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500451 case Type_v2i32: return 8;
452 case Type_v4i16: return 8;
453 case Type_v2i16: return 4;
454 case Type_v8i8: return 8;
455 case Type_v4i8: return 4;
456 case Type_v2f32: return 8;
457 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500458 }
459 }
460
461 return Ice::typeWidthInBytes(T(type));
462}
463
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400464static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500465{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400466 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500467 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
468 {
469 Nucleus::createRetVoid();
470 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400471
472 // Connect the entry block to the top of the initial basic block
473 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
474 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500475}
476
Ben Clayton713b8d32019-12-17 20:37:56 +0000477using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
478using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500479
480inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
481{
Ben Clayton713b8d32019-12-17 20:37:56 +0000482 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500483}
484
485inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
486{
487 return &sectionHeader(elfHeader)[index];
488}
489
490static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
491{
492 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
493
494 uint32_t index = relocation.getSymbol();
495 int table = relocationTable.sh_link;
496 void *symbolValue = nullptr;
497
498 if(index != SHN_UNDEF)
499 {
500 if(table == SHN_UNDEF) return nullptr;
501 const SectionHeader *symbolTable = elfSection(elfHeader, table);
502
503 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
504 if(index >= symtab_entries)
505 {
506 ASSERT(index < symtab_entries && "Symbol Index out of range");
507 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800508 }
509
Nicolas Capens157ba262019-12-10 17:49:14 -0500510 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000511 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500512 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800513
Nicolas Capens157ba262019-12-10 17:49:14 -0500514 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400515 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500516 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000517 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400518 }
519 else
520 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500521 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400522 }
Nicolas Capens66478362016-10-13 15:36:36 -0400523 }
524
Nicolas Capens157ba262019-12-10 17:49:14 -0500525 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000526 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500527
528 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400529 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400530 switch(relocation.getType())
531 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500532 case R_ARM_NONE:
533 // No relocation
534 break;
535 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500536 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000537 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500538 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
539 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
540 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400541 break;
Nicolas Capens112faf42019-12-13 17:32:26 -0500542 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500543 {
544 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
545 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
546 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400547 break;
Nicolas Capens112faf42019-12-13 17:32:26 -0500548 default:
549 ASSERT(false && "Unsupported relocation type");
550 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400551 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500552 }
553 else
554 {
555 switch(relocation.getType())
556 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500557 case R_386_NONE:
558 // No relocation
559 break;
560 case R_386_32:
561 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
562 break;
563 case R_386_PC32:
564 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
565 break;
566 default:
567 ASSERT(false && "Unsupported relocation type");
568 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500569 }
Nicolas Capens66478362016-10-13 15:36:36 -0400570 }
571
Nicolas Capens157ba262019-12-10 17:49:14 -0500572 return symbolValue;
573}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400574
Nicolas Capens157ba262019-12-10 17:49:14 -0500575static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
576{
577 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
578
579 uint32_t index = relocation.getSymbol();
580 int table = relocationTable.sh_link;
581 void *symbolValue = nullptr;
582
583 if(index != SHN_UNDEF)
584 {
585 if(table == SHN_UNDEF) return nullptr;
586 const SectionHeader *symbolTable = elfSection(elfHeader, table);
587
588 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
589 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400590 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500591 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400592 return nullptr;
593 }
594
Nicolas Capens157ba262019-12-10 17:49:14 -0500595 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000596 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400598
Nicolas Capens157ba262019-12-10 17:49:14 -0500599 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400600 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500601 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000602 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 }
604 else
605 {
606 return nullptr;
607 }
608 }
Nicolas Capens66478362016-10-13 15:36:36 -0400609
Nicolas Capens157ba262019-12-10 17:49:14 -0500610 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000611 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
612 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400613
Nicolas Capens157ba262019-12-10 17:49:14 -0500614 switch(relocation.getType())
615 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500616 case R_X86_64_NONE:
617 // No relocation
618 break;
619 case R_X86_64_64:
620 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
621 break;
622 case R_X86_64_PC32:
623 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
624 break;
625 case R_X86_64_32S:
626 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
627 break;
628 default:
629 ASSERT(false && "Unsupported relocation type");
630 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500631 }
632
633 return symbolValue;
634}
635
// Location of one function's machine code inside a loaded ELF image.
struct EntryPoint
{
	const void *entry = nullptr;  // Start of the code; nullptr until resolved
	size_t codeSize = 0;          // Size of the code in bytes
};

642std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
643{
644 ASSERT(functionNames.size() > 0);
645 std::vector<EntryPoint> entryPoints(functionNames.size());
646
Ben Clayton713b8d32019-12-17 20:37:56 +0000647 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500648
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400649 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500650 if(!elfHeader->checkMagic())
651 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400652 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500653 }
654
655 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000656 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
657#if defined(__i386__)
658 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
659#elif defined(__x86_64__)
660 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
661#elif defined(__arm__)
662 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
663#elif defined(__aarch64__)
664 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
665#elif defined(__mips__)
666 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
667#else
668# error "Unsupported platform"
669#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500670
Ben Clayton713b8d32019-12-17 20:37:56 +0000671 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500672
673 for(int i = 0; i < elfHeader->e_shnum; i++)
674 {
675 if(sectionHeader[i].sh_type == SHT_PROGBITS)
676 {
677 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
678 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400679 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500680 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400681 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500682
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400683 for(size_t j = 0; j < functionNames.size(); ++j)
684 {
685 if(strstr(sectionName, functionNames[j]) != nullptr)
686 {
687 return j;
688 }
689 }
690
691 UNREACHABLE("Failed to find executable section that matches input function names");
692 return static_cast<size_t>(-1);
693 };
694
695 size_t index = findSectionNameEntryIndex();
696 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
697 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400698 }
699 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500700 else if(sectionHeader[i].sh_type == SHT_REL)
701 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000702 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400703
Nicolas Capens157ba262019-12-10 17:49:14 -0500704 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
705 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000706 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500707 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
708 }
709 }
710 else if(sectionHeader[i].sh_type == SHT_RELA)
711 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000712 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500713
714 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
715 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000716 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500717 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
718 }
719 }
720 }
721
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400722 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500723}
724
725template<typename T>
726struct ExecutableAllocator
727{
728 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000729 template<class U>
730 ExecutableAllocator(const ExecutableAllocator<U> &other)
731 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500732
733 using value_type = T;
734 using size_type = std::size_t;
735
736 T *allocate(size_type n)
737 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000738 return (T *)allocateMemoryPages(
739 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500740 }
741
742 void deallocate(T *p, size_type n)
743 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800744 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500745 }
746};
747
748class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
749{
750 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
751 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
752
753public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000754 ELFMemoryStreamer()
755 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500756 {
757 position = 0;
758 buffer.reserve(0x1000);
759 }
760
761 ~ELFMemoryStreamer() override
762 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500763 }
764
765 void write8(uint8_t Value) override
766 {
767 if(position == (uint64_t)buffer.size())
768 {
769 buffer.push_back(Value);
770 position++;
771 }
772 else if(position < (uint64_t)buffer.size())
773 {
774 buffer[position] = Value;
775 position++;
776 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000777 else
778 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500779 }
780
781 void writeBytes(llvm::StringRef Bytes) override
782 {
783 std::size_t oldSize = buffer.size();
784 buffer.resize(oldSize + Bytes.size());
785 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
786 position += Bytes.size();
787 }
788
789 uint64_t tell() const override { return position; }
790
791 void seek(uint64_t Off) override { position = Off; }
792
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400793 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500794 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400795 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500796
797#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500798 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
799#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400800 for(auto &entryPoint : entryPoints)
801 {
802 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
803 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500804#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500805
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400806 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400807 }
808
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500809 void finalize()
810 {
811 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
812
813 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
814 }
815
Ben Clayton713b8d32019-12-17 20:37:56 +0000816 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400817 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500818 ASSERT(func);
819 funcs[index] = func;
820 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400821
Nicolas Capens157ba262019-12-10 17:49:14 -0500822 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400823 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500824 ASSERT(funcs[index]);
825 return funcs[index];
826 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400827
Antonio Maiorano02a39532020-01-21 15:15:34 -0500828 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500829 {
Nicolas Capens4e75f452021-01-28 01:52:56 -0500830 // Check if we already have a suitable constant.
831 for(const auto &c : constantsPool)
832 {
833 void *ptr = c.data.get();
834 size_t space = c.space;
835
836 void *alignedPtr = std::align(alignment, size, ptr, space);
837
838 if(space < size)
839 {
840 continue;
841 }
842
843 if(memcmp(data, alignedPtr, size) == 0)
844 {
845 return alignedPtr;
846 }
847 }
848
Antonio Maiorano02a39532020-01-21 15:15:34 -0500849 // TODO(b/148086935): Replace with a buffer allocator.
850 size_t space = size + alignment;
851 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
852 void *ptr = buf.get();
853 void *alignedPtr = std::align(alignment, size, ptr, space);
854 ASSERT(alignedPtr);
855 memcpy(alignedPtr, data, size);
Nicolas Capens4e75f452021-01-28 01:52:56 -0500856 constantsPool.emplace_back(std::move(buf), space);
857
Antonio Maiorano02a39532020-01-21 15:15:34 -0500858 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500859 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400860
Nicolas Capens157ba262019-12-10 17:49:14 -0500861private:
Nicolas Capens4e75f452021-01-28 01:52:56 -0500862 struct Constant
863 {
864 Constant(std::unique_ptr<uint8_t[]> data, size_t space)
865 : data(std::move(data))
866 , space(space)
867 {}
868
869 std::unique_ptr<uint8_t[]> data;
870 size_t space;
871 };
872
Ben Clayton713b8d32019-12-17 20:37:56 +0000873 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500874 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
875 std::size_t position;
Nicolas Capens4e75f452021-01-28 01:52:56 -0500876 std::vector<Constant> constantsPool;
Nicolas Capens157ba262019-12-10 17:49:14 -0500877};
878
#ifdef ENABLE_RR_PRINT
// Emits a call to rr::DebugPrintf, with an i32 return type, passing vals as
// the call arguments.
void VPrintf(const std::vector<Value *> &vals)
{
	const void *debugPrintf = reinterpret_cast<const void *>(rr::DebugPrintf);

	sz::Call(::function, ::basicBlock, Ice::IceType_i32, debugPrintf, V(vals), true);
}
#endif  // ENABLE_RR_PRINT
885
Nicolas Capens157ba262019-12-10 17:49:14 -0500886Nucleus::Nucleus()
887{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400888 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500889
890 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
891 Ice::ClFlags::getParsedClFlags(Flags);
892
Ben Clayton713b8d32019-12-17 20:37:56 +0000893#if defined(__arm__)
894 Flags.setTargetArch(Ice::Target_ARM32);
895 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
896#elif defined(__mips__)
897 Flags.setTargetArch(Ice::Target_MIPS32);
898 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
899#else // x86
900 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
901 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
902#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500903 Flags.setOutFileType(Ice::FT_Elf);
904 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
Nicolas Capens157ba262019-12-10 17:49:14 -0500905 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
906 Flags.setDisableHybridAssembly(true);
907
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500908 // Emit functions into separate sections in the ELF so we can find them by name
909 Flags.setFunctionSections(true);
910
Nicolas Capens157ba262019-12-10 17:49:14 -0500911 static llvm::raw_os_ostream cout(std::cout);
912 static llvm::raw_os_ostream cerr(std::cerr);
913
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500914 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500915 {
916 // Decorate text asm with liveness info
917 Flags.setDecorateAsm(true);
918 }
919
Ben Clayton713b8d32019-12-17 20:37:56 +0000920 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500921 {
922 std::error_code errorCode;
923 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
924 ::elfFile = new Ice::ELFFileStreamer(*out);
925 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
926 }
927 else
928 {
929 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
930 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
931 ::routine = elfMemory;
932 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400933
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400934#if !__has_feature(memory_sanitizer)
935 // thread_local variables in shared libraries are initialized at load-time,
936 // but this is not observed by MemorySanitizer if the loader itself was not
Nicolas Capensaf907702021-05-14 11:10:49 -0400937 // instrumented, leading to false-positive uninitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400938 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400939#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500940 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500941}
942
943Nucleus::~Nucleus()
944{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400945 delete Variable::unmaterializedVariables;
946 Variable::unmaterializedVariables = nullptr;
947
Nicolas Capens157ba262019-12-10 17:49:14 -0500948 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500949 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500950
951 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500952 ::allocator = nullptr;
953
Nicolas Capens157ba262019-12-10 17:49:14 -0500954 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500955 ::function = nullptr;
956
Nicolas Capens157ba262019-12-10 17:49:14 -0500957 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500958 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500959
960 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500961 ::elfFile = nullptr;
962
Nicolas Capens157ba262019-12-10 17:49:14 -0500963 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500964 ::out = nullptr;
965
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400966 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500967 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400968 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500969
970 ::codegenMutex.unlock();
971}
972
973void Nucleus::setDefaultConfig(const Config &cfg)
974{
975 std::unique_lock<std::mutex> lock(::defaultConfigLock);
976 ::defaultConfig() = cfg;
977}
978
979void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
980{
981 std::unique_lock<std::mutex> lock(::defaultConfigLock);
982 auto &config = ::defaultConfig();
983 config = cfgEdit.apply(config);
984}
985
986Config Nucleus::getDefaultConfig()
987{
988 std::unique_lock<std::mutex> lock(::defaultConfigLock);
989 return ::defaultConfig();
990}
991
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500992// This function lowers and produces executable binary code in memory for the input functions,
993// and returns a Routine with the entry points to these functions.
994template<size_t Count>
Sean Risser705231f2021-08-19 18:17:24 -0400995static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit *cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500996{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500997 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
998 // and GlobalContext::emitItems.
999
Nicolas Capens81bc9d92019-12-16 15:05:57 -05001000 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -05001001 {
1002 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001003 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -05001004 }
1005
1006 ::context->emitFileHeader();
1007
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001008 // Translate
1009
1010 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -05001011 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001012 Ice::Cfg *currFunc = functions[i];
1013
1014 // Install function allocator in TLS for Cfg-specific container allocators
1015 Ice::CfgLocalAllocatorScope allocScope(currFunc);
1016
1017 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
1018
Nicolas Capens54313fb2021-02-19 14:26:27 -05001019 if(::optimizerCallback)
1020 {
1021 Nucleus::OptimizerReport report;
1022 rr::optimize(currFunc, &report);
1023 ::optimizerCallback(&report);
1024 ::optimizerCallback = nullptr;
1025 }
1026 else
1027 {
1028 rr::optimize(currFunc);
1029 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001030
1031 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001032 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001033
1034 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001035 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001036
1037 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1038
1039 if(subzeroEmitTextAsm)
1040 {
1041 currFunc->emit();
1042 }
1043
1044 currFunc->emitIAS();
Nicolas Capensff010f92021-02-01 12:22:53 -05001045
1046 if(currFunc->hasError())
1047 {
1048 return nullptr;
1049 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001050 }
1051
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001052 // Emit items
1053
1054 ::context->lowerGlobals("");
1055
Nicolas Capens157ba262019-12-10 17:49:14 -05001056 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001057
1058 for(size_t i = 0; i < Count; ++i)
1059 {
1060 Ice::Cfg *currFunc = functions[i];
1061
1062 // Accumulate globals from functions to emit into the "last" section at the end
1063 auto globals = currFunc->getGlobalInits();
1064 if(globals && !globals->empty())
1065 {
1066 ::context->getGlobals()->merge(globals.get());
1067 }
1068
1069 auto assembler = currFunc->releaseAssembler();
1070 assembler->alignFunction();
1071 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1072 }
1073
Nicolas Capens157ba262019-12-10 17:49:14 -05001074 ::context->lowerGlobals("last");
1075 ::context->lowerConstants();
1076 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001077
Nicolas Capens157ba262019-12-10 17:49:14 -05001078 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001079 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001080 objectWriter->writeNonUserSections();
1081
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001082 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001083 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1084 ASSERT(entryPoints.size() == Count);
1085 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001086 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001087 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001088 }
1089
1090 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001091
1092 Routine *handoffRoutine = ::routine;
1093 ::routine = nullptr;
1094
1095 return std::shared_ptr<Routine>(handoffRoutine);
1096}
1097
Sean Risser705231f2021-08-19 18:17:24 -04001098std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit *cfgEdit /* = nullptr */)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001099{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001100 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001101 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1102}
1103
Nicolas Capens157ba262019-12-10 17:49:14 -05001104Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1105{
1106 Ice::Type type = T(t);
1107 int typeSize = Ice::typeWidthInBytes(type);
1108 int totalSize = typeSize * (arraySize ? arraySize : 1);
1109
1110 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1111 auto address = ::function->makeVariable(T(getPointerType(t)));
Nicolas Capens0cfc0432021-02-05 15:18:42 -05001112 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize); // SRoA depends on the alignment to match the type size.
Nicolas Capens157ba262019-12-10 17:49:14 -05001113 ::function->getEntryNode()->getInsts().push_front(alloca);
1114
1115 return V(address);
1116}
1117
1118BasicBlock *Nucleus::createBasicBlock()
1119{
1120 return B(::function->makeNode());
1121}
1122
1123BasicBlock *Nucleus::getInsertBlock()
1124{
1125 return B(::basicBlock);
1126}
1127
1128void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1129{
Nicolas Capens7c296ec2021-02-18 14:10:26 -05001130 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001131
1132 ::basicBlock = basicBlock;
1133}
1134
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001135void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001136{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001137 ASSERT(::function == nullptr);
1138 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001139 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001140 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001141 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001142
1143 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1144
1145 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1146 // becomes invalid if another one is created; for example, when creating await and destroy functions
1147 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1148 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001149 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1150
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001151 ::entryBlock = ::function->getEntryNode();
1152 ::basicBlock = ::function->makeNode();
1153 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001154}
1155
1156Value *Nucleus::getArgument(unsigned int index)
1157{
1158 return V(::function->getArgs()[index]);
1159}
1160
1161void Nucleus::createRetVoid()
1162{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001163 RR_DEBUG_INFO_UPDATE_LOC();
1164
Nicolas Capens157ba262019-12-10 17:49:14 -05001165 // Code generated after this point is unreachable, so any variables
1166 // being read can safely return an undefined value. We have to avoid
1167 // materializing variables after the terminator ret instruction.
1168 Variable::killUnmaterialized();
1169
1170 Ice::InstRet *ret = Ice::InstRet::create(::function);
1171 ::basicBlock->appendInst(ret);
1172}
1173
1174void Nucleus::createRet(Value *v)
1175{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001176 RR_DEBUG_INFO_UPDATE_LOC();
1177
Nicolas Capens157ba262019-12-10 17:49:14 -05001178 // Code generated after this point is unreachable, so any variables
1179 // being read can safely return an undefined value. We have to avoid
1180 // materializing variables after the terminator ret instruction.
1181 Variable::killUnmaterialized();
1182
1183 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1184 ::basicBlock->appendInst(ret);
1185}
1186
1187void Nucleus::createBr(BasicBlock *dest)
1188{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001189 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001190 Variable::materializeAll();
1191
1192 auto br = Ice::InstBr::create(::function, dest);
1193 ::basicBlock->appendInst(br);
1194}
1195
1196void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1197{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001198 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001199 Variable::materializeAll();
1200
1201 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1202 ::basicBlock->appendInst(br);
1203}
1204
1205static bool isCommutative(Ice::InstArithmetic::OpKind op)
1206{
1207 switch(op)
1208 {
Nicolas Capens112faf42019-12-13 17:32:26 -05001209 case Ice::InstArithmetic::Add:
1210 case Ice::InstArithmetic::Fadd:
1211 case Ice::InstArithmetic::Mul:
1212 case Ice::InstArithmetic::Fmul:
1213 case Ice::InstArithmetic::And:
1214 case Ice::InstArithmetic::Or:
1215 case Ice::InstArithmetic::Xor:
1216 return true;
1217 default:
1218 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001219 }
1220}
1221
1222static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1223{
1224 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1225
1226 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1227
1228 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1229 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1230 ::basicBlock->appendInst(arithmetic);
1231
1232 return V(result);
1233}
1234
1235Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1236{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001237 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001238 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1239}
1240
1241Value *Nucleus::createSub(Value *lhs, Value *rhs)
1242{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001243 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001244 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1245}
1246
1247Value *Nucleus::createMul(Value *lhs, Value *rhs)
1248{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001249 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001250 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1251}
1252
1253Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1254{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001255 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001256 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1257}
1258
1259Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1260{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001261 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001262 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1263}
1264
1265Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1266{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001267 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001268 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1269}
1270
1271Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1272{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001273 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001274 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1275}
1276
1277Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1278{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001279 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001280 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1281}
1282
1283Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001285 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001286 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1287}
1288
1289Value *Nucleus::createURem(Value *lhs, Value *rhs)
1290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001292 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1293}
1294
1295Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1296{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001297 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001298 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1299}
1300
1301Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1302{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001303 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001304 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1305 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001306 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001307 return nullptr;
1308}
1309
1310RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1311{
1312 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001313}
1314
1315Value *Nucleus::createShl(Value *lhs, Value *rhs)
1316{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001317 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001318 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1319}
1320
1321Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1322{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001323 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001324 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1325}
1326
1327Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1328{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001329 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001330 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1331}
1332
1333Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1334{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001335 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001336 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1337}
1338
1339Value *Nucleus::createOr(Value *lhs, Value *rhs)
1340{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001341 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001342 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1343}
1344
1345Value *Nucleus::createXor(Value *lhs, Value *rhs)
1346{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001347 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001348 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1349}
1350
1351Value *Nucleus::createNeg(Value *v)
1352{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001353 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001354 return createSub(createNullValue(T(v->getType())), v);
1355}
1356
1357Value *Nucleus::createFNeg(Value *v)
1358{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001359 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001360 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1361 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001362
1363 return createFSub(negativeZero, v);
1364}
1365
1366Value *Nucleus::createNot(Value *v)
1367{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001368 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001369 if(Ice::isScalarIntegerType(v->getType()))
1370 {
1371 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1372 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001373 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001374 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001375 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001376 return createXor(v, createConstantVector(c, T(v->getType())));
1377 }
1378}
1379
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001380static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1381{
1382#if defined(__i386__) || defined(__x86_64__)
1383 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1384 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1385 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1386#else
1387 if(atomic)
1388 {
1389 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1390 }
1391 if(memoryOrder != std::memory_order_relaxed)
1392 {
1393 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1394 }
1395#endif
1396
1397 // Vulkan doesn't allow sequential memory order
1398 ASSERT(memoryOrder != std::memory_order_seq_cst);
1399}
1400
Nicolas Capens157ba262019-12-10 17:49:14 -05001401Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1402{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001403 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001404 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001405
1406 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001407 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001408
Ben Clayton713b8d32019-12-17 20:37:56 +00001409 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001410 {
1411 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001412 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001413 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001414 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001415 auto pointer = RValue<Pointer<Byte>>(ptr);
1416 Int x = *Pointer<Int>(pointer);
1417
1418 Int4 vector;
1419 vector = Insert(vector, x, 0);
1420
Antonio Maiorano02a39532020-01-21 15:15:34 -05001421 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001422 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1423 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001424 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001425 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001426 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001427 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001428 auto pointer = RValue<Pointer<Byte>>(ptr);
1429 Int x = *Pointer<Int>(pointer);
1430 Int y = *Pointer<Int>(pointer + 4);
1431
1432 Int4 vector;
1433 vector = Insert(vector, x, 0);
1434 vector = Insert(vector, y, 1);
1435
Antonio Maiorano02a39532020-01-21 15:15:34 -05001436 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001437 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1438 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001439 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001440 else
1441 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001442 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001443 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001444 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001445 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Antonio Maiorano02a39532020-01-21 15:15:34 -05001446 result = ::function->makeVariable(T(type));
Nicolas Capens33a77f72021-02-08 15:04:38 -05001447 auto load = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001448 load->addArg(ptr);
1449 load->addArg(::context->getConstantInt32(typeSize(type)));
1450 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001451 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001452 }
1453 else
1454 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001455 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001456 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001457
Antonio Maiorano02a39532020-01-21 15:15:34 -05001458 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001459 return V(result);
1460}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001461
Nicolas Capens157ba262019-12-10 17:49:14 -05001462Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1463{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001464 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001465 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001466
Ben Clayton713b8d32019-12-17 20:37:56 +00001467#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001468 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001469 if(align != 0)
1470 {
1471 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1472 call->addArg(ptr);
1473 call->addArg(::context->getConstantInt64(typeSize(type)));
1474 ::basicBlock->appendInst(call);
1475 }
1476#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001477
Nicolas Capens157ba262019-12-10 17:49:14 -05001478 int valueType = (int)reinterpret_cast<intptr_t>(type);
1479
Ben Clayton713b8d32019-12-17 20:37:56 +00001480 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001481 {
1482 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001483 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001484 if(typeSize(type) == 4)
1485 {
1486 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1487 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1488 ::basicBlock->appendInst(bitcast);
1489
1490 RValue<Int4> v(V(vector));
1491
1492 auto pointer = RValue<Pointer<Byte>>(ptr);
1493 Int x = Extract(v, 0);
1494 *Pointer<Int>(pointer) = x;
1495 }
1496 else if(typeSize(type) == 8)
1497 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001498 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001499 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1500 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1501 ::basicBlock->appendInst(bitcast);
1502
1503 RValue<Int4> v(V(vector));
1504
1505 auto pointer = RValue<Pointer<Byte>>(ptr);
1506 Int x = Extract(v, 0);
1507 *Pointer<Int>(pointer) = x;
1508 Int y = Extract(v, 1);
1509 *Pointer<Int>(pointer + 4) = y;
1510 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001511 else
1512 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001513 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001514 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001515 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001516 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001517 auto store = Ice::InstIntrinsic::create(::function, 3, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001518 store->addArg(value);
1519 store->addArg(ptr);
1520 store->addArg(::context->getConstantInt32(typeSize(type)));
1521 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001522 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001523 }
1524 else
1525 {
1526 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001527
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001528 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001529 ::basicBlock->appendInst(store);
1530 }
1531
1532 return value;
1533}
1534
1535Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1536{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001537 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001538 ASSERT(index->getType() == Ice::IceType_i32);
1539
1540 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1541 {
1542 int32_t offset = constant->getValue() * (int)typeSize(type);
1543
1544 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001545 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001546 return ptr;
1547 }
1548
Nicolas Capens157ba262019-12-10 17:49:14 -05001549 return createAdd(ptr, createConstantInt(offset));
1550 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001551
Nicolas Capens157ba262019-12-10 17:49:14 -05001552 if(!Ice::isByteSizedType(T(type)))
1553 {
1554 index = createMul(index, createConstantInt((int)typeSize(type)));
1555 }
1556
Ben Clayton713b8d32019-12-17 20:37:56 +00001557 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001558 {
1559 if(unsignedIndex)
1560 {
1561 index = createZExt(index, T(Ice::IceType_i64));
1562 }
1563 else
1564 {
1565 index = createSExt(index, T(Ice::IceType_i64));
1566 }
1567 }
1568
1569 return createAdd(ptr, index);
1570}
1571
Antonio Maiorano370cba52019-12-31 11:36:07 -05001572static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1573{
1574 Ice::Variable *result = ::function->makeVariable(value->getType());
1575
1576 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001577 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001578 auto op = ::context->getConstantInt32(rmwOp);
1579 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1580 inst->addArg(op);
1581 inst->addArg(ptr);
1582 inst->addArg(value);
1583 inst->addArg(order);
1584 ::basicBlock->appendInst(inst);
1585
1586 return V(result);
1587}
1588
Nicolas Capens157ba262019-12-10 17:49:14 -05001589Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1590{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001591 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001592 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001593}
1594
1595Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1596{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001597 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001598 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001599}
1600
1601Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1602{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001603 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001604 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001605}
1606
1607Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1608{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001609 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001610 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001611}
1612
1613Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1614{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001615 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001616 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001617}
1618
1619Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1620{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001621 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001622 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001623}
1624
1625Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1626{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001627 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001628 Ice::Variable *result = ::function->makeVariable(value->getType());
1629
1630 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001631 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001632 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1633 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1634 inst->addArg(ptr);
1635 inst->addArg(compare);
1636 inst->addArg(value);
1637 inst->addArg(orderEq);
1638 inst->addArg(orderNeq);
1639 ::basicBlock->appendInst(inst);
1640
1641 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001642}
1643
1644static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1645{
1646 if(v->getType() == T(destType))
1647 {
1648 return v;
1649 }
1650
1651 Ice::Variable *result = ::function->makeVariable(T(destType));
1652 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1653 ::basicBlock->appendInst(cast);
1654
1655 return V(result);
1656}
1657
1658Value *Nucleus::createTrunc(Value *v, Type *destType)
1659{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001660 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001661 return createCast(Ice::InstCast::Trunc, v, destType);
1662}
1663
1664Value *Nucleus::createZExt(Value *v, Type *destType)
1665{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001666 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001667 return createCast(Ice::InstCast::Zext, v, destType);
1668}
1669
1670Value *Nucleus::createSExt(Value *v, Type *destType)
1671{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001672 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001673 return createCast(Ice::InstCast::Sext, v, destType);
1674}
1675
1676Value *Nucleus::createFPToUI(Value *v, Type *destType)
1677{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001678 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001679 return createCast(Ice::InstCast::Fptoui, v, destType);
1680}
1681
1682Value *Nucleus::createFPToSI(Value *v, Type *destType)
1683{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001684 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001685 return createCast(Ice::InstCast::Fptosi, v, destType);
1686}
1687
1688Value *Nucleus::createSIToFP(Value *v, Type *destType)
1689{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001690 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001691 return createCast(Ice::InstCast::Sitofp, v, destType);
1692}
1693
1694Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1695{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001696 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001697 return createCast(Ice::InstCast::Fptrunc, v, destType);
1698}
1699
1700Value *Nucleus::createFPExt(Value *v, Type *destType)
1701{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001702 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001703 return createCast(Ice::InstCast::Fpext, v, destType);
1704}
1705
1706Value *Nucleus::createBitCast(Value *v, Type *destType)
1707{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001708 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001709 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1710 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1711 // emulate them by writing to the stack and reading back as the destination type.
1712 if(emulateMismatchedBitCast)
1713 {
1714 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1715 {
1716 Value *address = allocateStackVariable(destType);
1717 createStore(v, address, T(v->getType()));
1718 return createLoad(address, destType);
1719 }
1720 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1721 {
1722 Value *address = allocateStackVariable(T(v->getType()));
1723 createStore(v, address, T(v->getType()));
1724 return createLoad(address, destType);
1725 }
1726 }
1727
1728 return createCast(Ice::InstCast::Bitcast, v, destType);
1729}
1730
1731static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1732{
1733 ASSERT(lhs->getType() == rhs->getType());
1734
1735 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1736 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1737 ::basicBlock->appendInst(cmp);
1738
1739 return V(result);
1740}
1741
Nicolas Capens157ba262019-12-10 17:49:14 -05001742Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1743{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001744 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001745 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1746}
1747
1748Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1749{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001750 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001751 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1752}
1753
1754Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1755{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001756 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001757 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1758}
1759
1760Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1761{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001762 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001763 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1764}
1765
1766Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1767{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001768 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001769 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1770}
1771
1772Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1773{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001774 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001775 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1776}
1777
1778Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1779{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001780 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001781 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1782}
1783
1784Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1785{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001786 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001787 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1788}
1789
1790Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1791{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001792 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001793 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1794}
1795
1796Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1797{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001798 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001799 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1800}
1801
1802static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1803{
1804 ASSERT(lhs->getType() == rhs->getType());
1805 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1806
1807 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1808 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1809 ::basicBlock->appendInst(cmp);
1810
1811 return V(result);
1812}
1813
1814Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1815{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001816 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001817 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1818}
1819
1820Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1821{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001822 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001823 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1824}
1825
1826Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1827{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001828 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001829 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1830}
1831
1832Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1833{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001834 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001835 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1836}
1837
1838Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1839{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001840 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001841 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1842}
1843
1844Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1845{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001846 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001847 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1848}
1849
1850Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1851{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001852 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001853 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1854}
1855
1856Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1857{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001858 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001859 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1860}
1861
1862Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1863{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001864 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001865 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1866}
1867
1868Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1869{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001870 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001871 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1872}
1873
1874Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1875{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001876 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001877 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1878}
1879
1880Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1881{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001882 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001883 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1884}
1885
1886Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1887{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001888 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001889 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1890}
1891
1892Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1893{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001894 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001895 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1896}
1897
1898Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1899{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001900 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001901 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001902 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001903 ::basicBlock->appendInst(extract);
1904
1905 return V(result);
1906}
1907
1908Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1909{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001910 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001911 auto result = ::function->makeVariable(vector->getType());
1912 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1913 ::basicBlock->appendInst(insert);
1914
1915 return V(result);
1916}
1917
1918Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1919{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001920 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001921 ASSERT(V1->getType() == V2->getType());
1922
1923 int size = Ice::typeNumElements(V1->getType());
1924 auto result = ::function->makeVariable(V1->getType());
1925 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1926
1927 for(int i = 0; i < size; i++)
1928 {
1929 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1930 }
1931
1932 ::basicBlock->appendInst(shuffle);
1933
1934 return V(result);
1935}
1936
1937Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1938{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001939 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001940 ASSERT(ifTrue->getType() == ifFalse->getType());
1941
1942 auto result = ::function->makeVariable(ifTrue->getType());
1943 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1944 ::basicBlock->appendInst(select);
1945
1946 return V(result);
1947}
1948
1949SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1950{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001951 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001952 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1953 ::basicBlock->appendInst(switchInst);
1954
Ben Clayton713b8d32019-12-17 20:37:56 +00001955 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001956}
1957
1958void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1959{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001960 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001961 switchCases->addBranch(label, label, branch);
1962}
1963
1964void Nucleus::createUnreachable()
1965{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001966 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001967 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1968 ::basicBlock->appendInst(unreachable);
1969}
1970
Antonio Maiorano62427e02020-02-13 09:18:05 -05001971Type *Nucleus::getType(Value *value)
1972{
1973 return T(V(value)->getType());
1974}
1975
1976Type *Nucleus::getContainedType(Type *vectorType)
1977{
1978 Ice::Type vecTy = T(vectorType);
1979 switch(vecTy)
1980 {
Nicolas Capens112faf42019-12-13 17:32:26 -05001981 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1982 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1983 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1984 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1985 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1986 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1987 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1988 default:
1989 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1990 return {};
Antonio Maiorano62427e02020-02-13 09:18:05 -05001991 }
1992}
1993
Nicolas Capens157ba262019-12-10 17:49:14 -05001994Type *Nucleus::getPointerType(Type *ElementType)
1995{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001996 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001997}
1998
Antonio Maiorano62427e02020-02-13 09:18:05 -05001999static constexpr Ice::Type getNaturalIntType()
2000{
2001 constexpr size_t intSize = sizeof(int);
2002 static_assert(intSize == 4 || intSize == 8, "");
2003 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
2004}
2005
2006Type *Nucleus::getPrintfStorageType(Type *valueType)
2007{
2008 Ice::Type valueTy = T(valueType);
2009 switch(valueTy)
2010 {
Nicolas Capens112faf42019-12-13 17:32:26 -05002011 case Ice::IceType_i32:
2012 return T(getNaturalIntType());
Antonio Maiorano62427e02020-02-13 09:18:05 -05002013
Nicolas Capens112faf42019-12-13 17:32:26 -05002014 case Ice::IceType_f32:
2015 return T(Ice::IceType_f64);
Antonio Maiorano62427e02020-02-13 09:18:05 -05002016
Nicolas Capens112faf42019-12-13 17:32:26 -05002017 default:
2018 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
2019 return {};
Antonio Maiorano62427e02020-02-13 09:18:05 -05002020 }
2021}
2022
Nicolas Capens157ba262019-12-10 17:49:14 -05002023Value *Nucleus::createNullValue(Type *Ty)
2024{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002025 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002026 if(Ice::isVectorType(T(Ty)))
2027 {
2028 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002029 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002030 return createConstantVector(c, Ty);
2031 }
2032 else
2033 {
2034 return V(::context->getConstantZero(T(Ty)));
2035 }
2036}
2037
2038Value *Nucleus::createConstantLong(int64_t i)
2039{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002040 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002041 return V(::context->getConstantInt64(i));
2042}
2043
2044Value *Nucleus::createConstantInt(int i)
2045{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002046 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002047 return V(::context->getConstantInt32(i));
2048}
2049
2050Value *Nucleus::createConstantInt(unsigned int i)
2051{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002052 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002053 return V(::context->getConstantInt32(i));
2054}
2055
2056Value *Nucleus::createConstantBool(bool b)
2057{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002058 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002059 return V(::context->getConstantInt1(b));
2060}
2061
2062Value *Nucleus::createConstantByte(signed char i)
2063{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002064 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002065 return V(::context->getConstantInt8(i));
2066}
2067
2068Value *Nucleus::createConstantByte(unsigned char i)
2069{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002070 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002071 return V(::context->getConstantInt8(i));
2072}
2073
2074Value *Nucleus::createConstantShort(short i)
2075{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002076 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002077 return V(::context->getConstantInt16(i));
2078}
2079
2080Value *Nucleus::createConstantShort(unsigned short i)
2081{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002082 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002083 return V(::context->getConstantInt16(i));
2084}
2085
2086Value *Nucleus::createConstantFloat(float x)
2087{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002088 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002089 return V(::context->getConstantFloat(x));
2090}
2091
2092Value *Nucleus::createNullPointer(Type *Ty)
2093{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002094 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002095 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002096}
2097
Antonio Maiorano02a39532020-01-21 15:15:34 -05002098static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2099{
2100 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2101}
2102
Nicolas Capens157ba262019-12-10 17:49:14 -05002103Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2104{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002105 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002106 const int vectorSize = 16;
2107 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2108 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002109
2110 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002111 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002112
Antonio Maioranoa0957112020-03-04 15:06:19 -05002113 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002114 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002115
2116 switch((int)reinterpret_cast<intptr_t>(type))
2117 {
Nicolas Capens112faf42019-12-13 17:32:26 -05002118 case Ice::IceType_v4i32:
2119 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002120 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002121 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002122 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002123 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002124 }
2125 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002126 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002127 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002128 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002129 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002130 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002131 }
2132 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002133 case Ice::IceType_v8i16:
2134 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002135 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002136 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002137 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002138 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002139 }
2140 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002141 case Ice::IceType_v16i8:
2142 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002143 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002144 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002145 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002146 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002147 }
2148 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002149 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002150 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002151 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002152 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002153 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002154 }
2155 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002156 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002157 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002158 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002159 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002160 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002161 }
2162 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002163 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002164 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002165 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002166 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002167 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002168 }
2169 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002170 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002171 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002172 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002173 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002174 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002175 }
2176 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002177 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002178 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002179 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002180 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002181 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002182 }
2183 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002184 default:
2185 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002186 }
2187
Antonio Maiorano02a39532020-01-21 15:15:34 -05002188 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002189
Antonio Maiorano02a39532020-01-21 15:15:34 -05002190 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002191 return V(result);
2192}
2193
2194Value *Nucleus::createConstantVector(const double *constants, Type *type)
2195{
Ben Clayton713b8d32019-12-17 20:37:56 +00002196 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002197}
2198
Antonio Maiorano62427e02020-02-13 09:18:05 -05002199Value *Nucleus::createConstantString(const char *v)
2200{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002201 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002202 return V(IceConstantData(v, strlen(v) + 1));
2203}
2204
Nicolas Capens54313fb2021-02-19 14:26:27 -05002205void Nucleus::setOptimizerCallback(OptimizerCallback *callback)
2206{
2207 ::optimizerCallback = callback;
2208}
2209
Nicolas Capens519cf222020-05-08 15:27:19 -04002210Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002211{
2212 return T(Ice::IceType_void);
2213}
2214
Nicolas Capens519cf222020-05-08 15:27:19 -04002215Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002216{
2217 return T(Ice::IceType_i1);
2218}
2219
Nicolas Capens519cf222020-05-08 15:27:19 -04002220Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002221{
2222 return T(Ice::IceType_i8);
2223}
2224
Nicolas Capens519cf222020-05-08 15:27:19 -04002225Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002226{
2227 return T(Ice::IceType_i8);
2228}
2229
Nicolas Capens519cf222020-05-08 15:27:19 -04002230Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002231{
2232 return T(Ice::IceType_i16);
2233}
2234
Nicolas Capens519cf222020-05-08 15:27:19 -04002235Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002236{
2237 return T(Ice::IceType_i16);
2238}
2239
Nicolas Capens519cf222020-05-08 15:27:19 -04002240Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002241{
2242 return T(Type_v4i8);
2243}
2244
Nicolas Capens519cf222020-05-08 15:27:19 -04002245Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002246{
2247 return T(Type_v4i8);
2248}
2249
Ben Clayton713b8d32019-12-17 20:37:56 +00002250namespace {
2251RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002252{
Ben Clayton713b8d32019-12-17 20:37:56 +00002253 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002254}
2255
Ben Clayton713b8d32019-12-17 20:37:56 +00002256RValue<Byte> Extract(RValue<Byte8> val, int i)
2257{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002258 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002259}
2260
2261RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2262{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002263 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002264}
2265} // namespace
2266
Nicolas Capens157ba262019-12-10 17:49:14 -05002267RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2268{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002269 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002270 if(emulateIntrinsics)
2271 {
2272 Byte8 result;
2273 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2274 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2275 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2276 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2277 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2278 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2279 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2280 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2281
2282 return result;
2283 }
2284 else
2285 {
2286 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002287 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002288 auto paddusb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002289 paddusb->addArg(x.value());
2290 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002291 ::basicBlock->appendInst(paddusb);
2292
2293 return RValue<Byte8>(V(result));
2294 }
2295}
2296
2297RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2298{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002299 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002300 if(emulateIntrinsics)
2301 {
2302 Byte8 result;
2303 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2304 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2305 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2306 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2307 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2308 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2309 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2310 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2311
2312 return result;
2313 }
2314 else
2315 {
2316 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002317 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002318 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002319 psubusw->addArg(x.value());
2320 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002321 ::basicBlock->appendInst(psubusw);
2322
2323 return RValue<Byte8>(V(result));
2324 }
2325}
2326
2327RValue<SByte> Extract(RValue<SByte8> val, int i)
2328{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002329 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002330 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002331}
2332
2333RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2334{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002335 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002336 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002337}
2338
2339RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2340{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002341 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002342 if(emulateIntrinsics)
2343 {
2344 SByte8 result;
2345 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2346 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2347 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2348 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2349 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2350 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2351 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2352 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2353
2354 return result;
2355 }
2356 else
2357 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002358#if defined(__i386__) || defined(__x86_64__)
2359 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2360 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2361 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002362
Ben Clayton713b8d32019-12-17 20:37:56 +00002363 return As<SByte8>(hi | lo);
2364#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002365 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002366#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002367 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002368}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002369
Nicolas Capens157ba262019-12-10 17:49:14 -05002370RValue<Int> SignMask(RValue<Byte8> x)
2371{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002372 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002373 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002374 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002375 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2376 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002377 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002378 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002379 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002380 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002381 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002382 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002383 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002384 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002385
Nicolas Capens157ba262019-12-10 17:49:14 -05002386 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002387 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002388}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002389
2390// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2391// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002392// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002393// }
2394
Nicolas Capens157ba262019-12-10 17:49:14 -05002395RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2396{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002397 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002398 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002399}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002400
Nicolas Capens519cf222020-05-08 15:27:19 -04002401Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002402{
2403 return T(Type_v8i8);
2404}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002405
Nicolas Capens598f8d82016-09-26 15:09:10 -04002406// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2407// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002408// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002409// }
2410
2411// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2412// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002413// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002414// }
2415
Nicolas Capens157ba262019-12-10 17:49:14 -05002416RValue<SByte> SaturateSigned(RValue<Short> x)
2417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002418 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002419 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2420}
2421
2422RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2423{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002424 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002425 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002426 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002427 SByte8 result;
2428 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2429 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2430 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2431 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2432 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2433 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2434 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2435 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002436
Nicolas Capens157ba262019-12-10 17:49:14 -05002437 return result;
2438 }
2439 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002440 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002441 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002442 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002443 auto paddsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002444 paddsb->addArg(x.value());
2445 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002446 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002447
Nicolas Capens157ba262019-12-10 17:49:14 -05002448 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002449 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002450}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002451
Nicolas Capens157ba262019-12-10 17:49:14 -05002452RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2453{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002454 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002455 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002456 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002457 SByte8 result;
2458 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2459 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2460 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2461 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2462 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2463 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2464 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2465 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002466
Nicolas Capens157ba262019-12-10 17:49:14 -05002467 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002468 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002469 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002470 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002471 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002472 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002473 auto psubsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002474 psubsb->addArg(x.value());
2475 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002476 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002477
Nicolas Capens157ba262019-12-10 17:49:14 -05002478 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002479 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002480}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002481
Nicolas Capens157ba262019-12-10 17:49:14 -05002482RValue<Int> SignMask(RValue<SByte8> x)
2483{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002484 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002485 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002486 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002487 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2488 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002489 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002490 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002491 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002492 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002493 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002494 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002495 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002496 ::basicBlock->appendInst(movmsk);
2497
2498 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002499 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002500}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002501
Nicolas Capens157ba262019-12-10 17:49:14 -05002502RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2503{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002504 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002505 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002506}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002507
Nicolas Capens157ba262019-12-10 17:49:14 -05002508RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2509{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002510 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002511 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002512}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002513
Nicolas Capens519cf222020-05-08 15:27:19 -04002514Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002515{
2516 return T(Type_v8i8);
2517}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002518
Nicolas Capens519cf222020-05-08 15:27:19 -04002519Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002520{
2521 return T(Ice::IceType_v16i8);
2522}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002523
Nicolas Capens519cf222020-05-08 15:27:19 -04002524Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002525{
2526 return T(Ice::IceType_v16i8);
2527}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002528
Nicolas Capens519cf222020-05-08 15:27:19 -04002529Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002530{
2531 return T(Type_v2i16);
2532}
Nicolas Capensd4227962016-11-09 14:24:25 -05002533
Nicolas Capens519cf222020-05-08 15:27:19 -04002534Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002535{
2536 return T(Type_v2i16);
2537}
Nicolas Capensd4227962016-11-09 14:24:25 -05002538
Nicolas Capens157ba262019-12-10 17:49:14 -05002539Short4::Short4(RValue<Int4> cast)
2540{
Ben Clayton713b8d32019-12-17 20:37:56 +00002541 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002542 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002543 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2544
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002545 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002546 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002547
2548 storeValue(short4);
2549}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002550
2551// Short4::Short4(RValue<Float> cast)
2552// {
2553// }
2554
Nicolas Capens157ba262019-12-10 17:49:14 -05002555Short4::Short4(RValue<Float4> cast)
2556{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002557 // TODO(b/150791192): Generalize and optimize
2558 auto smin = std::numeric_limits<short>::min();
2559 auto smax = std::numeric_limits<short>::max();
2560 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002561}
2562
2563RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2564{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002565 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002566 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002567 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002568 Short4 result;
2569 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2570 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2571 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2572 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002573
2574 return result;
2575 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002576 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002577 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002578 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002579 }
2580}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002581
Nicolas Capens157ba262019-12-10 17:49:14 -05002582RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2583{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002584 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002585 if(emulateIntrinsics)
2586 {
2587 Short4 result;
2588 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2589 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2590 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2591 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2592
2593 return result;
2594 }
2595 else
2596 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002597 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002598 }
2599}
2600
2601RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2602{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002603 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002604 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002605 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002606 ::basicBlock->appendInst(cmp);
2607
2608 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002609 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002610 ::basicBlock->appendInst(select);
2611
2612 return RValue<Short4>(V(result));
2613}
2614
2615RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2616{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002617 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002618 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002619 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002620 ::basicBlock->appendInst(cmp);
2621
2622 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002623 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002624 ::basicBlock->appendInst(select);
2625
2626 return RValue<Short4>(V(result));
2627}
2628
2629RValue<Short> SaturateSigned(RValue<Int> x)
2630{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002631 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002632 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2633}
2634
2635RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2636{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002637 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002638 if(emulateIntrinsics)
2639 {
2640 Short4 result;
2641 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2642 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2643 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2644 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2645
2646 return result;
2647 }
2648 else
2649 {
2650 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002651 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002652 auto paddsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002653 paddsw->addArg(x.value());
2654 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002655 ::basicBlock->appendInst(paddsw);
2656
2657 return RValue<Short4>(V(result));
2658 }
2659}
2660
2661RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2662{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002663 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002664 if(emulateIntrinsics)
2665 {
2666 Short4 result;
2667 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2668 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2669 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2670 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2671
2672 return result;
2673 }
2674 else
2675 {
2676 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002677 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002678 auto psubsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002679 psubsw->addArg(x.value());
2680 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002681 ::basicBlock->appendInst(psubsw);
2682
2683 return RValue<Short4>(V(result));
2684 }
2685}
2686
2687RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2688{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002689 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002690 if(emulateIntrinsics)
2691 {
2692 Short4 result;
2693 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2694 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2695 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2696 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2697
2698 return result;
2699 }
2700 else
2701 {
2702 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002703 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002704 auto pmulhw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002705 pmulhw->addArg(x.value());
2706 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002707 ::basicBlock->appendInst(pmulhw);
2708
2709 return RValue<Short4>(V(result));
2710 }
2711}
2712
2713RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2714{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002715 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002716 if(emulateIntrinsics)
2717 {
2718 Int2 result;
2719 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2720 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2721
2722 return result;
2723 }
2724 else
2725 {
2726 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002727 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002728 auto pmaddwd = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002729 pmaddwd->addArg(x.value());
2730 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002731 ::basicBlock->appendInst(pmaddwd);
2732
2733 return As<Int2>(V(result));
2734 }
2735}
2736
2737RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2738{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002739 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002740 if(emulateIntrinsics)
2741 {
2742 SByte8 result;
2743 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2744 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2745 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2746 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2747 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2748 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2749 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2750 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2751
2752 return result;
2753 }
2754 else
2755 {
2756 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002757 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002758 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002759 pack->addArg(x.value());
2760 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002761 ::basicBlock->appendInst(pack);
2762
2763 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2764 }
2765}
2766
2767RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2768{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002769 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002770 if(emulateIntrinsics)
2771 {
2772 Byte8 result;
2773 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2774 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2775 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2776 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2777 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2778 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2779 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2780 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2781
2782 return result;
2783 }
2784 else
2785 {
2786 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002787 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002788 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002789 pack->addArg(x.value());
2790 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002791 ::basicBlock->appendInst(pack);
2792
2793 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2794 }
2795}
2796
2797RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2798{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002799 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002800 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002801}
2802
2803RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2804{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002805 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002806 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002807}
2808
Nicolas Capens519cf222020-05-08 15:27:19 -04002809Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002810{
2811 return T(Type_v4i16);
2812}
2813
2814UShort4::UShort4(RValue<Float4> cast, bool saturate)
2815{
2816 if(saturate)
2817 {
2818 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002819 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002820 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2821 // PackUnsigned takes care of 0x0000 saturation.
2822 Int4 int4(Min(cast, Float4(0xFFFF)));
2823 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002824 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002825 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002826 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002827 // ARM saturates the 32-bit integer result on overflow/undeflow.
2828 Int4 int4(cast);
2829 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002830 }
2831 else
2832 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002833 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002834 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002835 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002836 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002837 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002838 *this = Short4(Int4(cast));
2839 }
2840}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002841
Nicolas Capens157ba262019-12-10 17:49:14 -05002842RValue<UShort> Extract(RValue<UShort4> val, int i)
2843{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002844 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002845}
2846
2847RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2848{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002849 return RValue<UShort4>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002850}
2851
2852RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2853{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002854 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002855 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002856
Nicolas Capens157ba262019-12-10 17:49:14 -05002857 {
2858 UShort4 result;
2859 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2860 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2861 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2862 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2863
2864 return result;
2865 }
2866 else
2867 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002868 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002869 }
2870}
2871
2872RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2873{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002874 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002875 if(emulateIntrinsics)
2876 {
2877 UShort4 result;
2878 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2879 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2880 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2881 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2882
2883 return result;
2884 }
2885 else
2886 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002887 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002888 }
2889}
2890
2891RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2892{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002893 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002894 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002895 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002896 ::basicBlock->appendInst(cmp);
2897
2898 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002899 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002900 ::basicBlock->appendInst(select);
2901
2902 return RValue<UShort4>(V(result));
2903}
2904
2905RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2906{
2907 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002908 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002909 ::basicBlock->appendInst(cmp);
2910
2911 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002912 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002913 ::basicBlock->appendInst(select);
2914
2915 return RValue<UShort4>(V(result));
2916}
2917
2918RValue<UShort> SaturateUnsigned(RValue<Int> x)
2919{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002920 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002921 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2922}
2923
2924RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2925{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002926 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002927 if(emulateIntrinsics)
2928 {
2929 UShort4 result;
2930 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2931 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2932 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2933 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2934
2935 return result;
2936 }
2937 else
2938 {
2939 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002940 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002941 auto paddusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002942 paddusw->addArg(x.value());
2943 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002944 ::basicBlock->appendInst(paddusw);
2945
2946 return RValue<UShort4>(V(result));
2947 }
2948}
2949
2950RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2951{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002952 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002953 if(emulateIntrinsics)
2954 {
2955 UShort4 result;
2956 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2957 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2958 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2959 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2960
2961 return result;
2962 }
2963 else
2964 {
2965 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002966 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002967 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002968 psubusw->addArg(x.value());
2969 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002970 ::basicBlock->appendInst(psubusw);
2971
2972 return RValue<UShort4>(V(result));
2973 }
2974}
2975
2976RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2977{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002978 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002979 if(emulateIntrinsics)
2980 {
2981 UShort4 result;
2982 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2983 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2984 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2985 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2986
2987 return result;
2988 }
2989 else
2990 {
2991 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002992 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002993 auto pmulhuw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002994 pmulhuw->addArg(x.value());
2995 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002996 ::basicBlock->appendInst(pmulhuw);
2997
2998 return RValue<UShort4>(V(result));
2999 }
3000}
3001
3002RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
3003{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003004 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003005 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3006
3007 // Scalarized implementation.
3008 Int4 result;
3009 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
3010 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
3011 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
3012 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
3013
3014 return result;
3015}
3016
3017RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3018{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003019 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003020 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3021
3022 if(false) // Partial product based implementation.
3023 {
3024 auto xh = x >> 16;
3025 auto yh = y >> 16;
3026 auto xl = x & UInt4(0x0000FFFF);
3027 auto yl = y & UInt4(0x0000FFFF);
3028 auto xlyh = xl * yh;
3029 auto xhyl = xh * yl;
3030 auto xlyhh = xlyh >> 16;
3031 auto xhylh = xhyl >> 16;
3032 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3033 auto xhyll = xhyl & UInt4(0x0000FFFF);
3034 auto xlylh = (xl * yl) >> 16;
3035 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3036
3037 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003038 }
3039
Nicolas Capens157ba262019-12-10 17:49:14 -05003040 // Scalarized implementation.
3041 Int4 result;
3042 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3043 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3044 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3045 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3046
3047 return As<UInt4>(result);
3048}
3049
3050RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3051{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003052 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003053 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003054 return UShort4(0);
3055}
3056
Nicolas Capens519cf222020-05-08 15:27:19 -04003057Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003058{
3059 return T(Type_v4i16);
3060}
3061
3062RValue<Short> Extract(RValue<Short8> val, int i)
3063{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003064 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003065 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003066}
3067
3068RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3069{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003070 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003071 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003072}
3073
3074RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3075{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003076 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003077 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003078 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003079 Short8 result;
3080 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3081 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3082 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3083 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3084 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3085 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3086 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3087 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003088
Nicolas Capens157ba262019-12-10 17:49:14 -05003089 return result;
3090 }
3091 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003092 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003093 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003094 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003095}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003096
Nicolas Capens157ba262019-12-10 17:49:14 -05003097RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3098{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003099 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003100 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003101 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003102 Short8 result;
3103 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3104 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3105 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3106 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3107 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3108 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3109 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3110 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003111
Nicolas Capens157ba262019-12-10 17:49:14 -05003112 return result;
3113 }
3114 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003115 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003116 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003117 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003118}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003119
Nicolas Capens157ba262019-12-10 17:49:14 -05003120RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3121{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003122 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003123 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003124 return Int4(0);
3125}
3126
3127RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3128{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003129 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003130 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003131 return Short8(0);
3132}
3133
Nicolas Capens519cf222020-05-08 15:27:19 -04003134Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003135{
3136 return T(Ice::IceType_v8i16);
3137}
3138
3139RValue<UShort> Extract(RValue<UShort8> val, int i)
3140{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003141 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003142 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003143}
3144
3145RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3146{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003147 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003148 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003149}
3150
3151RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3152{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003153 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003154 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003155 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003156 UShort8 result;
3157 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3158 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3159 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3160 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3161 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3162 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3163 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3164 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003165
Nicolas Capens157ba262019-12-10 17:49:14 -05003166 return result;
3167 }
3168 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003169 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003170 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003171 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003172}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003173
Nicolas Capens157ba262019-12-10 17:49:14 -05003174RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3175{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003176 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003177 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003178 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003179 UShort8 result;
3180 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3181 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3182 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3183 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3184 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3185 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3186 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3187 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003188
Nicolas Capens157ba262019-12-10 17:49:14 -05003189 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003190 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003191 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003192 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003193 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003194 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003195}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003196
Nicolas Capens157ba262019-12-10 17:49:14 -05003197RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3198{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003199 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003200 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003201 return UShort8(0);
3202}
3203
Nicolas Capens519cf222020-05-08 15:27:19 -04003204Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003205{
3206 return T(Ice::IceType_v8i16);
3207}
3208
Ben Clayton713b8d32019-12-17 20:37:56 +00003209RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003210{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003211 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003212 RValue<Int> res = val;
3213 val += 1;
3214 return res;
3215}
3216
Ben Clayton713b8d32019-12-17 20:37:56 +00003217const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003218{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003219 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003220 val += 1;
3221 return val;
3222}
3223
Ben Clayton713b8d32019-12-17 20:37:56 +00003224RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003225{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003226 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003227 RValue<Int> res = val;
3228 val -= 1;
3229 return res;
3230}
3231
Ben Clayton713b8d32019-12-17 20:37:56 +00003232const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003233{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003234 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003235 val -= 1;
3236 return val;
3237}
3238
3239RValue<Int> RoundInt(RValue<Float> cast)
3240{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003241 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003242 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003243 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003244 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3245 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003246 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003247 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003248 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003249 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003250 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003251 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003252 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003253 ::basicBlock->appendInst(nearbyint);
3254
3255 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003256 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003257}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003258
Nicolas Capens519cf222020-05-08 15:27:19 -04003259Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003260{
3261 return T(Ice::IceType_i32);
3262}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003263
Nicolas Capens519cf222020-05-08 15:27:19 -04003264Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003265{
3266 return T(Ice::IceType_i64);
3267}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003268
Nicolas Capens157ba262019-12-10 17:49:14 -05003269UInt::UInt(RValue<Float> cast)
3270{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003271 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003272 // Smallest positive value representable in UInt, but not in Int
3273 const unsigned int ustart = 0x80000000u;
3274 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003275
Nicolas Capens157ba262019-12-10 17:49:14 -05003276 // If the value is negative, store 0, otherwise store the result of the conversion
3277 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003278 // Check if the value can be represented as an Int
3279 IfThenElse(cast >= ustartf,
3280 // If the value is too large, subtract ustart and re-add it after conversion.
3281 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3282 // Otherwise, just convert normally
3283 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003284 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003285}
Nicolas Capensa8086512016-11-07 17:32:17 -05003286
Ben Clayton713b8d32019-12-17 20:37:56 +00003287RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003288{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003289 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003290 RValue<UInt> res = val;
3291 val += 1;
3292 return res;
3293}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003294
Ben Clayton713b8d32019-12-17 20:37:56 +00003295const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003296{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003297 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003298 val += 1;
3299 return val;
3300}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003301
Ben Clayton713b8d32019-12-17 20:37:56 +00003302RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003303{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003304 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003305 RValue<UInt> res = val;
3306 val -= 1;
3307 return res;
3308}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003309
Ben Clayton713b8d32019-12-17 20:37:56 +00003310const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003313 val -= 1;
3314 return val;
3315}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003316
Nicolas Capens598f8d82016-09-26 15:09:10 -04003317// RValue<UInt> RoundUInt(RValue<Float> cast)
3318// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003319// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003320// }
3321
Nicolas Capens519cf222020-05-08 15:27:19 -04003322Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003323{
3324 return T(Ice::IceType_i32);
3325}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003326
3327// Int2::Int2(RValue<Int> cast)
3328// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003329// Value *extend = Nucleus::createZExt(cast.value(), Long::type());
Nicolas Capens519cf222020-05-08 15:27:19 -04003330// Value *vector = Nucleus::createBitCast(extend, Int2::type());
Nicolas Capens598f8d82016-09-26 15:09:10 -04003331//
3332// Constant *shuffle[2];
3333// shuffle[0] = Nucleus::createConstantInt(0);
3334// shuffle[1] = Nucleus::createConstantInt(0);
3335//
Nicolas Capens519cf222020-05-08 15:27:19 -04003336// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003337//
3338// storeValue(replicate);
3339// }
3340
Nicolas Capens157ba262019-12-10 17:49:14 -05003341RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3342{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003343 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003344 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003345 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003346 Int2 result;
3347 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3348 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003349
Nicolas Capens157ba262019-12-10 17:49:14 -05003350 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003351 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003352 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003353 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003354 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003355 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003356}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003357
Nicolas Capens157ba262019-12-10 17:49:14 -05003358RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003360 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003361 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003362 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003363 Int2 result;
3364 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3365 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3366
3367 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003368 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003369 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003370 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003371 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003372 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003373}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003374
Nicolas Capens519cf222020-05-08 15:27:19 -04003375Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003376{
3377 return T(Type_v2i32);
3378}
3379
3380RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3381{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003382 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003383 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003384 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003385 UInt2 result;
3386 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3387 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003388
Nicolas Capens157ba262019-12-10 17:49:14 -05003389 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003390 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003391 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003392 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003393 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003394 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003395}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003396
Nicolas Capens157ba262019-12-10 17:49:14 -05003397RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3398{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003399 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003400 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003401 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003402 UInt2 result;
3403 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3404 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003405
Nicolas Capens157ba262019-12-10 17:49:14 -05003406 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003407 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003408 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003409 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003410 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003411 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003412}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003413
Nicolas Capens519cf222020-05-08 15:27:19 -04003414Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003415{
3416 return T(Type_v2i32);
3417}
3418
Ben Clayton713b8d32019-12-17 20:37:56 +00003419Int4::Int4(RValue<Byte4> cast)
3420 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003421{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003422 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003423 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003424 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3425
3426 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003427 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003428 Value *b = Nucleus::createBitCast(a, Byte16::type());
3429 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003430
Ben Clayton713b8d32019-12-17 20:37:56 +00003431 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003432 Value *d = Nucleus::createBitCast(c, Short8::type());
3433 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003434
Nicolas Capens519cf222020-05-08 15:27:19 -04003435 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003436 storeValue(f);
3437}
3438
Ben Clayton713b8d32019-12-17 20:37:56 +00003439Int4::Int4(RValue<SByte4> cast)
3440 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003441{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003442 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003443 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003444 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3445
Ben Clayton713b8d32019-12-17 20:37:56 +00003446 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003447 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003448 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3449
Ben Clayton713b8d32019-12-17 20:37:56 +00003450 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003451 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003452 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3453
3454 *this = As<Int4>(e) >> 24;
3455}
3456
Ben Clayton713b8d32019-12-17 20:37:56 +00003457Int4::Int4(RValue<Short4> cast)
3458 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003459{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003460 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003461 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003462 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003463
3464 *this = As<Int4>(c) >> 16;
3465}
3466
Ben Clayton713b8d32019-12-17 20:37:56 +00003467Int4::Int4(RValue<UShort4> cast)
3468 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003469{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003470 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003471 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003472 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003473 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003474 storeValue(d);
3475}
3476
Ben Clayton713b8d32019-12-17 20:37:56 +00003477Int4::Int4(RValue<Int> rhs)
3478 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003479{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003480 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003481 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003482
Ben Clayton713b8d32019-12-17 20:37:56 +00003483 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003484 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3485
3486 storeValue(replicate);
3487}
3488
3489RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3490{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003491 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003492 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003493 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003494 Int4 result;
3495 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3496 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3497 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3498 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003499
Nicolas Capens157ba262019-12-10 17:49:14 -05003500 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003501 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003502 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003503 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003504 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003505 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003506}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003507
Nicolas Capens157ba262019-12-10 17:49:14 -05003508RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3509{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003510 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003511 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003512 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003513 Int4 result;
3514 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3515 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3516 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3517 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003518
Nicolas Capens157ba262019-12-10 17:49:14 -05003519 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003520 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003521 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003522 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003523 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003524 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003525}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003526
Nicolas Capens157ba262019-12-10 17:49:14 -05003527RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3528{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003529 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003530 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003531}
3532
3533RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3534{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003535 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003536 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003537}
3538
3539RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3540{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003541 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003542 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003543}
3544
3545RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3546{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003547 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003548 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003549}
3550
3551RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3552{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003553 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003554 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003555}
3556
3557RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3558{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003559 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003560 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003561}
3562
Nicolas Capens629bf952022-01-18 15:08:14 -05003563RValue<Int4> Abs(RValue<Int4> x)
3564{
3565 // TODO: Optimize.
3566 auto negative = x >> 31;
3567 return (x ^ negative) - negative;
3568}
3569
Nicolas Capens157ba262019-12-10 17:49:14 -05003570RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3571{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003572 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003573 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003574 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003575 ::basicBlock->appendInst(cmp);
3576
3577 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003578 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003579 ::basicBlock->appendInst(select);
3580
3581 return RValue<Int4>(V(result));
3582}
3583
3584RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3585{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003586 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003587 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003588 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003589 ::basicBlock->appendInst(cmp);
3590
3591 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003592 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003593 ::basicBlock->appendInst(select);
3594
3595 return RValue<Int4>(V(result));
3596}
3597
3598RValue<Int4> RoundInt(RValue<Float4> cast)
3599{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003600 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003601 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003602 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003603 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3604 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003605 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003606 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003607 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003608 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003609 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003610 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003611 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003612 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003613
3614 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003615 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003616}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003617
Nicolas Capenseeb81842021-01-12 17:44:40 -05003618RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3619{
3620 RR_DEBUG_INFO_UPDATE_LOC();
3621
3622 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3623 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3624 // saturate to 0x80000000.
3625 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3626
3627 if(emulateIntrinsics || CPUID::ARM)
3628 {
3629 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3630 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3631 }
3632 else
3633 {
3634 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3635 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003636 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capenseeb81842021-01-12 17:44:40 -05003637 nearbyint->addArg(clamped.value());
3638 ::basicBlock->appendInst(nearbyint);
3639
3640 return RValue<Int4>(V(result));
3641 }
3642}
3643
Nicolas Capens157ba262019-12-10 17:49:14 -05003644RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3645{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003646 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003647 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003648 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003649 Short8 result;
3650 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3651 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3652 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3653 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3654 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3655 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3656 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3657 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003658
Nicolas Capens157ba262019-12-10 17:49:14 -05003659 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003660 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003661 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003662 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003663 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003664 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003665 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003666 pack->addArg(x.value());
3667 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003668 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003669
Nicolas Capens157ba262019-12-10 17:49:14 -05003670 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003671 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003672}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003673
Nicolas Capens157ba262019-12-10 17:49:14 -05003674RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3675{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003676 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003677 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003678 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003679 RValue<Int4> sx = As<Int4>(x);
3680 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003681
Nicolas Capens157ba262019-12-10 17:49:14 -05003682 RValue<Int4> sy = As<Int4>(y);
3683 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003684
Nicolas Capens157ba262019-12-10 17:49:14 -05003685 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003686 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003687 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003688 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003689 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003690 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003691 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003692 pack->addArg(x.value());
3693 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003694 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003695
Nicolas Capens157ba262019-12-10 17:49:14 -05003696 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003697 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003698}
Nicolas Capens33438a62017-09-27 11:47:35 -04003699
Nicolas Capens157ba262019-12-10 17:49:14 -05003700RValue<Int> SignMask(RValue<Int4> x)
3701{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003702 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003703 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003704 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003705 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3706 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003707 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003708 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003709 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003710 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003711 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003712 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003713 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003714 ::basicBlock->appendInst(movmsk);
3715
3716 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003717 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003718}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003719
Nicolas Capens519cf222020-05-08 15:27:19 -04003720Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003721{
3722 return T(Ice::IceType_v4i32);
3723}
3724
Ben Clayton713b8d32019-12-17 20:37:56 +00003725UInt4::UInt4(RValue<Float4> cast)
3726 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003727{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003728 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003729 // Smallest positive value representable in UInt, but not in Int
3730 const unsigned int ustart = 0x80000000u;
3731 const float ustartf = float(ustart);
3732
3733 // Check if the value can be represented as an Int
3734 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3735 // If the value is too large, subtract ustart and re-add it after conversion.
3736 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003737 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003738 (~uiValue & Int4(cast));
3739 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003740 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003741}
3742
Ben Clayton713b8d32019-12-17 20:37:56 +00003743UInt4::UInt4(RValue<UInt> rhs)
3744 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003745{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003746 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003747 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003748
Ben Clayton713b8d32019-12-17 20:37:56 +00003749 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003750 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3751
3752 storeValue(replicate);
3753}
3754
3755RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3756{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003757 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003758 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003759 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003760 UInt4 result;
3761 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3762 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3763 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3764 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003765
Nicolas Capens157ba262019-12-10 17:49:14 -05003766 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003767 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003768 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003769 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003770 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003771 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003772}
Ben Clayton88816fa2019-05-15 17:08:14 +01003773
Nicolas Capens157ba262019-12-10 17:49:14 -05003774RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3775{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003776 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003777 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003778 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003779 UInt4 result;
3780 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3781 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3782 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3783 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003784
Nicolas Capens157ba262019-12-10 17:49:14 -05003785 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003786 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003787 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003788 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003789 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003790 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003791}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003792
Nicolas Capens157ba262019-12-10 17:49:14 -05003793RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3794{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003795 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003796 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003797}
3798
3799RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3800{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003801 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003802 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003803}
3804
3805RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3806{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003807 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003808 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003809}
3810
3811RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3812{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003813 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003814 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003815}
3816
3817RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3818{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003819 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003820 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003821}
3822
3823RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3824{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003825 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003826 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003827}
3828
3829RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3830{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003831 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003832 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003833 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003834 ::basicBlock->appendInst(cmp);
3835
3836 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003837 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003838 ::basicBlock->appendInst(select);
3839
3840 return RValue<UInt4>(V(result));
3841}
3842
3843RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003846 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003847 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003848 ::basicBlock->appendInst(cmp);
3849
3850 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003851 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003852 ::basicBlock->appendInst(select);
3853
3854 return RValue<UInt4>(V(result));
3855}
3856
Nicolas Capens519cf222020-05-08 15:27:19 -04003857Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003858{
3859 return T(Ice::IceType_v4i32);
3860}
3861
Nicolas Capens519cf222020-05-08 15:27:19 -04003862Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003863{
3864 return T(Ice::IceType_i16);
3865}
3866
3867RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3868{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003869 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003870 return 1.0f / x;
3871}
3872
3873RValue<Float> RcpSqrt_pp(RValue<Float> x)
3874{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003875 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003876 return Rcp_pp(Sqrt(x));
3877}
3878
3879RValue<Float> Sqrt(RValue<Float> x)
3880{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003881 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003882 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003883 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003884 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003885 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003886 ::basicBlock->appendInst(sqrt);
3887
3888 return RValue<Float>(V(result));
3889}
3890
3891RValue<Float> Round(RValue<Float> x)
3892{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003893 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003894 return Float4(Round(Float4(x))).x;
3895}
3896
3897RValue<Float> Trunc(RValue<Float> x)
3898{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003899 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003900 return Float4(Trunc(Float4(x))).x;
3901}
3902
3903RValue<Float> Frac(RValue<Float> x)
3904{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003905 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003906 return Float4(Frac(Float4(x))).x;
3907}
3908
3909RValue<Float> Floor(RValue<Float> x)
3910{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003911 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003912 return Float4(Floor(Float4(x))).x;
3913}
3914
3915RValue<Float> Ceil(RValue<Float> x)
3916{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003917 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003918 return Float4(Ceil(Float4(x))).x;
3919}
3920
Nicolas Capens519cf222020-05-08 15:27:19 -04003921Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003922{
3923 return T(Ice::IceType_f32);
3924}
3925
Nicolas Capens519cf222020-05-08 15:27:19 -04003926Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003927{
3928 return T(Type_v2f32);
3929}
3930
Ben Clayton713b8d32019-12-17 20:37:56 +00003931Float4::Float4(RValue<Float> rhs)
3932 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003933{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003934 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003935 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003936
Ben Clayton713b8d32019-12-17 20:37:56 +00003937 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003938 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3939
3940 storeValue(replicate);
3941}
3942
Nicolas Capensbc74bc22022-01-26 10:47:00 -05003943RValue<Float4> MulAdd(RValue<Float4> x, RValue<Float4> y, RValue<Float4> z)
3944{
3945 // TODO(b/214591655): Use FMA when available.
3946 return x * y + z;
3947}
3948
Nicolas Capens75d79f22022-01-31 17:46:26 -05003949RValue<Float4> FMA(RValue<Float4> x, RValue<Float4> y, RValue<Float4> z)
3950{
3951 // TODO(b/214591655): Use FMA instructions when available.
3952 return emulated::FMA(x, y, z);
3953}
3954
Nicolas Capens629bf952022-01-18 15:08:14 -05003955RValue<Float4> Abs(RValue<Float4> x)
3956{
3957 // TODO: Optimize.
3958 Value *vector = Nucleus::createBitCast(x.value(), Int4::type());
3959 int64_t constantVector[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
3960 Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, Int4::type()));
3961
3962 return As<Float4>(result);
3963}
3964
Nicolas Capens157ba262019-12-10 17:49:14 -05003965RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3966{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003967 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003968 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003969 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003970 ::basicBlock->appendInst(cmp);
3971
3972 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003973 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003974 ::basicBlock->appendInst(select);
3975
3976 return RValue<Float4>(V(result));
3977}
3978
3979RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3980{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003981 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003982 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003983 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003984 ::basicBlock->appendInst(cmp);
3985
3986 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003987 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003988 ::basicBlock->appendInst(select);
3989
3990 return RValue<Float4>(V(result));
3991}
3992
3993RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3994{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003995 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003996 return Float4(1.0f) / x;
3997}
3998
3999RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
4000{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004001 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004002 return Rcp_pp(Sqrt(x));
4003}
4004
// Reports whether RcpApprox() may be used. This backend always answers no.
bool HasRcpApprox()
{
	// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
	constexpr bool supported = false;
	return supported;
}
4010
4011RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
4012{
4013 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4014 UNREACHABLE("RValue<Float4> RcpApprox()");
4015 return { 0.0f };
4016}
4017
4018RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
4019{
4020 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4021 UNREACHABLE("RValue<Float> RcpApprox()");
4022 return { 0.0f };
4023}
4024
// Reports whether RcpSqrtApprox() may be used. This backend always answers no.
bool HasRcpSqrtApprox()
{
	constexpr bool supported = false;
	return supported;
}
4029
4030RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
4031{
4032 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4033 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
4034 return { 0.0f };
4035}
4036
4037RValue<Float> RcpSqrtApprox(RValue<Float> x)
4038{
4039 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4040 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
4041 return { 0.0f };
4042}
4043
Nicolas Capens157ba262019-12-10 17:49:14 -05004044RValue<Float4> Sqrt(RValue<Float4> x)
4045{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004046 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004047 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04004048 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004049 Float4 result;
4050 result.x = Sqrt(Float(Float4(x).x));
4051 result.y = Sqrt(Float(Float4(x).y));
4052 result.z = Sqrt(Float(Float4(x).z));
4053 result.w = Sqrt(Float(Float4(x).w));
4054
4055 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04004056 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004057 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04004058 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004059 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004060 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004061 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004062 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04004063 ::basicBlock->appendInst(sqrt);
4064
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04004065 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04004066 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04004067}
Nicolas Capens157ba262019-12-10 17:49:14 -05004068
4069RValue<Int> SignMask(RValue<Float4> x)
4070{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004071 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004072 if(emulateIntrinsics || CPUID::ARM)
4073 {
4074 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
4075 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
4076 }
4077 else
4078 {
4079 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004080 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004081 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004082 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004083 ::basicBlock->appendInst(movmsk);
4084
4085 return RValue<Int>(V(result));
4086 }
4087}
4088
4089RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
4090{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004091 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004092 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004093}
4094
4095RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
4096{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004097 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004098 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004099}
4100
4101RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4102{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004103 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004104 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004105}
4106
4107RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4108{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004109 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004110 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004111}
4112
4113RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4114{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004115 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004116 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004117}
4118
4119RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4120{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004121 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004122 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004123}
4124
4125RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4126{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004127 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004128 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004129}
4130
4131RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4132{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004133 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004134 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004135}
4136
4137RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4138{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004139 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004140 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004141}
4142
4143RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4144{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004145 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004146 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004147}
4148
4149RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4150{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004151 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004152 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004153}
4154
4155RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4156{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004157 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004158 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004159}
4160
4161RValue<Float4> Round(RValue<Float4> x)
4162{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004163 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004164 if(emulateIntrinsics || CPUID::ARM)
4165 {
4166 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4167 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4168 }
4169 else if(CPUID::SSE4_1)
4170 {
4171 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004172 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004173 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004174 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004175 round->addArg(::context->getConstantInt32(0));
4176 ::basicBlock->appendInst(round);
4177
4178 return RValue<Float4>(V(result));
4179 }
4180 else
4181 {
4182 return Float4(RoundInt(x));
4183 }
4184}
4185
4186RValue<Float4> Trunc(RValue<Float4> x)
4187{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004188 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004189 if(CPUID::SSE4_1)
4190 {
4191 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004192 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004193 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004194 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004195 round->addArg(::context->getConstantInt32(3));
4196 ::basicBlock->appendInst(round);
4197
4198 return RValue<Float4>(V(result));
4199 }
4200 else
4201 {
4202 return Float4(Int4(x));
4203 }
4204}
4205
4206RValue<Float4> Frac(RValue<Float4> x)
4207{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004208 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004209 Float4 frc;
4210
4211 if(CPUID::SSE4_1)
4212 {
4213 frc = x - Floor(x);
4214 }
4215 else
4216 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004217 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004218
Ben Clayton713b8d32019-12-17 20:37:56 +00004219 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004220 }
4221
4222 // x - floor(x) can be 1.0 for very small negative x.
4223 // Clamp against the value just below 1.0.
4224 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4225}
4226
4227RValue<Float4> Floor(RValue<Float4> x)
4228{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004229 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004230 if(CPUID::SSE4_1)
4231 {
4232 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004233 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004234 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004235 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004236 round->addArg(::context->getConstantInt32(1));
4237 ::basicBlock->appendInst(round);
4238
4239 return RValue<Float4>(V(result));
4240 }
4241 else
4242 {
4243 return x - Frac(x);
4244 }
4245}
4246
4247RValue<Float4> Ceil(RValue<Float4> x)
4248{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004249 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004250 if(CPUID::SSE4_1)
4251 {
4252 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004253 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004254 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004255 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004256 round->addArg(::context->getConstantInt32(2));
4257 ::basicBlock->appendInst(round);
4258
4259 return RValue<Float4>(V(result));
4260 }
4261 else
4262 {
4263 return -Floor(-x);
4264 }
4265}
4266
Nicolas Capens519cf222020-05-08 15:27:19 -04004267Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004268{
4269 return T(Ice::IceType_v4f32);
4270}
4271
4272RValue<Long> Ticks()
4273{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004274 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004275 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004276 return Long(Int(0));
4277}
4278
Ben Clayton713b8d32019-12-17 20:37:56 +00004279RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004280{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004281 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004282 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004283}
4284
Ben Clayton713b8d32019-12-17 20:37:56 +00004285RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004286{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004287 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004288 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004289}
4290
Ben Clayton713b8d32019-12-17 20:37:56 +00004291Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004292{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004293 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004294 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004295}
4296
4297void Breakpoint()
4298{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004299 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004300 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004301 auto trap = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05004302 ::basicBlock->appendInst(trap);
4303}
4304
Ben Clayton713b8d32019-12-17 20:37:56 +00004305void Nucleus::createFence(std::memory_order memoryOrder)
4306{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004307 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004308 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004309 auto inst = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004310 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4311 inst->addArg(order);
4312 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004313}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004314
Ben Clayton713b8d32019-12-17 20:37:56 +00004315Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4316{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004317 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense4b77942021-08-03 17:09:41 -04004318 UNIMPLEMENTED("b/155867273 Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004319 return nullptr;
4320}
Nicolas Capense4b77942021-08-03 17:09:41 -04004321
Ben Clayton713b8d32019-12-17 20:37:56 +00004322void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense4b77942021-08-03 17:09:41 -04004325 UNIMPLEMENTED("b/155867273 Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004326}
Nicolas Capens157ba262019-12-10 17:49:14 -05004327
4328RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004331 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4332}
4333
4334RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004337 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4338}
4339
4340void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004343 return emulated::Scatter(base, val, offsets, mask, alignment);
4344}
4345
4346void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004348 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004349 return emulated::Scatter(base, val, offsets, mask, alignment);
4350}
4351
4352RValue<Float> Exp2(RValue<Float> x)
4353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004354 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004355 return emulated::Exp2(x);
4356}
4357
4358RValue<Float> Log2(RValue<Float> x)
4359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004360 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004361 return emulated::Log2(x);
4362}
4363
4364RValue<Float4> Sin(RValue<Float4> x)
4365{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004366 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004367 return emulated::Sin(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004368}
4369
4370RValue<Float4> Cos(RValue<Float4> x)
4371{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004372 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004373 return emulated::Cos(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004374}
4375
4376RValue<Float4> Tan(RValue<Float4> x)
4377{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004378 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004379 return emulated::Tan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004380}
4381
Nicolas Capensd04f3f52022-02-05 01:19:14 -05004382RValue<Float4> Asin(RValue<Float4> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05004383{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004384 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004385 return emulated::Asin(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004386}
4387
Nicolas Capensd04f3f52022-02-05 01:19:14 -05004388RValue<Float4> Acos(RValue<Float4> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05004389{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004390 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004391 return emulated::Acos(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004392}
4393
4394RValue<Float4> Atan(RValue<Float4> x)
4395{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004396 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004397 return emulated::Atan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004398}
4399
4400RValue<Float4> Sinh(RValue<Float4> x)
4401{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004402 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004403 return emulated::Sinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004404}
4405
4406RValue<Float4> Cosh(RValue<Float4> x)
4407{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004408 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004409 return emulated::Cosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004410}
4411
4412RValue<Float4> Tanh(RValue<Float4> x)
4413{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004414 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004415 return emulated::Tanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004416}
4417
4418RValue<Float4> Asinh(RValue<Float4> x)
4419{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004420 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004421 return emulated::Asinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004422}
4423
4424RValue<Float4> Acosh(RValue<Float4> x)
4425{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004426 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004427 return emulated::Acosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004428}
4429
4430RValue<Float4> Atanh(RValue<Float4> x)
4431{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004432 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004433 return emulated::Atanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004434}
4435
4436RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4437{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004438 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004439 return emulated::Atan2(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004440}
4441
4442RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4443{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004444 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004445 return emulated::Pow(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004446}
4447
4448RValue<Float4> Exp(RValue<Float4> x)
4449{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004450 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004451 return emulated::Exp(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004452}
4453
4454RValue<Float4> Log(RValue<Float4> x)
4455{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004456 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004457 return emulated::Log(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004458}
4459
4460RValue<Float4> Exp2(RValue<Float4> x)
4461{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004462 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004463 return emulated::Exp2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004464}
4465
4466RValue<Float4> Log2(RValue<Float4> x)
4467{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004468 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensf7c42b02022-02-05 00:31:49 -05004469 return emulated::Log2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004470}
4471
4472RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4473{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004474 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004475 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004476 {
Ben Claytonce54c592020-02-07 11:30:51 +00004477 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004478 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004479 }
4480 else
4481 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004482 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004483 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004484 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004485 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004486 ::basicBlock->appendInst(ctlz);
4487
4488 return RValue<UInt>(V(result));
4489 }
4490}
4491
4492RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4493{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004494 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004495 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004496 {
Ben Claytonce54c592020-02-07 11:30:51 +00004497 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004498 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004499 }
4500 else
4501 {
4502 // TODO: implement vectorized version in Subzero
4503 UInt4 result;
4504 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4505 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4506 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4507 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4508 return result;
4509 }
4510}
4511
4512RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4513{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004514 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004515 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004516 {
Ben Claytonce54c592020-02-07 11:30:51 +00004517 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004518 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004519 }
4520 else
4521 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004522 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004523 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004524 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004525 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004526 ::basicBlock->appendInst(ctlz);
4527
4528 return RValue<UInt>(V(result));
4529 }
4530}
4531
4532RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4533{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004534 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004535 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004536 {
Ben Claytonce54c592020-02-07 11:30:51 +00004537 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004538 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004539 }
4540 else
4541 {
4542 // TODO: implement vectorized version in Subzero
4543 UInt4 result;
4544 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4545 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4546 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4547 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4548 return result;
4549 }
4550}
4551
Antonio Maiorano370cba52019-12-31 11:36:07 -05004552RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4553{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004554 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004555 return emulated::MinAtomic(x, y, memoryOrder);
4556}
4557
4558RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4559{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004560 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004561 return emulated::MinAtomic(x, y, memoryOrder);
4562}
4563
4564RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4565{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004566 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004567 return emulated::MaxAtomic(x, y, memoryOrder);
4568}
4569
4570RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4571{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004572 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004573 return emulated::MaxAtomic(x, y, memoryOrder);
4574}
4575
// When ENABLE_RR_DEBUG_INFO is defined, calls emitPrintLocation() with the
// caller's backtrace. Without that define the body is empty.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004582void EmitDebugVariable(Value *value) {}
// No-op in this backend.
void FlushDebug() {}
4584
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004585namespace {
4586namespace coro {
4587
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004588// Instance data per generated coroutine
4589// This is the "handle" type used for Coroutine functions
4590// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4591struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004592{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004593 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004594 bool done = false; // the coroutine should stop at the next yield()
4595 bool terminated = false; // the coroutine has finished.
4596 bool inRoutine = false; // is the coroutine currently executing?
4597 marl::Scheduler::Fiber *mainFiber = nullptr;
4598 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004599 void *promisePtr = nullptr;
4600};
4601
4602CoroutineData *createCoroutineData()
4603{
4604 return new CoroutineData{};
4605}
4606
4607void destroyCoroutineData(CoroutineData *coroData)
4608{
4609 delete coroData;
4610}
4611
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004612// suspend() pauses execution of the coroutine, and resumes execution from the
4613// caller's call to await().
4614// Returns true if await() is called again, or false if coroutine_destroy()
4615// is called.
4616bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004617{
Ben Claytonc3466532020-03-24 11:54:05 +00004618 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4619 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4620 ASSERT(coroData->inRoutine);
4621 coroData->inRoutine = false;
4622 coroData->mainFiber->notify();
4623 while(!coroData->inRoutine)
4624 {
4625 coroData->routineFiber->wait();
4626 }
4627 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004628}
4629
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004630// resume() is called by await(), blocking until the coroutine calls yield()
4631// or the coroutine terminates.
4632void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004633{
Ben Claytonc3466532020-03-24 11:54:05 +00004634 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4635 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4636 ASSERT(!coroData->inRoutine);
4637 coroData->inRoutine = true;
4638 coroData->routineFiber->notify();
4639 while(coroData->inRoutine)
4640 {
4641 coroData->mainFiber->wait();
4642 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004643}
4644
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004645// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4646// until the coroutine ends, and deletes the coroutine data.
4647void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004648{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004649 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004650 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4651 ASSERT(!coroData->inRoutine);
4652 if(!coroData->terminated)
4653 {
4654 coroData->done = true;
4655 coroData->inRoutine = true;
4656 coroData->routineFiber->notify();
4657 while(!coroData->terminated)
4658 {
4659 coroData->mainFiber->wait();
4660 }
4661 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004662 if(coroData->useInternalScheduler)
4663 {
4664 ::getOrCreateScheduler().unbind();
4665 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004666 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004667}
4668
4669namespace detail {
4670thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4671} // namespace detail
4672
4673void setHandleParam(Nucleus::CoroutineHandle handle)
4674{
4675 ASSERT(!detail::coroHandle);
4676 detail::coroHandle = handle;
4677}
4678
4679Nucleus::CoroutineHandle getHandleParam()
4680{
4681 ASSERT(detail::coroHandle);
4682 auto handle = detail::coroHandle;
4683 detail::coroHandle = {};
4684 return handle;
4685}
4686
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004687bool isDone(Nucleus::CoroutineHandle handle)
4688{
4689 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004690 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004691}
4692
4693void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4694{
4695 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4696 coroData->promisePtr = promisePtr;
4697}
4698
4699void *getPromisePtr(Nucleus::CoroutineHandle handle)
4700{
4701 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4702 return coroData->promisePtr;
4703}
4704
4705} // namespace coro
4706} // namespace
4707
4708// Used to generate coroutines.
4709// Lifetime: from yield to acquireCoroutine
4710class CoroutineGenerator
4711{
4712public:
4713 CoroutineGenerator()
4714 {
4715 }
4716
4717 // Inserts instructions at the top of the current function to make it a coroutine.
4718 void generateCoroutineBegin()
4719 {
4720 // Begin building the main coroutine_begin() function.
4721 // We insert these instructions at the top of the entry node,
4722 // before existing reactor-generated instructions.
4723
4724 // CoroutineHandle coroutine_begin(<Arguments>)
4725 // {
4726 // this->handle = coro::getHandleParam();
4727 //
4728 // YieldType promise;
4729 // coro::setPromisePtr(handle, &promise); // For await
4730 //
4731 // ... <REACTOR CODE> ...
4732 //
4733
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004734 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004735 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004736
4737 // YieldType promise;
4738 // coro::setPromisePtr(handle, &promise); // For await
4739 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004740 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004741 }
4742
4743 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4744 void generateYield(Value *val)
4745 {
4746 // ... <REACTOR CODE> ...
4747 //
4748 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004749 // if (!coro::suspend(handle)) {
4750 // return false; // coroutine has been stopped by the caller.
4751 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004752 //
4753 // ... <REACTOR CODE> ...
4754
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004755 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004756 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004757
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004758 // if (!coro::suspend(handle)) {
4759 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4760 auto doneBlock = Nucleus::createBasicBlock();
4761 auto resumeBlock = Nucleus::createBasicBlock();
4762 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4763
4764 // return false; // coroutine has been stopped by the caller.
4765 ::basicBlock = doneBlock;
4766 Nucleus::createRetVoid(); // coroutine return value is ignored.
4767
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004768 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004769 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004770 }
4771
4772 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4773
4774 // Generates the await function for the current coroutine.
4775 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4776 static FunctionUniquePtr generateAwaitFunction()
4777 {
4778 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4779 // {
4780 // if (coro::isDone())
4781 // {
4782 // return false;
4783 // }
4784 // else // resume
4785 // {
4786 // YieldType* promise = coro::getPromisePtr(handle);
4787 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004788 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004789 // return true;
4790 // }
4791 // }
4792
4793 // Subzero doesn't support bool types (IceType_i1) as return type
4794 const Ice::Type ReturnType = Ice::IceType_i32;
4795 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4796 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4797
4798 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4799 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4800
4801 Ice::Variable *handle = awaitFunc->getArgs()[0];
4802 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4803
4804 auto doneBlock = awaitFunc->makeNode();
4805 {
4806 // return false;
4807 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4808 doneBlock->appendInst(ret);
4809 }
4810
4811 auto resumeBlock = awaitFunc->makeNode();
4812 {
4813 // YieldType* promise = coro::getPromisePtr(handle);
4814 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4815
4816 // *out = *promise;
4817 // Load promise value
4818 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4819 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4820 resumeBlock->appendInst(load);
4821 // Then store it in output param
4822 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4823 resumeBlock->appendInst(store);
4824
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004825 // coro::resume(handle);
4826 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004827
4828 // return true;
4829 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4830 resumeBlock->appendInst(ret);
4831 }
4832
4833 // if (coro::isDone())
4834 // {
4835 // <doneBlock>
4836 // }
4837 // else // resume
4838 // {
4839 // <resumeBlock>
4840 // }
4841 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004842 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004843 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4844 bb->appendInst(br);
4845
4846 return FunctionUniquePtr{ awaitFunc };
4847 }
4848
4849 // Generates the destroy function for the current coroutine.
4850 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4851 static FunctionUniquePtr generateDestroyFunction()
4852 {
4853 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4854 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004855 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004856 // return;
4857 // }
4858
4859 const Ice::Type ReturnType = Ice::IceType_void;
4860 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4861
4862 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4863 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4864
4865 Ice::Variable *handle = destroyFunc->getArgs()[0];
4866
4867 auto *bb = destroyFunc->getEntryNode();
4868
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004869 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4870 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004871
4872 // return;
4873 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4874 bb->appendInst(ret);
4875
4876 return FunctionUniquePtr{ destroyFunc };
4877 }
4878
4879private:
4880 Ice::Variable *handle{};
4881 Ice::Variable *promise{};
4882};
4883
4884static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4885{
4886 // This doubles up as our coroutine handle
4887 auto coroData = coro::createCoroutineData();
4888
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004889 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4890 if(coroData->useInternalScheduler)
4891 {
4892 ::getOrCreateScheduler().bind();
4893 }
4894
Ben Clayton76e9e532020-03-16 20:35:04 +00004895 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004896 // Store handle in TLS so that the coroutine can grab it right away, before
4897 // any fiber switch occurs.
4898 coro::setHandleParam(coroData);
4899
Ben Claytonc3466532020-03-24 11:54:05 +00004900 ASSERT(!coroData->routineFiber);
4901 coroData->routineFiber = marl::Scheduler::Fiber::current();
4902
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004903 beginFunc();
4904
Ben Claytonc3466532020-03-24 11:54:05 +00004905 ASSERT(coroData->inRoutine);
4906 coroData->done = true; // coroutine is done.
4907 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4908 coroData->inRoutine = false;
4909 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004910 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004911
Ben Claytonc3466532020-03-24 11:54:05 +00004912 ASSERT(!coroData->mainFiber);
4913 coroData->mainFiber = marl::Scheduler::Fiber::current();
4914
4915 // block until the first yield or coroutine end
4916 ASSERT(!coroData->inRoutine);
4917 coroData->inRoutine = true;
4918 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4919 while(coroData->inRoutine)
4920 {
4921 coroData->mainFiber->wait();
4922 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004923
4924 return coroData;
4925}
4926
4927void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4928{
4929 // Start by creating a regular function
4930 createFunction(yieldType, params);
4931
4932 // Save in case yield() is called
4933 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4934 ::coroYieldType = yieldType;
4935}
4936
4937void Nucleus::yield(Value *val)
4938{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004939 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004940 Variable::materializeAll();
4941
4942 // On first yield, we start generating coroutine functions
4943 if(!::coroGen)
4944 {
4945 ::coroGen = std::make_shared<CoroutineGenerator>();
4946 ::coroGen->generateCoroutineBegin();
4947 }
4948
4949 ASSERT(::coroGen);
4950 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004951}
4952
Ben Clayton713b8d32019-12-17 20:37:56 +00004953static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4954{
4955 return false;
4956}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004957
4958static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4959{
4960}
Nicolas Capens157ba262019-12-10 17:49:14 -05004961
Sean Risser705231f2021-08-19 18:17:24 -04004962std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit *cfgEdit /* = nullptr */)
Nicolas Capens157ba262019-12-10 17:49:14 -05004963{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004964 if(::coroGen)
4965 {
4966 // Finish generating coroutine functions
4967 {
4968 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004969 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004970 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004971
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004972 auto awaitFunc = ::coroGen->generateAwaitFunction();
4973 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004974
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004975 // At this point, we no longer need the CoroutineGenerator.
4976 ::coroGen.reset();
4977 ::coroYieldType = nullptr;
4978
4979 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4980 { name, "await", "destroy" },
4981 cfgEdit);
4982
4983 return routine;
4984 }
4985 else
4986 {
4987 {
4988 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004989 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004990 }
4991
4992 ::coroYieldType = nullptr;
4993
4994 // Not an actual coroutine (no yields), so return stubs for await and destroy
4995 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4996
4997 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4998 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4999 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
5000 return routine;
5001 }
Nicolas Capens157ba262019-12-10 17:49:14 -05005002}
5003
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005004Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00005005{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005006 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
5007
5008 if(isCoroutine)
5009 {
5010 return rr::invokeCoroutineBegin(func);
5011 }
5012 else
5013 {
5014 // For regular routines, just invoke the begin func directly
5015 return func();
5016 }
Ben Clayton713b8d32019-12-17 20:37:56 +00005017}
Nicolas Capens157ba262019-12-10 17:49:14 -05005018
5019} // namespace rr