blob: ada10727ad94b4d80dbee0ce4964e75cb82e9fd5 [file] [log] [blame]
Nicolas Capens598f8d82016-09-26 15:09:10 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maiorano9c14bda2020-09-18 16:33:36 -040016#include "EmulatedIntrinsics.hpp"
17#include "OptimalIntrinsics.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050018#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000019#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050020#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040021
Nicolas Capens1a3ce872018-10-10 10:42:36 -040022#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000023#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040024
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040026#include "src/IceCfgNode.h"
27#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceELFStreamer.h"
29#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040030#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000031#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040032
Ben Clayton713b8d32019-12-17 20:37:56 +000033#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040034#include "llvm/Support/FileSystem.h"
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -050035#include "llvm/Support/ManagedStatic.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040036#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040037
Antonio Maiorano8bce0672020-02-28 13:13:45 -050038#include "marl/event.h"
39
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000041# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040042#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040043
Nicolas Capensbd65da92017-01-05 16:31:06 -050044#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000045# ifndef WIN32_LEAN_AND_MEAN
46# define WIN32_LEAN_AND_MEAN
47# endif // !WIN32_LEAN_AND_MEAN
48# ifndef NOMINMAX
49# define NOMINMAX
50# endif // !NOMINMAX
51# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050052#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040053
Ben Clayton683bad82020-02-10 23:57:09 +000054#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040055#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000056#include <limits>
57#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040058
Antonio Maiorano02a39532020-01-21 15:15:34 -050059// Subzero utility functions
60// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050062namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050063
64Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
65{
66 uint32_t sequenceNumber = 0;
Nicolas Capensff010f92021-02-01 12:22:53 -050067 auto *function = Ice::Cfg::create(context, sequenceNumber).release();
68
69 function->setStackSizeLimit(512 * 1024); // 512 KiB
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050070
71 Ice::CfgLocalAllocatorScope allocScope{ function };
72
73 for(auto type : paramTypes)
74 {
75 Ice::Variable *arg = function->makeVariable(type);
76 function->addArg(arg);
77 }
78
79 Ice::CfgNode *node = function->makeNode();
80 function->setEntryNode(node);
81
82 return function;
83}
84
85Ice::Type getPointerType(Ice::Type elementType)
86{
87 if(sizeof(void *) == 8)
88 {
89 return Ice::IceType_i64;
90 }
91 else
92 {
93 return Ice::IceType_i32;
94 }
95}
96
97Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
98{
99 int typeSize = Ice::typeWidthInBytes(type);
100 int totalSize = typeSize * (arraySize ? arraySize : 1);
101
102 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
103 auto address = function->makeVariable(getPointerType(type));
104 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
105 function->getEntryNode()->getInsts().push_front(alloca);
106
107 return address;
108}
109
110Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500111{
112 if(sizeof(void *) == 8)
113 {
114 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
115 }
116 else
117 {
118 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
119 }
120}
121
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400122// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
123Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500124
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400125// Wrapper for calls on C functions with Ice types
126Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
127{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500128 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400129
130 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
131 // then truncate result to bool.
132 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
133 const bool returningBool = (retTy == Ice::IceType_i1);
134 if(returningBool)
135 {
136 ret = function->makeVariable(Ice::IceType_i32);
137 }
138 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500139 {
140 ret = function->makeVariable(retTy);
141 }
142
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400143 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500144 for(auto arg : iceArgs)
145 {
146 call->addArg(arg);
147 }
148
149 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400150
151 if(returningBool)
152 {
153 // Truncate result to bool so that if any (lsb) bits were set, result will be true
154 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
155 }
156
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500157 return ret;
158}
159
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400160Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
161{
162 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
163 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
164}
165
Antonio Maiorano62427e02020-02-13 09:18:05 -0500166// Wrapper for calls on C functions with Ice types
167template<typename Return, typename... CArgs, typename... RArgs>
168Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
169{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400170 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
171
Nicolas Capens519cf222020-05-08 15:27:19 -0400172 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500173 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500174 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500175}
176
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400177Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
178{
179 Ice::Variable *to = function->makeVariable(toType);
180 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
181 basicBlock->appendInst(cast);
182 return to;
183}
184
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500185Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500186{
Antonio Maiorano02a39532020-01-21 15:15:34 -0500187 Ice::Variable *result = function->makeVariable(type);
188 auto load = Ice::InstLoad::create(function, result, ptr, align);
189 basicBlock->appendInst(load);
190
191 return result;
192}
193
194} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500195} // namespace
196
Ben Clayton713b8d32019-12-17 20:37:56 +0000197namespace rr {
198class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500199class CoroutineGenerator;
200} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500201
202namespace {
203
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -0500204// Used to automatically invoke llvm_shutdown() when driver is unloaded
205llvm::llvm_shutdown_obj llvmShutdownObj;
206
Nicolas Capens157ba262019-12-10 17:49:14 -0500207// Default configuration settings. Must be accessed under mutex lock.
208std::mutex defaultConfigLock;
209rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000210{
Nicolas Capens157ba262019-12-10 17:49:14 -0500211 // This uses a static in a function to avoid the cost of a global static
212 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
213 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000214 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500215 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000216}
217
Nicolas Capens157ba262019-12-10 17:49:14 -0500218Ice::GlobalContext *context = nullptr;
219Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400220Ice::CfgNode *entryBlock = nullptr;
221Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500222Ice::CfgNode *basicBlock = nullptr;
223Ice::CfgLocalAllocatorScope *allocator = nullptr;
224rr::ELFMemoryStreamer *routine = nullptr;
225
226std::mutex codegenMutex;
227
228Ice::ELFFileStreamer *elfFile = nullptr;
229Ice::Fdstream *out = nullptr;
230
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500231// Coroutine globals
232rr::Type *coroYieldType = nullptr;
233std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500234marl::Scheduler &getOrCreateScheduler()
235{
236 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100237 marl::Scheduler::Config cfg;
238 cfg.setWorkerThreadCount(8);
239 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500240 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500241
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500242 return *scheduler;
243}
Nicolas Capens54313fb2021-02-19 14:26:27 -0500244
245rr::Nucleus::OptimizerCallback *optimizerCallback = nullptr;
246
Nicolas Capens157ba262019-12-10 17:49:14 -0500247} // Anonymous namespace
248
249namespace {
250
251#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +0000252# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500253#endif
254
Ben Clayton713b8d32019-12-17 20:37:56 +0000255#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
256# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500257#endif
258
Antonio Maiorano370cba52019-12-31 11:36:07 -0500259Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400260{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500261 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100262 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500263 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000264 case rr::Optimization::Level::None: return Ice::Opt_m1;
265 case rr::Optimization::Level::Less: return Ice::Opt_m1;
266 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500267 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
268 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100269 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500270 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400271}
272
Antonio Maiorano370cba52019-12-31 11:36:07 -0500273Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
274{
275 switch(memoryOrder)
276 {
277 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
278 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
279 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
280 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
281 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
282 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
283 }
284 return Ice::Intrinsics::MemoryOrderInvalid;
285}
286
// Host CPU detection. Both flags are computed once, during static
// initialization, by the private detect*() helpers.
class CPUID
{
public:
    static const bool ARM;     // Result of detectARM().
    static const bool SSE4_1;  // Result of detectSSE4_1().

private:
    // On x86 targets, executes the CPUID instruction for leaf `info` and stores
    // EAX, EBX, ECX and EDX in registers[0..3]. On every other target all four
    // entries are set to zero.
    static void cpuid(int registers[4], int info)
    {
#if defined(__i386__) || defined(__x86_64__)
#    if defined(_WIN32)
        __cpuid(registers, info);
#    else
        __asm volatile("cpuid"
                       : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
                       : "a"(info));
#    endif
#else
        for(int i = 0; i < 4; i++)
        {
            registers[i] = 0;
        }
#endif
    }

    // True when compiling for 32- or 64-bit ARM; false for x86 and MIPS.
    // Any other architecture is a compile error.
    static bool detectARM()
    {
#if defined(__arm__) || defined(__aarch64__)
        return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
        return false;
#else
#    error "Unknown architecture"
#endif
    }

    // On x86, tests bit 19 of ECX from CPUID leaf 1. Always false elsewhere.
    static bool detectSSE4_1()
    {
#if defined(__i386__) || defined(__x86_64__)
        const int sse4_1Mask = 0x00080000;

        int registers[4];
        cpuid(registers, 1);

        const int ecx = registers[2];
        return (ecx & sse4_1Mask) != 0;
#else
        return false;
#endif
    }
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();

// Intrinsic emulation is disabled; mismatched bitcasts are emulated on ARM only.
const bool emulateIntrinsics = false;
const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400341
Nicolas Capens157ba262019-12-10 17:49:14 -0500342constexpr bool subzeroDumpEnabled = false;
343constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500344
345#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500346static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
347static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500348#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500349
350} // anonymous namespace
351
352namespace rr {
353
// Returns the human-readable name of this Reactor backend.
std::string BackendName()
{
    return std::string("Subzero");
}
358
Ben Clayton713b8d32019-12-17 20:37:56 +0000359const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500360 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500361};
362
363enum EmulatedType
364{
365 EmulatedShift = 16,
366 EmulatedV2 = 2 << EmulatedShift,
367 EmulatedV4 = 4 << EmulatedShift,
368 EmulatedV8 = 8 << EmulatedShift,
369 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
370
371 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
372 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
373 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000374 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
375 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500376 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
377};
378
Ben Clayton713b8d32019-12-17 20:37:56 +0000379class Value : public Ice::Operand
380{};
381class SwitchCases : public Ice::InstSwitch
382{};
383class BasicBlock : public Ice::CfgNode
384{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500385
386Ice::Type T(Type *t)
387{
388 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
389 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500390}
391
Nicolas Capens157ba262019-12-10 17:49:14 -0500392Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400393{
Ben Clayton713b8d32019-12-17 20:37:56 +0000394 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500395}
396
397Type *T(EmulatedType t)
398{
Ben Clayton713b8d32019-12-17 20:37:56 +0000399 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500400}
401
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500402std::vector<Ice::Type> T(const std::vector<Type *> &types)
403{
404 std::vector<Ice::Type> result;
405 result.reserve(types.size());
406 for(auto &t : types)
407 {
408 result.push_back(T(t));
409 }
410 return result;
411}
412
Nicolas Capens157ba262019-12-10 17:49:14 -0500413Value *V(Ice::Operand *v)
414{
Ben Clayton713b8d32019-12-17 20:37:56 +0000415 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500416}
417
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500418Ice::Operand *V(Value *v)
419{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500420 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500421}
422
Antonio Maiorano62427e02020-02-13 09:18:05 -0500423std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
424{
425 std::vector<Ice::Operand *> result;
426 result.reserve(values.size());
427 for(auto &v : values)
428 {
429 result.push_back(V(v));
430 }
431 return result;
432}
433
Nicolas Capens157ba262019-12-10 17:49:14 -0500434BasicBlock *B(Ice::CfgNode *b)
435{
Ben Clayton713b8d32019-12-17 20:37:56 +0000436 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500437}
438
439static size_t typeSize(Type *type)
440{
441 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400442 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500443 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800444 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000445 case Type_v2i32: return 8;
446 case Type_v4i16: return 8;
447 case Type_v2i16: return 4;
448 case Type_v8i8: return 8;
449 case Type_v4i8: return 4;
450 case Type_v2f32: return 8;
451 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500452 }
453 }
454
455 return Ice::typeWidthInBytes(T(type));
456}
457
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400458static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500459{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400460 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500461 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
462 {
463 Nucleus::createRetVoid();
464 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400465
466 // Connect the entry block to the top of the initial basic block
467 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
468 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500469}
470
Ben Clayton713b8d32019-12-17 20:37:56 +0000471using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
472using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500473
474inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
475{
Ben Clayton713b8d32019-12-17 20:37:56 +0000476 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500477}
478
479inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
480{
481 return &sectionHeader(elfHeader)[index];
482}
483
484static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
485{
486 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
487
488 uint32_t index = relocation.getSymbol();
489 int table = relocationTable.sh_link;
490 void *symbolValue = nullptr;
491
492 if(index != SHN_UNDEF)
493 {
494 if(table == SHN_UNDEF) return nullptr;
495 const SectionHeader *symbolTable = elfSection(elfHeader, table);
496
497 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
498 if(index >= symtab_entries)
499 {
500 ASSERT(index < symtab_entries && "Symbol Index out of range");
501 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800502 }
503
Nicolas Capens157ba262019-12-10 17:49:14 -0500504 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000505 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500506 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800507
Nicolas Capens157ba262019-12-10 17:49:14 -0500508 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400509 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500510 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000511 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400512 }
513 else
514 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500515 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400516 }
Nicolas Capens66478362016-10-13 15:36:36 -0400517 }
518
Nicolas Capens157ba262019-12-10 17:49:14 -0500519 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000520 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500521
522 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400523 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400524 switch(relocation.getType())
525 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000526 case R_ARM_NONE:
527 // No relocation
528 break;
529 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500530 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000531 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500532 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
533 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
534 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400535 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000536 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500537 {
538 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
539 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
540 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400541 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000542 default:
543 ASSERT(false && "Unsupported relocation type");
544 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400545 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500546 }
547 else
548 {
549 switch(relocation.getType())
550 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000551 case R_386_NONE:
552 // No relocation
553 break;
554 case R_386_32:
555 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
556 break;
557 case R_386_PC32:
558 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
559 break;
560 default:
561 ASSERT(false && "Unsupported relocation type");
562 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500563 }
Nicolas Capens66478362016-10-13 15:36:36 -0400564 }
565
Nicolas Capens157ba262019-12-10 17:49:14 -0500566 return symbolValue;
567}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400568
Nicolas Capens157ba262019-12-10 17:49:14 -0500569static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
570{
571 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
572
573 uint32_t index = relocation.getSymbol();
574 int table = relocationTable.sh_link;
575 void *symbolValue = nullptr;
576
577 if(index != SHN_UNDEF)
578 {
579 if(table == SHN_UNDEF) return nullptr;
580 const SectionHeader *symbolTable = elfSection(elfHeader, table);
581
582 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
583 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400584 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500585 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400586 return nullptr;
587 }
588
Nicolas Capens157ba262019-12-10 17:49:14 -0500589 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000590 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500591 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400592
Nicolas Capens157ba262019-12-10 17:49:14 -0500593 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400594 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500595 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000596 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 }
598 else
599 {
600 return nullptr;
601 }
602 }
Nicolas Capens66478362016-10-13 15:36:36 -0400603
Nicolas Capens157ba262019-12-10 17:49:14 -0500604 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000605 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
606 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400607
Nicolas Capens157ba262019-12-10 17:49:14 -0500608 switch(relocation.getType())
609 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000610 case R_X86_64_NONE:
611 // No relocation
612 break;
613 case R_X86_64_64:
614 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
615 break;
616 case R_X86_64_PC32:
617 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
618 break;
619 case R_X86_64_32S:
620 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
621 break;
622 default:
623 ASSERT(false && "Unsupported relocation type");
624 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500625 }
626
627 return symbolValue;
628}
629
// Location and size of the machine code of one function inside a loaded ELF
// image, as filled in by loadImage().
struct EntryPoint
{
    // Address of the first byte of the function's code. Null until assigned, so
    // a default-constructed EntryPoint never holds an indeterminate pointer.
    const void *entry = nullptr;

    // Size of the function's code section in bytes.
    size_t codeSize = 0;
};
635
636std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
637{
638 ASSERT(functionNames.size() > 0);
639 std::vector<EntryPoint> entryPoints(functionNames.size());
640
Ben Clayton713b8d32019-12-17 20:37:56 +0000641 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500642
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400643 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500644 if(!elfHeader->checkMagic())
645 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400646 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500647 }
648
649 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000650 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
651#if defined(__i386__)
652 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
653#elif defined(__x86_64__)
654 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
655#elif defined(__arm__)
656 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
657#elif defined(__aarch64__)
658 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
659#elif defined(__mips__)
660 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
661#else
662# error "Unsupported platform"
663#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500664
Ben Clayton713b8d32019-12-17 20:37:56 +0000665 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500666
667 for(int i = 0; i < elfHeader->e_shnum; i++)
668 {
669 if(sectionHeader[i].sh_type == SHT_PROGBITS)
670 {
671 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
672 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400673 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500674 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400675 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500676
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400677 for(size_t j = 0; j < functionNames.size(); ++j)
678 {
679 if(strstr(sectionName, functionNames[j]) != nullptr)
680 {
681 return j;
682 }
683 }
684
685 UNREACHABLE("Failed to find executable section that matches input function names");
686 return static_cast<size_t>(-1);
687 };
688
689 size_t index = findSectionNameEntryIndex();
690 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
691 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400692 }
693 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500694 else if(sectionHeader[i].sh_type == SHT_REL)
695 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000696 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400697
Nicolas Capens157ba262019-12-10 17:49:14 -0500698 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
699 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000700 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500701 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
702 }
703 }
704 else if(sectionHeader[i].sh_type == SHT_RELA)
705 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000706 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500707
708 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
709 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000710 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500711 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
712 }
713 }
714 }
715
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400716 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500717}
718
719template<typename T>
720struct ExecutableAllocator
721{
722 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000723 template<class U>
724 ExecutableAllocator(const ExecutableAllocator<U> &other)
725 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500726
727 using value_type = T;
728 using size_type = std::size_t;
729
730 T *allocate(size_type n)
731 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000732 return (T *)allocateMemoryPages(
733 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500734 }
735
736 void deallocate(T *p, size_type n)
737 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800738 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500739 }
740};
741
742class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
743{
744 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
745 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
746
747public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000748 ELFMemoryStreamer()
749 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500750 {
751 position = 0;
752 buffer.reserve(0x1000);
753 }
754
755 ~ELFMemoryStreamer() override
756 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500757 }
758
759 void write8(uint8_t Value) override
760 {
761 if(position == (uint64_t)buffer.size())
762 {
763 buffer.push_back(Value);
764 position++;
765 }
766 else if(position < (uint64_t)buffer.size())
767 {
768 buffer[position] = Value;
769 position++;
770 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000771 else
772 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500773 }
774
775 void writeBytes(llvm::StringRef Bytes) override
776 {
777 std::size_t oldSize = buffer.size();
778 buffer.resize(oldSize + Bytes.size());
779 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
780 position += Bytes.size();
781 }
782
783 uint64_t tell() const override { return position; }
784
785 void seek(uint64_t Off) override { position = Off; }
786
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400787 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500788 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400789 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500790
791#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500792 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
793#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400794 for(auto &entryPoint : entryPoints)
795 {
796 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
797 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500798#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500799
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400800 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400801 }
802
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500803 void finalize()
804 {
805 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
806
807 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
808 }
809
Ben Clayton713b8d32019-12-17 20:37:56 +0000810 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400811 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500812 ASSERT(func);
813 funcs[index] = func;
814 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400815
Nicolas Capens157ba262019-12-10 17:49:14 -0500816 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400817 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500818 ASSERT(funcs[index]);
819 return funcs[index];
820 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400821
Antonio Maiorano02a39532020-01-21 15:15:34 -0500822 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500823 {
Nicolas Capens4e75f452021-01-28 01:52:56 -0500824 // Check if we already have a suitable constant.
825 for(const auto &c : constantsPool)
826 {
827 void *ptr = c.data.get();
828 size_t space = c.space;
829
830 void *alignedPtr = std::align(alignment, size, ptr, space);
831
832 if(space < size)
833 {
834 continue;
835 }
836
837 if(memcmp(data, alignedPtr, size) == 0)
838 {
839 return alignedPtr;
840 }
841 }
842
Antonio Maiorano02a39532020-01-21 15:15:34 -0500843 // TODO(b/148086935): Replace with a buffer allocator.
844 size_t space = size + alignment;
845 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
846 void *ptr = buf.get();
847 void *alignedPtr = std::align(alignment, size, ptr, space);
848 ASSERT(alignedPtr);
849 memcpy(alignedPtr, data, size);
Nicolas Capens4e75f452021-01-28 01:52:56 -0500850 constantsPool.emplace_back(std::move(buf), space);
851
Antonio Maiorano02a39532020-01-21 15:15:34 -0500852 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500853 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400854
Nicolas Capens157ba262019-12-10 17:49:14 -0500855private:
Nicolas Capens4e75f452021-01-28 01:52:56 -0500856 struct Constant
857 {
858 Constant(std::unique_ptr<uint8_t[]> data, size_t space)
859 : data(std::move(data))
860 , space(space)
861 {}
862
863 std::unique_ptr<uint8_t[]> data;
864 size_t space;
865 };
866
Ben Clayton713b8d32019-12-17 20:37:56 +0000867 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500868 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
869 std::size_t position;
Nicolas Capens4e75f452021-01-28 01:52:56 -0500870 std::vector<Constant> constantsPool;
Nicolas Capens157ba262019-12-10 17:49:14 -0500871};
872
Antonio Maiorano62427e02020-02-13 09:18:05 -0500873#ifdef ENABLE_RR_PRINT
874void VPrintf(const std::vector<Value *> &vals)
875{
Antonio Maiorano8cbee412020-06-10 15:59:20 -0400876 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(rr::DebugPrintf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500877}
878#endif // ENABLE_RR_PRINT
879
Nicolas Capens157ba262019-12-10 17:49:14 -0500880Nucleus::Nucleus()
881{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400882 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500883
884 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
885 Ice::ClFlags::getParsedClFlags(Flags);
886
Ben Clayton713b8d32019-12-17 20:37:56 +0000887#if defined(__arm__)
888 Flags.setTargetArch(Ice::Target_ARM32);
889 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
890#elif defined(__mips__)
891 Flags.setTargetArch(Ice::Target_MIPS32);
892 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
893#else // x86
894 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
895 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
896#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500897 Flags.setOutFileType(Ice::FT_Elf);
898 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
899 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
900 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
901 Flags.setDisableHybridAssembly(true);
902
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500903 // Emit functions into separate sections in the ELF so we can find them by name
904 Flags.setFunctionSections(true);
905
Nicolas Capens157ba262019-12-10 17:49:14 -0500906 static llvm::raw_os_ostream cout(std::cout);
907 static llvm::raw_os_ostream cerr(std::cerr);
908
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500909 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500910 {
911 // Decorate text asm with liveness info
912 Flags.setDecorateAsm(true);
913 }
914
Ben Clayton713b8d32019-12-17 20:37:56 +0000915 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500916 {
917 std::error_code errorCode;
918 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
919 ::elfFile = new Ice::ELFFileStreamer(*out);
920 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
921 }
922 else
923 {
924 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
925 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
926 ::routine = elfMemory;
927 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400928
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400929#if !__has_feature(memory_sanitizer)
930 // thread_local variables in shared libraries are initialized at load-time,
931 // but this is not observed by MemorySanitizer if the loader itself was not
932 // instrumented, leading to false-positive unitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400933 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400934#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500935 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500936}
937
938Nucleus::~Nucleus()
939{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400940 delete Variable::unmaterializedVariables;
941 Variable::unmaterializedVariables = nullptr;
942
Nicolas Capens157ba262019-12-10 17:49:14 -0500943 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500944 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500945
946 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500947 ::allocator = nullptr;
948
Nicolas Capens157ba262019-12-10 17:49:14 -0500949 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500950 ::function = nullptr;
951
Nicolas Capens157ba262019-12-10 17:49:14 -0500952 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500953 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500954
955 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500956 ::elfFile = nullptr;
957
Nicolas Capens157ba262019-12-10 17:49:14 -0500958 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500959 ::out = nullptr;
960
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400961 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500962 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400963 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500964
965 ::codegenMutex.unlock();
966}
967
968void Nucleus::setDefaultConfig(const Config &cfg)
969{
970 std::unique_lock<std::mutex> lock(::defaultConfigLock);
971 ::defaultConfig() = cfg;
972}
973
974void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
975{
976 std::unique_lock<std::mutex> lock(::defaultConfigLock);
977 auto &config = ::defaultConfig();
978 config = cfgEdit.apply(config);
979}
980
981Config Nucleus::getDefaultConfig()
982{
983 std::unique_lock<std::mutex> lock(::defaultConfigLock);
984 return ::defaultConfig();
985}
986
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500987// This function lowers and produces executable binary code in memory for the input functions,
988// and returns a Routine with the entry points to these functions.
989template<size_t Count>
990static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500991{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500992 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
993 // and GlobalContext::emitItems.
994
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500995 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500996 {
997 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500998 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500999 }
1000
1001 ::context->emitFileHeader();
1002
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001003 // Translate
1004
1005 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -05001006 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001007 Ice::Cfg *currFunc = functions[i];
1008
1009 // Install function allocator in TLS for Cfg-specific container allocators
1010 Ice::CfgLocalAllocatorScope allocScope(currFunc);
1011
1012 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
1013
Nicolas Capens54313fb2021-02-19 14:26:27 -05001014 if(::optimizerCallback)
1015 {
1016 Nucleus::OptimizerReport report;
1017 rr::optimize(currFunc, &report);
1018 ::optimizerCallback(&report);
1019 ::optimizerCallback = nullptr;
1020 }
1021 else
1022 {
1023 rr::optimize(currFunc);
1024 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001025
1026 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001027 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001028
1029 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001030 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001031
1032 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1033
1034 if(subzeroEmitTextAsm)
1035 {
1036 currFunc->emit();
1037 }
1038
1039 currFunc->emitIAS();
Nicolas Capensff010f92021-02-01 12:22:53 -05001040
1041 if(currFunc->hasError())
1042 {
1043 return nullptr;
1044 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001045 }
1046
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001047 // Emit items
1048
1049 ::context->lowerGlobals("");
1050
Nicolas Capens157ba262019-12-10 17:49:14 -05001051 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001052
1053 for(size_t i = 0; i < Count; ++i)
1054 {
1055 Ice::Cfg *currFunc = functions[i];
1056
1057 // Accumulate globals from functions to emit into the "last" section at the end
1058 auto globals = currFunc->getGlobalInits();
1059 if(globals && !globals->empty())
1060 {
1061 ::context->getGlobals()->merge(globals.get());
1062 }
1063
1064 auto assembler = currFunc->releaseAssembler();
1065 assembler->alignFunction();
1066 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1067 }
1068
Nicolas Capens157ba262019-12-10 17:49:14 -05001069 ::context->lowerGlobals("last");
1070 ::context->lowerConstants();
1071 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001072
Nicolas Capens157ba262019-12-10 17:49:14 -05001073 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001074 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001075 objectWriter->writeNonUserSections();
1076
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001077 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001078 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1079 ASSERT(entryPoints.size() == Count);
1080 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001081 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001082 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001083 }
1084
1085 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001086
1087 Routine *handoffRoutine = ::routine;
1088 ::routine = nullptr;
1089
1090 return std::shared_ptr<Routine>(handoffRoutine);
1091}
1092
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001093std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1094{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001095 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001096 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1097}
1098
Nicolas Capens157ba262019-12-10 17:49:14 -05001099Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1100{
1101 Ice::Type type = T(t);
1102 int typeSize = Ice::typeWidthInBytes(type);
1103 int totalSize = typeSize * (arraySize ? arraySize : 1);
1104
1105 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1106 auto address = ::function->makeVariable(T(getPointerType(t)));
1107 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1108 ::function->getEntryNode()->getInsts().push_front(alloca);
1109
1110 return V(address);
1111}
1112
1113BasicBlock *Nucleus::createBasicBlock()
1114{
1115 return B(::function->makeNode());
1116}
1117
1118BasicBlock *Nucleus::getInsertBlock()
1119{
1120 return B(::basicBlock);
1121}
1122
1123void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1124{
Ben Clayton713b8d32019-12-17 20:37:56 +00001125 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001126
1127 Variable::materializeAll();
1128
1129 ::basicBlock = basicBlock;
1130}
1131
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001132void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001133{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001134 ASSERT(::function == nullptr);
1135 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001136 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001137 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001138 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001139
1140 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1141
1142 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1143 // becomes invalid if another one is created; for example, when creating await and destroy functions
1144 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1145 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001146 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1147
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001148 ::entryBlock = ::function->getEntryNode();
1149 ::basicBlock = ::function->makeNode();
1150 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001151}
1152
1153Value *Nucleus::getArgument(unsigned int index)
1154{
1155 return V(::function->getArgs()[index]);
1156}
1157
1158void Nucleus::createRetVoid()
1159{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001160 RR_DEBUG_INFO_UPDATE_LOC();
1161
Nicolas Capens157ba262019-12-10 17:49:14 -05001162 // Code generated after this point is unreachable, so any variables
1163 // being read can safely return an undefined value. We have to avoid
1164 // materializing variables after the terminator ret instruction.
1165 Variable::killUnmaterialized();
1166
1167 Ice::InstRet *ret = Ice::InstRet::create(::function);
1168 ::basicBlock->appendInst(ret);
1169}
1170
1171void Nucleus::createRet(Value *v)
1172{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001173 RR_DEBUG_INFO_UPDATE_LOC();
1174
Nicolas Capens157ba262019-12-10 17:49:14 -05001175 // Code generated after this point is unreachable, so any variables
1176 // being read can safely return an undefined value. We have to avoid
1177 // materializing variables after the terminator ret instruction.
1178 Variable::killUnmaterialized();
1179
1180 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1181 ::basicBlock->appendInst(ret);
1182}
1183
1184void Nucleus::createBr(BasicBlock *dest)
1185{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001186 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001187 Variable::materializeAll();
1188
1189 auto br = Ice::InstBr::create(::function, dest);
1190 ::basicBlock->appendInst(br);
1191}
1192
1193void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1194{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001195 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001196 Variable::materializeAll();
1197
1198 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1199 ::basicBlock->appendInst(br);
1200}
1201
1202static bool isCommutative(Ice::InstArithmetic::OpKind op)
1203{
1204 switch(op)
1205 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001206 case Ice::InstArithmetic::Add:
1207 case Ice::InstArithmetic::Fadd:
1208 case Ice::InstArithmetic::Mul:
1209 case Ice::InstArithmetic::Fmul:
1210 case Ice::InstArithmetic::And:
1211 case Ice::InstArithmetic::Or:
1212 case Ice::InstArithmetic::Xor:
1213 return true;
1214 default:
1215 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001216 }
1217}
1218
1219static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1220{
1221 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1222
1223 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1224
1225 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1226 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1227 ::basicBlock->appendInst(arithmetic);
1228
1229 return V(result);
1230}
1231
1232Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1233{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001234 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001235 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1236}
1237
1238Value *Nucleus::createSub(Value *lhs, Value *rhs)
1239{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001240 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001241 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1242}
1243
1244Value *Nucleus::createMul(Value *lhs, Value *rhs)
1245{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001246 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001247 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1248}
1249
1250Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1251{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001252 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001253 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1254}
1255
1256Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1257{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001258 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001259 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1260}
1261
1262Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1263{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001264 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001265 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1266}
1267
1268Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001271 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1272}
1273
1274Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001277 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1278}
1279
1280Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001282 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001283 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1284}
1285
1286Value *Nucleus::createURem(Value *lhs, Value *rhs)
1287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001289 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1290}
1291
1292Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001295 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1296}
1297
1298Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001300 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001301 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1302 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001303 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001304 return nullptr;
1305}
1306
1307RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1308{
1309 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001310}
1311
1312Value *Nucleus::createShl(Value *lhs, Value *rhs)
1313{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001314 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001315 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1316}
1317
1318Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1319{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001320 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001321 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1322}
1323
1324Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1325{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001326 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001327 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1328}
1329
1330Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1331{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001332 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001333 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1334}
1335
1336Value *Nucleus::createOr(Value *lhs, Value *rhs)
1337{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001338 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001339 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1340}
1341
1342Value *Nucleus::createXor(Value *lhs, Value *rhs)
1343{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001344 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001345 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1346}
1347
1348Value *Nucleus::createNeg(Value *v)
1349{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001350 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001351 return createSub(createNullValue(T(v->getType())), v);
1352}
1353
1354Value *Nucleus::createFNeg(Value *v)
1355{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001356 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001357 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1358 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001359
1360 return createFSub(negativeZero, v);
1361}
1362
1363Value *Nucleus::createNot(Value *v)
1364{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001365 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001366 if(Ice::isScalarIntegerType(v->getType()))
1367 {
1368 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1369 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001370 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001371 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001372 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001373 return createXor(v, createConstantVector(c, T(v->getType())));
1374 }
1375}
1376
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001377static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1378{
1379#if defined(__i386__) || defined(__x86_64__)
1380 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1381 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1382 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1383#else
1384 if(atomic)
1385 {
1386 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1387 }
1388 if(memoryOrder != std::memory_order_relaxed)
1389 {
1390 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1391 }
1392#endif
1393
1394 // Vulkan doesn't allow sequential memory order
1395 ASSERT(memoryOrder != std::memory_order_seq_cst);
1396}
1397
Nicolas Capens157ba262019-12-10 17:49:14 -05001398Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1399{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001400 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001401 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001402
1403 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001404 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001405
Ben Clayton713b8d32019-12-17 20:37:56 +00001406 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001407 {
1408 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001409 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001410 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001411 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001412 auto pointer = RValue<Pointer<Byte>>(ptr);
1413 Int x = *Pointer<Int>(pointer);
1414
1415 Int4 vector;
1416 vector = Insert(vector, x, 0);
1417
Antonio Maiorano02a39532020-01-21 15:15:34 -05001418 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001419 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1420 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001421 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001422 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001423 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001424 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001425 auto pointer = RValue<Pointer<Byte>>(ptr);
1426 Int x = *Pointer<Int>(pointer);
1427 Int y = *Pointer<Int>(pointer + 4);
1428
1429 Int4 vector;
1430 vector = Insert(vector, x, 0);
1431 vector = Insert(vector, y, 1);
1432
Antonio Maiorano02a39532020-01-21 15:15:34 -05001433 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001434 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1435 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001436 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001437 else
1438 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001439 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001440 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001441 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001442 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Antonio Maiorano02a39532020-01-21 15:15:34 -05001443 result = ::function->makeVariable(T(type));
Nicolas Capens33a77f72021-02-08 15:04:38 -05001444 auto load = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001445 load->addArg(ptr);
1446 load->addArg(::context->getConstantInt32(typeSize(type)));
1447 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001448 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001449 }
1450 else
1451 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001452 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001453 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001454
Antonio Maiorano02a39532020-01-21 15:15:34 -05001455 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001456 return V(result);
1457}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001458
Nicolas Capens157ba262019-12-10 17:49:14 -05001459Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1460{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001461 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001462 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001463
Ben Clayton713b8d32019-12-17 20:37:56 +00001464#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001465 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001466 if(align != 0)
1467 {
1468 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1469 call->addArg(ptr);
1470 call->addArg(::context->getConstantInt64(typeSize(type)));
1471 ::basicBlock->appendInst(call);
1472 }
1473#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001474
Nicolas Capens157ba262019-12-10 17:49:14 -05001475 int valueType = (int)reinterpret_cast<intptr_t>(type);
1476
Ben Clayton713b8d32019-12-17 20:37:56 +00001477 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001478 {
1479 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001480 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001481 if(typeSize(type) == 4)
1482 {
1483 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1484 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1485 ::basicBlock->appendInst(bitcast);
1486
1487 RValue<Int4> v(V(vector));
1488
1489 auto pointer = RValue<Pointer<Byte>>(ptr);
1490 Int x = Extract(v, 0);
1491 *Pointer<Int>(pointer) = x;
1492 }
1493 else if(typeSize(type) == 8)
1494 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001495 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001496 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1497 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1498 ::basicBlock->appendInst(bitcast);
1499
1500 RValue<Int4> v(V(vector));
1501
1502 auto pointer = RValue<Pointer<Byte>>(ptr);
1503 Int x = Extract(v, 0);
1504 *Pointer<Int>(pointer) = x;
1505 Int y = Extract(v, 1);
1506 *Pointer<Int>(pointer + 4) = y;
1507 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001508 else
1509 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001510 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001511 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001512 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001513 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001514 auto store = Ice::InstIntrinsic::create(::function, 3, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001515 store->addArg(value);
1516 store->addArg(ptr);
1517 store->addArg(::context->getConstantInt32(typeSize(type)));
1518 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001519 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001520 }
1521 else
1522 {
1523 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001524
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001525 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001526 ::basicBlock->appendInst(store);
1527 }
1528
1529 return value;
1530}
1531
1532Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1533{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001534 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001535 ASSERT(index->getType() == Ice::IceType_i32);
1536
1537 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1538 {
1539 int32_t offset = constant->getValue() * (int)typeSize(type);
1540
1541 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001542 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001543 return ptr;
1544 }
1545
Nicolas Capens157ba262019-12-10 17:49:14 -05001546 return createAdd(ptr, createConstantInt(offset));
1547 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001548
Nicolas Capens157ba262019-12-10 17:49:14 -05001549 if(!Ice::isByteSizedType(T(type)))
1550 {
1551 index = createMul(index, createConstantInt((int)typeSize(type)));
1552 }
1553
Ben Clayton713b8d32019-12-17 20:37:56 +00001554 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001555 {
1556 if(unsignedIndex)
1557 {
1558 index = createZExt(index, T(Ice::IceType_i64));
1559 }
1560 else
1561 {
1562 index = createSExt(index, T(Ice::IceType_i64));
1563 }
1564 }
1565
1566 return createAdd(ptr, index);
1567}
1568
Antonio Maiorano370cba52019-12-31 11:36:07 -05001569static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1570{
1571 Ice::Variable *result = ::function->makeVariable(value->getType());
1572
1573 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001574 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001575 auto op = ::context->getConstantInt32(rmwOp);
1576 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1577 inst->addArg(op);
1578 inst->addArg(ptr);
1579 inst->addArg(value);
1580 inst->addArg(order);
1581 ::basicBlock->appendInst(inst);
1582
1583 return V(result);
1584}
1585
Nicolas Capens157ba262019-12-10 17:49:14 -05001586Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1587{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001588 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001589 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001590}
1591
1592Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1593{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001594 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001595 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001596}
1597
1598Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1599{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001600 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001601 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001602}
1603
1604Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1605{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001606 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001607 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001608}
1609
1610Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1611{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001612 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001613 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001614}
1615
1616Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1617{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001618 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001619 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001620}
1621
1622Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1623{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001624 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001625 Ice::Variable *result = ::function->makeVariable(value->getType());
1626
1627 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001628 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001629 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1630 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1631 inst->addArg(ptr);
1632 inst->addArg(compare);
1633 inst->addArg(value);
1634 inst->addArg(orderEq);
1635 inst->addArg(orderNeq);
1636 ::basicBlock->appendInst(inst);
1637
1638 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001639}
1640
1641static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1642{
1643 if(v->getType() == T(destType))
1644 {
1645 return v;
1646 }
1647
1648 Ice::Variable *result = ::function->makeVariable(T(destType));
1649 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1650 ::basicBlock->appendInst(cast);
1651
1652 return V(result);
1653}
1654
1655Value *Nucleus::createTrunc(Value *v, Type *destType)
1656{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001657 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001658 return createCast(Ice::InstCast::Trunc, v, destType);
1659}
1660
1661Value *Nucleus::createZExt(Value *v, Type *destType)
1662{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001663 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001664 return createCast(Ice::InstCast::Zext, v, destType);
1665}
1666
1667Value *Nucleus::createSExt(Value *v, Type *destType)
1668{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001669 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001670 return createCast(Ice::InstCast::Sext, v, destType);
1671}
1672
1673Value *Nucleus::createFPToUI(Value *v, Type *destType)
1674{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001675 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001676 return createCast(Ice::InstCast::Fptoui, v, destType);
1677}
1678
1679Value *Nucleus::createFPToSI(Value *v, Type *destType)
1680{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001681 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001682 return createCast(Ice::InstCast::Fptosi, v, destType);
1683}
1684
1685Value *Nucleus::createSIToFP(Value *v, Type *destType)
1686{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001687 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001688 return createCast(Ice::InstCast::Sitofp, v, destType);
1689}
1690
1691Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1692{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001693 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001694 return createCast(Ice::InstCast::Fptrunc, v, destType);
1695}
1696
1697Value *Nucleus::createFPExt(Value *v, Type *destType)
1698{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001699 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001700 return createCast(Ice::InstCast::Fpext, v, destType);
1701}
1702
1703Value *Nucleus::createBitCast(Value *v, Type *destType)
1704{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001705 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001706 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1707 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1708 // emulate them by writing to the stack and reading back as the destination type.
1709 if(emulateMismatchedBitCast)
1710 {
1711 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1712 {
1713 Value *address = allocateStackVariable(destType);
1714 createStore(v, address, T(v->getType()));
1715 return createLoad(address, destType);
1716 }
1717 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1718 {
1719 Value *address = allocateStackVariable(T(v->getType()));
1720 createStore(v, address, T(v->getType()));
1721 return createLoad(address, destType);
1722 }
1723 }
1724
1725 return createCast(Ice::InstCast::Bitcast, v, destType);
1726}
1727
1728static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1729{
1730 ASSERT(lhs->getType() == rhs->getType());
1731
1732 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1733 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1734 ::basicBlock->appendInst(cmp);
1735
1736 return V(result);
1737}
1738
Nicolas Capens157ba262019-12-10 17:49:14 -05001739Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1740{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001741 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001742 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1743}
1744
1745Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1746{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001747 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001748 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1749}
1750
1751Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1752{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001753 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001754 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1755}
1756
1757Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1758{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001759 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001760 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1761}
1762
1763Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1764{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001765 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001766 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1767}
1768
1769Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1770{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001771 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001772 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1773}
1774
1775Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1776{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001777 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001778 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1779}
1780
1781Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1782{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001783 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001784 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1785}
1786
1787Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1788{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001789 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001790 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1791}
1792
1793Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1794{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001795 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001796 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1797}
1798
1799static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1800{
1801 ASSERT(lhs->getType() == rhs->getType());
1802 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1803
1804 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1805 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1806 ::basicBlock->appendInst(cmp);
1807
1808 return V(result);
1809}
1810
1811Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1812{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001813 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001814 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1815}
1816
1817Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1818{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001819 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001820 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1821}
1822
1823Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1824{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001825 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001826 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1827}
1828
1829Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1830{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001831 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001832 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1833}
1834
1835Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1836{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001837 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001838 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1839}
1840
1841Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1842{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001843 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001844 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1845}
1846
1847Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1848{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001849 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001850 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1851}
1852
1853Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1854{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001855 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001856 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1857}
1858
1859Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1860{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001861 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001862 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1863}
1864
1865Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1866{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001867 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001868 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1869}
1870
1871Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1872{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001873 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001874 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1875}
1876
1877Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1878{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001879 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001880 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1881}
1882
1883Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1884{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001885 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001886 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1887}
1888
1889Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1890{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001891 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001892 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1893}
1894
1895Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1896{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001897 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001898 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001899 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001900 ::basicBlock->appendInst(extract);
1901
1902 return V(result);
1903}
1904
1905Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1906{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001907 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001908 auto result = ::function->makeVariable(vector->getType());
1909 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1910 ::basicBlock->appendInst(insert);
1911
1912 return V(result);
1913}
1914
1915Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1916{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001917 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001918 ASSERT(V1->getType() == V2->getType());
1919
1920 int size = Ice::typeNumElements(V1->getType());
1921 auto result = ::function->makeVariable(V1->getType());
1922 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1923
1924 for(int i = 0; i < size; i++)
1925 {
1926 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1927 }
1928
1929 ::basicBlock->appendInst(shuffle);
1930
1931 return V(result);
1932}
1933
1934Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1935{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001936 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001937 ASSERT(ifTrue->getType() == ifFalse->getType());
1938
1939 auto result = ::function->makeVariable(ifTrue->getType());
1940 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1941 ::basicBlock->appendInst(select);
1942
1943 return V(result);
1944}
1945
1946SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1947{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001948 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001949 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1950 ::basicBlock->appendInst(switchInst);
1951
Ben Clayton713b8d32019-12-17 20:37:56 +00001952 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001953}
1954
1955void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1956{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001957 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001958 switchCases->addBranch(label, label, branch);
1959}
1960
1961void Nucleus::createUnreachable()
1962{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001963 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001964 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1965 ::basicBlock->appendInst(unreachable);
1966}
1967
Antonio Maiorano62427e02020-02-13 09:18:05 -05001968Type *Nucleus::getType(Value *value)
1969{
1970 return T(V(value)->getType());
1971}
1972
1973Type *Nucleus::getContainedType(Type *vectorType)
1974{
1975 Ice::Type vecTy = T(vectorType);
1976 switch(vecTy)
1977 {
1978 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1979 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1980 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1981 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1982 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1983 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1984 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1985 default:
1986 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1987 return {};
1988 }
1989}
1990
Nicolas Capens157ba262019-12-10 17:49:14 -05001991Type *Nucleus::getPointerType(Type *ElementType)
1992{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001993 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001994}
1995
Antonio Maiorano62427e02020-02-13 09:18:05 -05001996static constexpr Ice::Type getNaturalIntType()
1997{
1998 constexpr size_t intSize = sizeof(int);
1999 static_assert(intSize == 4 || intSize == 8, "");
2000 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
2001}
2002
2003Type *Nucleus::getPrintfStorageType(Type *valueType)
2004{
2005 Ice::Type valueTy = T(valueType);
2006 switch(valueTy)
2007 {
2008 case Ice::IceType_i32:
2009 return T(getNaturalIntType());
2010
2011 case Ice::IceType_f32:
2012 return T(Ice::IceType_f64);
2013
2014 default:
2015 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
2016 return {};
2017 }
2018}
2019
Nicolas Capens157ba262019-12-10 17:49:14 -05002020Value *Nucleus::createNullValue(Type *Ty)
2021{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002022 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002023 if(Ice::isVectorType(T(Ty)))
2024 {
2025 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002026 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002027 return createConstantVector(c, Ty);
2028 }
2029 else
2030 {
2031 return V(::context->getConstantZero(T(Ty)));
2032 }
2033}
2034
2035Value *Nucleus::createConstantLong(int64_t i)
2036{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002037 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002038 return V(::context->getConstantInt64(i));
2039}
2040
2041Value *Nucleus::createConstantInt(int i)
2042{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002043 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002044 return V(::context->getConstantInt32(i));
2045}
2046
2047Value *Nucleus::createConstantInt(unsigned int i)
2048{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002049 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002050 return V(::context->getConstantInt32(i));
2051}
2052
2053Value *Nucleus::createConstantBool(bool b)
2054{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002055 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002056 return V(::context->getConstantInt1(b));
2057}
2058
2059Value *Nucleus::createConstantByte(signed char i)
2060{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002061 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002062 return V(::context->getConstantInt8(i));
2063}
2064
2065Value *Nucleus::createConstantByte(unsigned char i)
2066{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002067 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002068 return V(::context->getConstantInt8(i));
2069}
2070
2071Value *Nucleus::createConstantShort(short i)
2072{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002073 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002074 return V(::context->getConstantInt16(i));
2075}
2076
2077Value *Nucleus::createConstantShort(unsigned short i)
2078{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002079 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002080 return V(::context->getConstantInt16(i));
2081}
2082
2083Value *Nucleus::createConstantFloat(float x)
2084{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002085 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002086 return V(::context->getConstantFloat(x));
2087}
2088
2089Value *Nucleus::createNullPointer(Type *Ty)
2090{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002091 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002092 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002093}
2094
Antonio Maiorano02a39532020-01-21 15:15:34 -05002095static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2096{
2097 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2098}
2099
Nicolas Capens157ba262019-12-10 17:49:14 -05002100Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2101{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002102 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002103 const int vectorSize = 16;
2104 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2105 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002106
2107 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002108 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002109
Antonio Maioranoa0957112020-03-04 15:06:19 -05002110 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002111 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002112
2113 switch((int)reinterpret_cast<intptr_t>(type))
2114 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002115 case Ice::IceType_v4i32:
2116 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002117 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002118 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002119 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002120 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002121 }
2122 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002123 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002124 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002125 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002126 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002127 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002128 }
2129 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002130 case Ice::IceType_v8i16:
2131 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002132 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002133 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002134 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002135 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002136 }
2137 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002138 case Ice::IceType_v16i8:
2139 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002140 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002141 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002142 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002143 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002144 }
2145 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002146 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002147 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002148 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002149 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002150 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002151 }
2152 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002153 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002154 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002155 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002156 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002157 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002158 }
2159 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002160 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002161 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002162 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002163 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002164 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002165 }
2166 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002167 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002168 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002169 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002170 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002171 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002172 }
2173 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002174 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002175 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002176 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002177 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002178 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002179 }
2180 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002181 default:
2182 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002183 }
2184
Antonio Maiorano02a39532020-01-21 15:15:34 -05002185 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002186
Antonio Maiorano02a39532020-01-21 15:15:34 -05002187 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002188 return V(result);
2189}
2190
2191Value *Nucleus::createConstantVector(const double *constants, Type *type)
2192{
Ben Clayton713b8d32019-12-17 20:37:56 +00002193 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002194}
2195
Antonio Maiorano62427e02020-02-13 09:18:05 -05002196Value *Nucleus::createConstantString(const char *v)
2197{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002198 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002199 return V(IceConstantData(v, strlen(v) + 1));
2200}
2201
Nicolas Capens54313fb2021-02-19 14:26:27 -05002202void Nucleus::setOptimizerCallback(OptimizerCallback *callback)
2203{
2204 ::optimizerCallback = callback;
2205}
2206
Nicolas Capens519cf222020-05-08 15:27:19 -04002207Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002208{
2209 return T(Ice::IceType_void);
2210}
2211
Nicolas Capens519cf222020-05-08 15:27:19 -04002212Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002213{
2214 return T(Ice::IceType_i1);
2215}
2216
Nicolas Capens519cf222020-05-08 15:27:19 -04002217Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002218{
2219 return T(Ice::IceType_i8);
2220}
2221
Nicolas Capens519cf222020-05-08 15:27:19 -04002222Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002223{
2224 return T(Ice::IceType_i8);
2225}
2226
Nicolas Capens519cf222020-05-08 15:27:19 -04002227Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002228{
2229 return T(Ice::IceType_i16);
2230}
2231
Nicolas Capens519cf222020-05-08 15:27:19 -04002232Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002233{
2234 return T(Ice::IceType_i16);
2235}
2236
Nicolas Capens519cf222020-05-08 15:27:19 -04002237Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002238{
2239 return T(Type_v4i8);
2240}
2241
Nicolas Capens519cf222020-05-08 15:27:19 -04002242Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002243{
2244 return T(Type_v4i8);
2245}
2246
Ben Clayton713b8d32019-12-17 20:37:56 +00002247namespace {
2248RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002249{
Ben Clayton713b8d32019-12-17 20:37:56 +00002250 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002251}
2252
Ben Clayton713b8d32019-12-17 20:37:56 +00002253RValue<Byte> Extract(RValue<Byte8> val, int i)
2254{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002255 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002256}
2257
2258RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2259{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002260 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002261}
2262} // namespace
2263
Nicolas Capens157ba262019-12-10 17:49:14 -05002264RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2265{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002266 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002267 if(emulateIntrinsics)
2268 {
2269 Byte8 result;
2270 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2271 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2272 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2273 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2274 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2275 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2276 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2277 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2278
2279 return result;
2280 }
2281 else
2282 {
2283 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002284 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002285 auto paddusb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002286 paddusb->addArg(x.value());
2287 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002288 ::basicBlock->appendInst(paddusb);
2289
2290 return RValue<Byte8>(V(result));
2291 }
2292}
2293
2294RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2295{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002296 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002297 if(emulateIntrinsics)
2298 {
2299 Byte8 result;
2300 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2301 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2302 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2303 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2304 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2305 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2306 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2307 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2308
2309 return result;
2310 }
2311 else
2312 {
2313 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002314 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002315 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002316 psubusw->addArg(x.value());
2317 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002318 ::basicBlock->appendInst(psubusw);
2319
2320 return RValue<Byte8>(V(result));
2321 }
2322}
2323
2324RValue<SByte> Extract(RValue<SByte8> val, int i)
2325{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002326 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002327 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002328}
2329
2330RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2331{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002332 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002333 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002334}
2335
2336RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2337{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002338 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002339 if(emulateIntrinsics)
2340 {
2341 SByte8 result;
2342 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2343 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2344 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2345 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2346 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2347 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2348 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2349 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2350
2351 return result;
2352 }
2353 else
2354 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002355#if defined(__i386__) || defined(__x86_64__)
2356 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2357 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2358 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002359
Ben Clayton713b8d32019-12-17 20:37:56 +00002360 return As<SByte8>(hi | lo);
2361#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002362 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002363#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002364 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002365}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002366
Nicolas Capens157ba262019-12-10 17:49:14 -05002367RValue<Int> SignMask(RValue<Byte8> x)
2368{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002369 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002370 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002371 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002372 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2373 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002374 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002375 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002376 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002377 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002378 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002379 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002380 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002381 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002382
Nicolas Capens157ba262019-12-10 17:49:14 -05002383 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002384 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002385}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002386
2387// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2388// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002389// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002390// }
2391
Nicolas Capens157ba262019-12-10 17:49:14 -05002392RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2393{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002394 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002395 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002396}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002397
Nicolas Capens519cf222020-05-08 15:27:19 -04002398Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002399{
2400 return T(Type_v8i8);
2401}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002402
Nicolas Capens598f8d82016-09-26 15:09:10 -04002403// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2404// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002405// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002406// }
2407
2408// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2409// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002410// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002411// }
2412
Nicolas Capens157ba262019-12-10 17:49:14 -05002413RValue<SByte> SaturateSigned(RValue<Short> x)
2414{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002415 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002416 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2417}
2418
2419RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2420{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002421 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002422 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002423 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002424 SByte8 result;
2425 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2426 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2427 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2428 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2429 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2430 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2431 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2432 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002433
Nicolas Capens157ba262019-12-10 17:49:14 -05002434 return result;
2435 }
2436 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002437 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002438 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002439 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002440 auto paddsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002441 paddsb->addArg(x.value());
2442 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002443 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002444
Nicolas Capens157ba262019-12-10 17:49:14 -05002445 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002446 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002447}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002448
Nicolas Capens157ba262019-12-10 17:49:14 -05002449RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2450{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002451 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002452 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002453 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002454 SByte8 result;
2455 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2456 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2457 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2458 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2459 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2460 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2461 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2462 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002463
Nicolas Capens157ba262019-12-10 17:49:14 -05002464 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002465 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002466 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002467 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002468 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002469 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002470 auto psubsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002471 psubsb->addArg(x.value());
2472 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002473 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002474
Nicolas Capens157ba262019-12-10 17:49:14 -05002475 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002476 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002477}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002478
Nicolas Capens157ba262019-12-10 17:49:14 -05002479RValue<Int> SignMask(RValue<SByte8> x)
2480{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002481 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002482 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002483 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002484 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2485 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002486 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002487 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002488 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002489 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002490 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002491 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002492 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002493 ::basicBlock->appendInst(movmsk);
2494
2495 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002496 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002497}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002498
Nicolas Capens157ba262019-12-10 17:49:14 -05002499RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2500{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002501 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002502 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002503}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002504
Nicolas Capens157ba262019-12-10 17:49:14 -05002505RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2506{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002507 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002508 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002509}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002510
Nicolas Capens519cf222020-05-08 15:27:19 -04002511Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002512{
2513 return T(Type_v8i8);
2514}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002515
Nicolas Capens519cf222020-05-08 15:27:19 -04002516Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002517{
2518 return T(Ice::IceType_v16i8);
2519}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002520
Nicolas Capens519cf222020-05-08 15:27:19 -04002521Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002522{
2523 return T(Ice::IceType_v16i8);
2524}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002525
Nicolas Capens519cf222020-05-08 15:27:19 -04002526Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002527{
2528 return T(Type_v2i16);
2529}
Nicolas Capensd4227962016-11-09 14:24:25 -05002530
Nicolas Capens519cf222020-05-08 15:27:19 -04002531Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002532{
2533 return T(Type_v2i16);
2534}
Nicolas Capensd4227962016-11-09 14:24:25 -05002535
Nicolas Capens157ba262019-12-10 17:49:14 -05002536Short4::Short4(RValue<Int4> cast)
2537{
Ben Clayton713b8d32019-12-17 20:37:56 +00002538 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002539 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002540 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2541
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002542 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002543 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002544
2545 storeValue(short4);
2546}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002547
2548// Short4::Short4(RValue<Float> cast)
2549// {
2550// }
2551
Nicolas Capens157ba262019-12-10 17:49:14 -05002552Short4::Short4(RValue<Float4> cast)
2553{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002554 // TODO(b/150791192): Generalize and optimize
2555 auto smin = std::numeric_limits<short>::min();
2556 auto smax = std::numeric_limits<short>::max();
2557 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002558}
2559
2560RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2561{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002562 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002563 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002564 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002565 Short4 result;
2566 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2567 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2568 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2569 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002570
2571 return result;
2572 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002573 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002574 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002575 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002576 }
2577}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002578
Nicolas Capens157ba262019-12-10 17:49:14 -05002579RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2580{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002581 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002582 if(emulateIntrinsics)
2583 {
2584 Short4 result;
2585 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2586 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2587 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2588 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2589
2590 return result;
2591 }
2592 else
2593 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002594 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002595 }
2596}
2597
2598RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2599{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002600 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002601 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002602 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002603 ::basicBlock->appendInst(cmp);
2604
2605 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002606 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002607 ::basicBlock->appendInst(select);
2608
2609 return RValue<Short4>(V(result));
2610}
2611
2612RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2613{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002614 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002615 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002616 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002617 ::basicBlock->appendInst(cmp);
2618
2619 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002620 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002621 ::basicBlock->appendInst(select);
2622
2623 return RValue<Short4>(V(result));
2624}
2625
2626RValue<Short> SaturateSigned(RValue<Int> x)
2627{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002628 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002629 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2630}
2631
2632RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2633{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002634 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002635 if(emulateIntrinsics)
2636 {
2637 Short4 result;
2638 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2639 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2640 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2641 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2642
2643 return result;
2644 }
2645 else
2646 {
2647 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002648 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002649 auto paddsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002650 paddsw->addArg(x.value());
2651 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002652 ::basicBlock->appendInst(paddsw);
2653
2654 return RValue<Short4>(V(result));
2655 }
2656}
2657
2658RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2659{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002660 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002661 if(emulateIntrinsics)
2662 {
2663 Short4 result;
2664 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2665 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2666 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2667 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2668
2669 return result;
2670 }
2671 else
2672 {
2673 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002674 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002675 auto psubsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002676 psubsw->addArg(x.value());
2677 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002678 ::basicBlock->appendInst(psubsw);
2679
2680 return RValue<Short4>(V(result));
2681 }
2682}
2683
2684RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2685{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002686 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002687 if(emulateIntrinsics)
2688 {
2689 Short4 result;
2690 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2691 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2692 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2693 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2694
2695 return result;
2696 }
2697 else
2698 {
2699 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002700 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002701 auto pmulhw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002702 pmulhw->addArg(x.value());
2703 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002704 ::basicBlock->appendInst(pmulhw);
2705
2706 return RValue<Short4>(V(result));
2707 }
2708}
2709
2710RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2711{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002712 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002713 if(emulateIntrinsics)
2714 {
2715 Int2 result;
2716 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2717 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2718
2719 return result;
2720 }
2721 else
2722 {
2723 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002724 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002725 auto pmaddwd = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002726 pmaddwd->addArg(x.value());
2727 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002728 ::basicBlock->appendInst(pmaddwd);
2729
2730 return As<Int2>(V(result));
2731 }
2732}
2733
2734RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2735{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002736 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002737 if(emulateIntrinsics)
2738 {
2739 SByte8 result;
2740 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2741 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2742 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2743 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2744 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2745 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2746 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2747 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2748
2749 return result;
2750 }
2751 else
2752 {
2753 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002754 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002755 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002756 pack->addArg(x.value());
2757 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002758 ::basicBlock->appendInst(pack);
2759
2760 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2761 }
2762}
2763
2764RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2765{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002766 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002767 if(emulateIntrinsics)
2768 {
2769 Byte8 result;
2770 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2771 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2772 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2773 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2774 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2775 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2776 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2777 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2778
2779 return result;
2780 }
2781 else
2782 {
2783 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002784 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002785 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002786 pack->addArg(x.value());
2787 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002788 ::basicBlock->appendInst(pack);
2789
2790 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2791 }
2792}
2793
2794RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2795{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002796 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002797 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002798}
2799
2800RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2801{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002802 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002803 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002804}
2805
Nicolas Capens519cf222020-05-08 15:27:19 -04002806Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002807{
2808 return T(Type_v4i16);
2809}
2810
2811UShort4::UShort4(RValue<Float4> cast, bool saturate)
2812{
2813 if(saturate)
2814 {
2815 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002816 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002817 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2818 // PackUnsigned takes care of 0x0000 saturation.
2819 Int4 int4(Min(cast, Float4(0xFFFF)));
2820 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002821 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002822 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002823 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002824 // ARM saturates the 32-bit integer result on overflow/undeflow.
2825 Int4 int4(cast);
2826 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002827 }
2828 else
2829 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002830 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002831 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002832 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002833 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002834 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002835 *this = Short4(Int4(cast));
2836 }
2837}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002838
Nicolas Capens157ba262019-12-10 17:49:14 -05002839RValue<UShort> Extract(RValue<UShort4> val, int i)
2840{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002841 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002842}
2843
2844RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2845{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002846 return RValue<UShort4>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002847}
2848
2849RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002852 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002853
Nicolas Capens157ba262019-12-10 17:49:14 -05002854 {
2855 UShort4 result;
2856 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2857 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2858 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2859 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2860
2861 return result;
2862 }
2863 else
2864 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002865 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002866 }
2867}
2868
2869RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2870{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002871 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002872 if(emulateIntrinsics)
2873 {
2874 UShort4 result;
2875 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2876 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2877 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2878 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2879
2880 return result;
2881 }
2882 else
2883 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002884 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002885 }
2886}
2887
2888RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2889{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002890 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002891 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002892 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002893 ::basicBlock->appendInst(cmp);
2894
2895 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002896 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002897 ::basicBlock->appendInst(select);
2898
2899 return RValue<UShort4>(V(result));
2900}
2901
2902RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2903{
2904 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002905 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002906 ::basicBlock->appendInst(cmp);
2907
2908 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002909 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002910 ::basicBlock->appendInst(select);
2911
2912 return RValue<UShort4>(V(result));
2913}
2914
2915RValue<UShort> SaturateUnsigned(RValue<Int> x)
2916{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002917 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002918 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2919}
2920
2921RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2922{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002923 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002924 if(emulateIntrinsics)
2925 {
2926 UShort4 result;
2927 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2928 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2929 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2930 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2931
2932 return result;
2933 }
2934 else
2935 {
2936 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002937 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002938 auto paddusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002939 paddusw->addArg(x.value());
2940 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002941 ::basicBlock->appendInst(paddusw);
2942
2943 return RValue<UShort4>(V(result));
2944 }
2945}
2946
2947RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2948{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002949 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002950 if(emulateIntrinsics)
2951 {
2952 UShort4 result;
2953 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2954 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2955 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2956 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2957
2958 return result;
2959 }
2960 else
2961 {
2962 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002963 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002964 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002965 psubusw->addArg(x.value());
2966 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002967 ::basicBlock->appendInst(psubusw);
2968
2969 return RValue<UShort4>(V(result));
2970 }
2971}
2972
2973RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2974{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002975 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002976 if(emulateIntrinsics)
2977 {
2978 UShort4 result;
2979 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2980 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2981 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2982 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2983
2984 return result;
2985 }
2986 else
2987 {
2988 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002989 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002990 auto pmulhuw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002991 pmulhuw->addArg(x.value());
2992 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002993 ::basicBlock->appendInst(pmulhuw);
2994
2995 return RValue<UShort4>(V(result));
2996 }
2997}
2998
2999RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
3000{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003001 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003002 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3003
3004 // Scalarized implementation.
3005 Int4 result;
3006 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
3007 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
3008 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
3009 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
3010
3011 return result;
3012}
3013
3014RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3015{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003016 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003017 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3018
3019 if(false) // Partial product based implementation.
3020 {
3021 auto xh = x >> 16;
3022 auto yh = y >> 16;
3023 auto xl = x & UInt4(0x0000FFFF);
3024 auto yl = y & UInt4(0x0000FFFF);
3025 auto xlyh = xl * yh;
3026 auto xhyl = xh * yl;
3027 auto xlyhh = xlyh >> 16;
3028 auto xhylh = xhyl >> 16;
3029 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3030 auto xhyll = xhyl & UInt4(0x0000FFFF);
3031 auto xlylh = (xl * yl) >> 16;
3032 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3033
3034 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003035 }
3036
Nicolas Capens157ba262019-12-10 17:49:14 -05003037 // Scalarized implementation.
3038 Int4 result;
3039 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3040 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3041 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3042 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3043
3044 return As<UInt4>(result);
3045}
3046
3047RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3048{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003049 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003050 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003051 return UShort4(0);
3052}
3053
Nicolas Capens519cf222020-05-08 15:27:19 -04003054Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003055{
3056 return T(Type_v4i16);
3057}
3058
3059RValue<Short> Extract(RValue<Short8> val, int i)
3060{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003061 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003062 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003063}
3064
3065RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3066{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003067 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003068 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003069}
3070
3071RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3072{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003073 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003074 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003075 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003076 Short8 result;
3077 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3078 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3079 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3080 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3081 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3082 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3083 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3084 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003085
Nicolas Capens157ba262019-12-10 17:49:14 -05003086 return result;
3087 }
3088 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003089 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003090 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003091 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003092}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003093
Nicolas Capens157ba262019-12-10 17:49:14 -05003094RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3095{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003096 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003097 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003098 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003099 Short8 result;
3100 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3101 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3102 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3103 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3104 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3105 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3106 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3107 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003108
Nicolas Capens157ba262019-12-10 17:49:14 -05003109 return result;
3110 }
3111 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003112 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003113 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003114 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003115}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003116
Nicolas Capens157ba262019-12-10 17:49:14 -05003117RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3118{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003119 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003120 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003121 return Int4(0);
3122}
3123
3124RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3125{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003126 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003127 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003128 return Short8(0);
3129}
3130
Nicolas Capens519cf222020-05-08 15:27:19 -04003131Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003132{
3133 return T(Ice::IceType_v8i16);
3134}
3135
3136RValue<UShort> Extract(RValue<UShort8> val, int i)
3137{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003138 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003139 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003140}
3141
3142RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3143{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003144 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003145 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003146}
3147
3148RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3149{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003150 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003151 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003152 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003153 UShort8 result;
3154 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3155 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3156 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3157 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3158 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3159 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3160 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3161 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003162
Nicolas Capens157ba262019-12-10 17:49:14 -05003163 return result;
3164 }
3165 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003166 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003167 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003168 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003169}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003170
Nicolas Capens157ba262019-12-10 17:49:14 -05003171RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3172{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003173 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003174 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003175 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003176 UShort8 result;
3177 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3178 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3179 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3180 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3181 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3182 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3183 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3184 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003185
Nicolas Capens157ba262019-12-10 17:49:14 -05003186 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003187 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003188 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003189 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003190 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003191 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003192}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003193
Nicolas Capens157ba262019-12-10 17:49:14 -05003194RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3195{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003196 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003197 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003198 return UShort8(0);
3199}
3200
Nicolas Capens519cf222020-05-08 15:27:19 -04003201Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003202{
3203 return T(Ice::IceType_v8i16);
3204}
3205
Ben Clayton713b8d32019-12-17 20:37:56 +00003206RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003207{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003208 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003209 RValue<Int> res = val;
3210 val += 1;
3211 return res;
3212}
3213
Ben Clayton713b8d32019-12-17 20:37:56 +00003214const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003215{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003216 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003217 val += 1;
3218 return val;
3219}
3220
Ben Clayton713b8d32019-12-17 20:37:56 +00003221RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003222{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003223 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003224 RValue<Int> res = val;
3225 val -= 1;
3226 return res;
3227}
3228
Ben Clayton713b8d32019-12-17 20:37:56 +00003229const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003230{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003231 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003232 val -= 1;
3233 return val;
3234}
3235
3236RValue<Int> RoundInt(RValue<Float> cast)
3237{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003238 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003239 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003240 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003241 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3242 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003243 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003244 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003245 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003246 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003247 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003248 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003249 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003250 ::basicBlock->appendInst(nearbyint);
3251
3252 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003253 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003254}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003255
Nicolas Capens519cf222020-05-08 15:27:19 -04003256Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003257{
3258 return T(Ice::IceType_i32);
3259}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003260
Nicolas Capens519cf222020-05-08 15:27:19 -04003261Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003262{
3263 return T(Ice::IceType_i64);
3264}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003265
Nicolas Capens157ba262019-12-10 17:49:14 -05003266UInt::UInt(RValue<Float> cast)
3267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003268 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003269 // Smallest positive value representable in UInt, but not in Int
3270 const unsigned int ustart = 0x80000000u;
3271 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003272
Nicolas Capens157ba262019-12-10 17:49:14 -05003273 // If the value is negative, store 0, otherwise store the result of the conversion
3274 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003275 // Check if the value can be represented as an Int
3276 IfThenElse(cast >= ustartf,
3277 // If the value is too large, subtract ustart and re-add it after conversion.
3278 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3279 // Otherwise, just convert normally
3280 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003281 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003282}
Nicolas Capensa8086512016-11-07 17:32:17 -05003283
Ben Clayton713b8d32019-12-17 20:37:56 +00003284RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003285{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003286 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003287 RValue<UInt> res = val;
3288 val += 1;
3289 return res;
3290}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003291
Ben Clayton713b8d32019-12-17 20:37:56 +00003292const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003295 val += 1;
3296 return val;
3297}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003298
Ben Clayton713b8d32019-12-17 20:37:56 +00003299RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003300{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003301 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003302 RValue<UInt> res = val;
3303 val -= 1;
3304 return res;
3305}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003306
Ben Clayton713b8d32019-12-17 20:37:56 +00003307const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003308{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003309 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003310 val -= 1;
3311 return val;
3312}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003313
//	RValue<UInt> RoundUInt(RValue<Float> cast)
//	{
//		ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
//	}
3318
Nicolas Capens519cf222020-05-08 15:27:19 -04003319Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003320{
3321 return T(Ice::IceType_i32);
3322}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003323
//	Int2::Int2(RValue<Int> cast)
//	{
//		Value *extend = Nucleus::createZExt(cast.value(), Long::type());
//		Value *vector = Nucleus::createBitCast(extend, Int2::type());
//
//		Constant *shuffle[2];
//		shuffle[0] = Nucleus::createConstantInt(0);
//		shuffle[1] = Nucleus::createConstantInt(0);
//
//		Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
//
//		storeValue(replicate);
//	}
3337
Nicolas Capens157ba262019-12-10 17:49:14 -05003338RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3339{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003340 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003341 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003342 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003343 Int2 result;
3344 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3345 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003346
Nicolas Capens157ba262019-12-10 17:49:14 -05003347 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003348 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003349 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003350 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003351 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003352 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003353}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003354
Nicolas Capens157ba262019-12-10 17:49:14 -05003355RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3356{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003357 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003358 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003359 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003360 Int2 result;
3361 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3362 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3363
3364 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003365 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003366 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003367 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003368 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003369 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003370}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003371
Nicolas Capens519cf222020-05-08 15:27:19 -04003372Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003373{
3374 return T(Type_v2i32);
3375}
3376
3377RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3378{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003379 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003380 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003381 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003382 UInt2 result;
3383 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3384 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003385
Nicolas Capens157ba262019-12-10 17:49:14 -05003386 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003387 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003388 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003389 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003390 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003391 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003392}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003393
Nicolas Capens157ba262019-12-10 17:49:14 -05003394RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3395{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003396 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003397 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003398 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003399 UInt2 result;
3400 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3401 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003402
Nicolas Capens157ba262019-12-10 17:49:14 -05003403 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003404 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003405 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003406 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003407 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003408 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003409}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003410
Nicolas Capens519cf222020-05-08 15:27:19 -04003411Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003412{
3413 return T(Type_v2i32);
3414}
3415
Ben Clayton713b8d32019-12-17 20:37:56 +00003416Int4::Int4(RValue<Byte4> cast)
3417 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003418{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003419 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003420 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003421 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3422
3423 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003424 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003425 Value *b = Nucleus::createBitCast(a, Byte16::type());
3426 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003427
Ben Clayton713b8d32019-12-17 20:37:56 +00003428 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003429 Value *d = Nucleus::createBitCast(c, Short8::type());
3430 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003431
Nicolas Capens519cf222020-05-08 15:27:19 -04003432 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003433 storeValue(f);
3434}
3435
Ben Clayton713b8d32019-12-17 20:37:56 +00003436Int4::Int4(RValue<SByte4> cast)
3437 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003438{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003439 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003440 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003441 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3442
Ben Clayton713b8d32019-12-17 20:37:56 +00003443 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003444 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003445 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3446
Ben Clayton713b8d32019-12-17 20:37:56 +00003447 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003448 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003449 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3450
3451 *this = As<Int4>(e) >> 24;
3452}
3453
Ben Clayton713b8d32019-12-17 20:37:56 +00003454Int4::Int4(RValue<Short4> cast)
3455 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003456{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003457 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003458 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003459 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003460
3461 *this = As<Int4>(c) >> 16;
3462}
3463
Ben Clayton713b8d32019-12-17 20:37:56 +00003464Int4::Int4(RValue<UShort4> cast)
3465 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003466{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003467 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003468 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003469 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003470 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003471 storeValue(d);
3472}
3473
Ben Clayton713b8d32019-12-17 20:37:56 +00003474Int4::Int4(RValue<Int> rhs)
3475 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003476{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003477 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003478 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003479
Ben Clayton713b8d32019-12-17 20:37:56 +00003480 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003481 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3482
3483 storeValue(replicate);
3484}
3485
3486RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3487{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003488 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003489 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003490 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003491 Int4 result;
3492 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3493 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3494 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3495 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003496
Nicolas Capens157ba262019-12-10 17:49:14 -05003497 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003498 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003499 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003500 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003501 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003502 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003503}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003504
Nicolas Capens157ba262019-12-10 17:49:14 -05003505RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3506{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003507 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003508 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003509 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003510 Int4 result;
3511 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3512 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3513 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3514 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003515
Nicolas Capens157ba262019-12-10 17:49:14 -05003516 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003517 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003518 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003519 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003520 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003521 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003522}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003523
Nicolas Capens157ba262019-12-10 17:49:14 -05003524RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3525{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003526 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003527 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003528}
3529
3530RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3531{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003532 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003533 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003534}
3535
3536RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3537{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003538 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003539 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003540}
3541
3542RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3543{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003544 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003545 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003546}
3547
3548RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3549{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003550 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003551 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003552}
3553
3554RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3555{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003556 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003557 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003558}
3559
3560RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3561{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003562 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003563 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003564 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003565 ::basicBlock->appendInst(cmp);
3566
3567 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003568 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003569 ::basicBlock->appendInst(select);
3570
3571 return RValue<Int4>(V(result));
3572}
3573
3574RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3575{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003576 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003577 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003578 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003579 ::basicBlock->appendInst(cmp);
3580
3581 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003582 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003583 ::basicBlock->appendInst(select);
3584
3585 return RValue<Int4>(V(result));
3586}
3587
3588RValue<Int4> RoundInt(RValue<Float4> cast)
3589{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003590 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003591 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003592 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003593 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3594 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003595 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003596 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003597 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003598 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003599 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003600 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003601 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003602 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003603
3604 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003605 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003606}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003607
Nicolas Capenseeb81842021-01-12 17:44:40 -05003608RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3609{
3610 RR_DEBUG_INFO_UPDATE_LOC();
3611
3612 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3613 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3614 // saturate to 0x80000000.
3615 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3616
3617 if(emulateIntrinsics || CPUID::ARM)
3618 {
3619 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3620 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3621 }
3622 else
3623 {
3624 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3625 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003626 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capenseeb81842021-01-12 17:44:40 -05003627 nearbyint->addArg(clamped.value());
3628 ::basicBlock->appendInst(nearbyint);
3629
3630 return RValue<Int4>(V(result));
3631 }
3632}
3633
Nicolas Capens157ba262019-12-10 17:49:14 -05003634RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3635{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003636 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003637 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003638 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003639 Short8 result;
3640 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3641 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3642 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3643 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3644 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3645 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3646 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3647 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003648
Nicolas Capens157ba262019-12-10 17:49:14 -05003649 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003650 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003651 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003652 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003653 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003654 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003655 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003656 pack->addArg(x.value());
3657 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003658 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003659
Nicolas Capens157ba262019-12-10 17:49:14 -05003660 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003661 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003662}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003663
Nicolas Capens157ba262019-12-10 17:49:14 -05003664RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3665{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003666 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003667 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003668 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003669 RValue<Int4> sx = As<Int4>(x);
3670 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003671
Nicolas Capens157ba262019-12-10 17:49:14 -05003672 RValue<Int4> sy = As<Int4>(y);
3673 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003674
Nicolas Capens157ba262019-12-10 17:49:14 -05003675 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003676 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003677 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003678 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003679 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003680 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003681 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003682 pack->addArg(x.value());
3683 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003684 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003685
Nicolas Capens157ba262019-12-10 17:49:14 -05003686 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003687 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003688}
Nicolas Capens33438a62017-09-27 11:47:35 -04003689
Nicolas Capens157ba262019-12-10 17:49:14 -05003690RValue<Int> SignMask(RValue<Int4> x)
3691{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003692 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003693 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003694 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003695 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3696 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003697 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003698 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003699 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003700 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003701 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003702 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003703 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003704 ::basicBlock->appendInst(movmsk);
3705
3706 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003707 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003708}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003709
Nicolas Capens519cf222020-05-08 15:27:19 -04003710Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003711{
3712 return T(Ice::IceType_v4i32);
3713}
3714
Ben Clayton713b8d32019-12-17 20:37:56 +00003715UInt4::UInt4(RValue<Float4> cast)
3716 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003717{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003718 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003719 // Smallest positive value representable in UInt, but not in Int
3720 const unsigned int ustart = 0x80000000u;
3721 const float ustartf = float(ustart);
3722
3723 // Check if the value can be represented as an Int
3724 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3725 // If the value is too large, subtract ustart and re-add it after conversion.
3726 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003727 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003728 (~uiValue & Int4(cast));
3729 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003730 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003731}
3732
Ben Clayton713b8d32019-12-17 20:37:56 +00003733UInt4::UInt4(RValue<UInt> rhs)
3734 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003735{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003736 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003737 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003738
Ben Clayton713b8d32019-12-17 20:37:56 +00003739 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003740 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3741
3742 storeValue(replicate);
3743}
3744
3745RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3746{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003747 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003748 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003749 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003750 UInt4 result;
3751 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3752 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3753 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3754 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003755
Nicolas Capens157ba262019-12-10 17:49:14 -05003756 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003757 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003758 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003759 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003760 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003761 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003762}
Ben Clayton88816fa2019-05-15 17:08:14 +01003763
Nicolas Capens157ba262019-12-10 17:49:14 -05003764RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3765{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003766 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003767 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003768 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003769 UInt4 result;
3770 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3771 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3772 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3773 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003774
Nicolas Capens157ba262019-12-10 17:49:14 -05003775 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003776 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003777 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003778 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003779 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003780 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003781}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003782
Nicolas Capens157ba262019-12-10 17:49:14 -05003783RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3784{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003785 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003786 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003787}
3788
3789RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003792 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003793}
3794
3795RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003798 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003799}
3800
3801RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3802{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003803 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003804 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003805}
3806
3807RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3808{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003809 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003810 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003811}
3812
3813RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3814{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003815 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003816 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003817}
3818
3819RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3820{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003821 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003822 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003823 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003824 ::basicBlock->appendInst(cmp);
3825
3826 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003827 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003828 ::basicBlock->appendInst(select);
3829
3830 return RValue<UInt4>(V(result));
3831}
3832
3833RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3834{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003835 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003836 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003837 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003838 ::basicBlock->appendInst(cmp);
3839
3840 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003841 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003842 ::basicBlock->appendInst(select);
3843
3844 return RValue<UInt4>(V(result));
3845}
3846
Nicolas Capens519cf222020-05-08 15:27:19 -04003847Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003848{
3849 return T(Ice::IceType_v4i32);
3850}
3851
Nicolas Capens519cf222020-05-08 15:27:19 -04003852Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003853{
3854 return T(Ice::IceType_i16);
3855}
3856
3857RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3858{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003859 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003860 return 1.0f / x;
3861}
3862
3863RValue<Float> RcpSqrt_pp(RValue<Float> x)
3864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003866 return Rcp_pp(Sqrt(x));
3867}
3868
3869RValue<Float> Sqrt(RValue<Float> x)
3870{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003871 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003872 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003873 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003874 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003875 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003876 ::basicBlock->appendInst(sqrt);
3877
3878 return RValue<Float>(V(result));
3879}
3880
3881RValue<Float> Round(RValue<Float> x)
3882{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003883 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003884 return Float4(Round(Float4(x))).x;
3885}
3886
3887RValue<Float> Trunc(RValue<Float> x)
3888{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003889 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003890 return Float4(Trunc(Float4(x))).x;
3891}
3892
3893RValue<Float> Frac(RValue<Float> x)
3894{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003895 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003896 return Float4(Frac(Float4(x))).x;
3897}
3898
3899RValue<Float> Floor(RValue<Float> x)
3900{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003901 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003902 return Float4(Floor(Float4(x))).x;
3903}
3904
3905RValue<Float> Ceil(RValue<Float> x)
3906{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003907 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003908 return Float4(Ceil(Float4(x))).x;
3909}
3910
Nicolas Capens519cf222020-05-08 15:27:19 -04003911Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003912{
3913 return T(Ice::IceType_f32);
3914}
3915
Nicolas Capens519cf222020-05-08 15:27:19 -04003916Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003917{
3918 return T(Type_v2f32);
3919}
3920
Ben Clayton713b8d32019-12-17 20:37:56 +00003921Float4::Float4(RValue<Float> rhs)
3922 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003923{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003924 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003925 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003926
Ben Clayton713b8d32019-12-17 20:37:56 +00003927 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003928 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3929
3930 storeValue(replicate);
3931}
3932
3933RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3934{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003935 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003936 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003937 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003938 ::basicBlock->appendInst(cmp);
3939
3940 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003941 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003942 ::basicBlock->appendInst(select);
3943
3944 return RValue<Float4>(V(result));
3945}
3946
3947RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3948{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003949 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003950 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003951 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003952 ::basicBlock->appendInst(cmp);
3953
3954 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003955 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003956 ::basicBlock->appendInst(select);
3957
3958 return RValue<Float4>(V(result));
3959}
3960
3961RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3962{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003963 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003964 return Float4(1.0f) / x;
3965}
3966
3967RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3968{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003969 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003970 return Rcp_pp(Sqrt(x));
3971}
3972
// Reports whether RcpApprox() is available; this implementation always answers no.
bool HasRcpApprox()
{
	// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
	constexpr bool available = false;
	return available;
}
3978
3979RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
3980{
3981 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3982 UNREACHABLE("RValue<Float4> RcpApprox()");
3983 return { 0.0f };
3984}
3985
3986RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
3987{
3988 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3989 UNREACHABLE("RValue<Float> RcpApprox()");
3990 return { 0.0f };
3991}
3992
// Reports whether RcpSqrtApprox() is available; this implementation always answers no.
bool HasRcpSqrtApprox()
{
	constexpr bool available = false;
	return available;
}
3997
3998RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
3999{
4000 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4001 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
4002 return { 0.0f };
4003}
4004
4005RValue<Float> RcpSqrtApprox(RValue<Float> x)
4006{
4007 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4008 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
4009 return { 0.0f };
4010}
4011
Nicolas Capens157ba262019-12-10 17:49:14 -05004012RValue<Float4> Sqrt(RValue<Float4> x)
4013{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004014 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004015 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04004016 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004017 Float4 result;
4018 result.x = Sqrt(Float(Float4(x).x));
4019 result.y = Sqrt(Float(Float4(x).y));
4020 result.z = Sqrt(Float(Float4(x).z));
4021 result.w = Sqrt(Float(Float4(x).w));
4022
4023 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04004024 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004025 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04004026 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004027 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004028 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004029 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004030 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04004031 ::basicBlock->appendInst(sqrt);
4032
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04004033 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04004034 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04004035}
Nicolas Capens157ba262019-12-10 17:49:14 -05004036
4037RValue<Int> SignMask(RValue<Float4> x)
4038{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004039 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004040 if(emulateIntrinsics || CPUID::ARM)
4041 {
4042 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
4043 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
4044 }
4045 else
4046 {
4047 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004048 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004049 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004050 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004051 ::basicBlock->appendInst(movmsk);
4052
4053 return RValue<Int>(V(result));
4054 }
4055}
4056
4057RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
4058{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004059 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004060 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004061}
4062
4063RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
4064{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004065 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004066 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004067}
4068
4069RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4070{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004071 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004072 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004073}
4074
4075RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4076{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004077 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004078 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004079}
4080
4081RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4082{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004083 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004084 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004085}
4086
4087RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4088{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004089 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004090 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004091}
4092
4093RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4094{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004095 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004096 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004097}
4098
4099RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4100{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004101 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004102 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004103}
4104
4105RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4106{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004107 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004108 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004109}
4110
4111RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4112{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004113 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004114 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004115}
4116
4117RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4118{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004119 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004120 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004121}
4122
4123RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4124{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004125 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004126 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004127}
4128
4129RValue<Float4> Round(RValue<Float4> x)
4130{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004131 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004132 if(emulateIntrinsics || CPUID::ARM)
4133 {
4134 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4135 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4136 }
4137 else if(CPUID::SSE4_1)
4138 {
4139 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004140 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004141 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004142 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004143 round->addArg(::context->getConstantInt32(0));
4144 ::basicBlock->appendInst(round);
4145
4146 return RValue<Float4>(V(result));
4147 }
4148 else
4149 {
4150 return Float4(RoundInt(x));
4151 }
4152}
4153
4154RValue<Float4> Trunc(RValue<Float4> x)
4155{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004156 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004157 if(CPUID::SSE4_1)
4158 {
4159 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004160 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004161 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004162 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004163 round->addArg(::context->getConstantInt32(3));
4164 ::basicBlock->appendInst(round);
4165
4166 return RValue<Float4>(V(result));
4167 }
4168 else
4169 {
4170 return Float4(Int4(x));
4171 }
4172}
4173
4174RValue<Float4> Frac(RValue<Float4> x)
4175{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004176 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004177 Float4 frc;
4178
4179 if(CPUID::SSE4_1)
4180 {
4181 frc = x - Floor(x);
4182 }
4183 else
4184 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004185 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004186
Ben Clayton713b8d32019-12-17 20:37:56 +00004187 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004188 }
4189
4190 // x - floor(x) can be 1.0 for very small negative x.
4191 // Clamp against the value just below 1.0.
4192 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4193}
4194
4195RValue<Float4> Floor(RValue<Float4> x)
4196{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004197 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004198 if(CPUID::SSE4_1)
4199 {
4200 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004201 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004202 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004203 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004204 round->addArg(::context->getConstantInt32(1));
4205 ::basicBlock->appendInst(round);
4206
4207 return RValue<Float4>(V(result));
4208 }
4209 else
4210 {
4211 return x - Frac(x);
4212 }
4213}
4214
4215RValue<Float4> Ceil(RValue<Float4> x)
4216{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004217 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004218 if(CPUID::SSE4_1)
4219 {
4220 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004221 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004222 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004223 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004224 round->addArg(::context->getConstantInt32(2));
4225 ::basicBlock->appendInst(round);
4226
4227 return RValue<Float4>(V(result));
4228 }
4229 else
4230 {
4231 return -Floor(-x);
4232 }
4233}
4234
Nicolas Capens519cf222020-05-08 15:27:19 -04004235Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004236{
4237 return T(Ice::IceType_v4f32);
4238}
4239
4240RValue<Long> Ticks()
4241{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004242 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004243 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004244 return Long(Int(0));
4245}
4246
Ben Clayton713b8d32019-12-17 20:37:56 +00004247RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004248{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004249 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004250 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004251}
4252
Ben Clayton713b8d32019-12-17 20:37:56 +00004253RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004254{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004255 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004256 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004257}
4258
Ben Clayton713b8d32019-12-17 20:37:56 +00004259Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004260{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004261 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004262 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004263}
4264
4265void Breakpoint()
4266{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004267 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004268 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004269 auto trap = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05004270 ::basicBlock->appendInst(trap);
4271}
4272
Ben Clayton713b8d32019-12-17 20:37:56 +00004273void Nucleus::createFence(std::memory_order memoryOrder)
4274{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004275 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004276 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004277 auto inst = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004278 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4279 inst->addArg(order);
4280 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004281}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004282
Ben Clayton713b8d32019-12-17 20:37:56 +00004283Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004285 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004286 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004287 return nullptr;
4288}
4289void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004291 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004292 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004293}
Nicolas Capens157ba262019-12-10 17:49:14 -05004294
4295RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4296{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004297 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004298 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4299}
4300
4301RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4302{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004303 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004304 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4305}
4306
4307void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4308{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004309 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004310 return emulated::Scatter(base, val, offsets, mask, alignment);
4311}
4312
4313void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4314{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004315 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004316 return emulated::Scatter(base, val, offsets, mask, alignment);
4317}
4318
4319RValue<Float> Exp2(RValue<Float> x)
4320{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004321 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004322 return emulated::Exp2(x);
4323}
4324
4325RValue<Float> Log2(RValue<Float> x)
4326{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004327 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004328 return emulated::Log2(x);
4329}
4330
4331RValue<Float4> Sin(RValue<Float4> x)
4332{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004333 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004334 return optimal::Sin(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004335}
4336
4337RValue<Float4> Cos(RValue<Float4> x)
4338{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004339 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004340 return optimal::Cos(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004341}
4342
4343RValue<Float4> Tan(RValue<Float4> x)
4344{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004345 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004346 return optimal::Tan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004347}
4348
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004349RValue<Float4> Asin(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004350{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004351 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004352 if(p == Precision::Full)
4353 {
4354 return emulated::Asin(x);
4355 }
4356 return optimal::Asin_8_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004357}
4358
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004359RValue<Float4> Acos(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004360{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004361 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004362 // Surprisingly, deqp-vk's precision.acos.highp/mediump tests pass when using the 4-term polynomial approximation
4363 // version of acos, unlike for Asin, which requires higher precision algorithms.
4364 return optimal::Acos_4_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004365}
4366
4367RValue<Float4> Atan(RValue<Float4> x)
4368{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004369 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004370 return optimal::Atan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004371}
4372
4373RValue<Float4> Sinh(RValue<Float4> x)
4374{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004375 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004376 return optimal::Sinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004377}
4378
4379RValue<Float4> Cosh(RValue<Float4> x)
4380{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004381 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004382 return optimal::Cosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004383}
4384
4385RValue<Float4> Tanh(RValue<Float4> x)
4386{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004387 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004388 return optimal::Tanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004389}
4390
4391RValue<Float4> Asinh(RValue<Float4> x)
4392{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004393 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004394 return optimal::Asinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004395}
4396
4397RValue<Float4> Acosh(RValue<Float4> x)
4398{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004399 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004400 return optimal::Acosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004401}
4402
4403RValue<Float4> Atanh(RValue<Float4> x)
4404{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004405 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004406 return optimal::Atanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004407}
4408
4409RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4410{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004411 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004412 return optimal::Atan2(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004413}
4414
4415RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4416{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004417 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004418 return optimal::Pow(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004419}
4420
4421RValue<Float4> Exp(RValue<Float4> x)
4422{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004423 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004424 return optimal::Exp(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004425}
4426
4427RValue<Float4> Log(RValue<Float4> x)
4428{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004429 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004430 return optimal::Log(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004431}
4432
4433RValue<Float4> Exp2(RValue<Float4> x)
4434{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004435 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004436 return optimal::Exp2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004437}
4438
4439RValue<Float4> Log2(RValue<Float4> x)
4440{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004441 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004442 return optimal::Log2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004443}
4444
4445RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4446{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004447 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004448 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004449 {
Ben Claytonce54c592020-02-07 11:30:51 +00004450 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004451 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004452 }
4453 else
4454 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004455 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004456 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004457 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004458 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004459 ::basicBlock->appendInst(ctlz);
4460
4461 return RValue<UInt>(V(result));
4462 }
4463}
4464
4465RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4466{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004467 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004468 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004469 {
Ben Claytonce54c592020-02-07 11:30:51 +00004470 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004471 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004472 }
4473 else
4474 {
4475 // TODO: implement vectorized version in Subzero
4476 UInt4 result;
4477 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4478 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4479 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4480 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4481 return result;
4482 }
4483}
4484
4485RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4486{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004487 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004488 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004489 {
Ben Claytonce54c592020-02-07 11:30:51 +00004490 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004491 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004492 }
4493 else
4494 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004495 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004496 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004497 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004498 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004499 ::basicBlock->appendInst(ctlz);
4500
4501 return RValue<UInt>(V(result));
4502 }
4503}
4504
4505RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4506{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004507 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004508 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004509 {
Ben Claytonce54c592020-02-07 11:30:51 +00004510 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004511 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004512 }
4513 else
4514 {
4515 // TODO: implement vectorized version in Subzero
4516 UInt4 result;
4517 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4518 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4519 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4520 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4521 return result;
4522 }
4523}
4524
Antonio Maiorano370cba52019-12-31 11:36:07 -05004525RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4526{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004527 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004528 return emulated::MinAtomic(x, y, memoryOrder);
4529}
4530
4531RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4532{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004533 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004534 return emulated::MinAtomic(x, y, memoryOrder);
4535}
4536
4537RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4538{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004539 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004540 return emulated::MaxAtomic(x, y, memoryOrder);
4541}
4542
4543RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4544{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004545 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004546 return emulated::MaxAtomic(x, y, memoryOrder);
4547}
4548
// Passes the caller's backtrace to emitPrintLocation() when the build defines
// ENABLE_RR_DEBUG_INFO; otherwise the function body is empty.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	const auto callerBacktrace = getCallerBacktrace();
	emitPrintLocation(callerBacktrace);
#endif  // ENABLE_RR_DEBUG_INFO
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004555void EmitDebugVariable(Value *value) {}
// Intentionally empty: this backend has no pending debug state to flush.
void FlushDebug()
{
}
4557
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004558namespace {
4559namespace coro {
4560
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004561// Instance data per generated coroutine
4562// This is the "handle" type used for Coroutine functions
4563// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4564struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004565{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004566 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004567 bool done = false; // the coroutine should stop at the next yield()
4568 bool terminated = false; // the coroutine has finished.
4569 bool inRoutine = false; // is the coroutine currently executing?
4570 marl::Scheduler::Fiber *mainFiber = nullptr;
4571 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004572 void *promisePtr = nullptr;
4573};
4574
4575CoroutineData *createCoroutineData()
4576{
4577 return new CoroutineData{};
4578}
4579
4580void destroyCoroutineData(CoroutineData *coroData)
4581{
4582 delete coroData;
4583}
4584
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004585// suspend() pauses execution of the coroutine, and resumes execution from the
4586// caller's call to await().
4587// Returns true if await() is called again, or false if coroutine_destroy()
4588// is called.
4589bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004590{
Ben Claytonc3466532020-03-24 11:54:05 +00004591 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4592 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4593 ASSERT(coroData->inRoutine);
4594 coroData->inRoutine = false;
4595 coroData->mainFiber->notify();
4596 while(!coroData->inRoutine)
4597 {
4598 coroData->routineFiber->wait();
4599 }
4600 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004601}
4602
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004603// resume() is called by await(), blocking until the coroutine calls yield()
4604// or the coroutine terminates.
4605void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004606{
Ben Claytonc3466532020-03-24 11:54:05 +00004607 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4608 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4609 ASSERT(!coroData->inRoutine);
4610 coroData->inRoutine = true;
4611 coroData->routineFiber->notify();
4612 while(coroData->inRoutine)
4613 {
4614 coroData->mainFiber->wait();
4615 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004616}
4617
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004618// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4619// until the coroutine ends, and deletes the coroutine data.
4620void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004621{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004622 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004623 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4624 ASSERT(!coroData->inRoutine);
4625 if(!coroData->terminated)
4626 {
4627 coroData->done = true;
4628 coroData->inRoutine = true;
4629 coroData->routineFiber->notify();
4630 while(!coroData->terminated)
4631 {
4632 coroData->mainFiber->wait();
4633 }
4634 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004635 if(coroData->useInternalScheduler)
4636 {
4637 ::getOrCreateScheduler().unbind();
4638 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004639 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004640}
4641
4642namespace detail {
4643thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4644} // namespace detail
4645
4646void setHandleParam(Nucleus::CoroutineHandle handle)
4647{
4648 ASSERT(!detail::coroHandle);
4649 detail::coroHandle = handle;
4650}
4651
4652Nucleus::CoroutineHandle getHandleParam()
4653{
4654 ASSERT(detail::coroHandle);
4655 auto handle = detail::coroHandle;
4656 detail::coroHandle = {};
4657 return handle;
4658}
4659
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004660bool isDone(Nucleus::CoroutineHandle handle)
4661{
4662 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004663 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004664}
4665
4666void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4667{
4668 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4669 coroData->promisePtr = promisePtr;
4670}
4671
4672void *getPromisePtr(Nucleus::CoroutineHandle handle)
4673{
4674 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4675 return coroData->promisePtr;
4676}
4677
4678} // namespace coro
4679} // namespace
4680
4681// Used to generate coroutines.
4682// Lifetime: from yield to acquireCoroutine
4683class CoroutineGenerator
4684{
4685public:
4686 CoroutineGenerator()
4687 {
4688 }
4689
4690 // Inserts instructions at the top of the current function to make it a coroutine.
4691 void generateCoroutineBegin()
4692 {
4693 // Begin building the main coroutine_begin() function.
4694 // We insert these instructions at the top of the entry node,
4695 // before existing reactor-generated instructions.
4696
4697 // CoroutineHandle coroutine_begin(<Arguments>)
4698 // {
4699 // this->handle = coro::getHandleParam();
4700 //
4701 // YieldType promise;
4702 // coro::setPromisePtr(handle, &promise); // For await
4703 //
4704 // ... <REACTOR CODE> ...
4705 //
4706
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004707 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004708 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004709
4710 // YieldType promise;
4711 // coro::setPromisePtr(handle, &promise); // For await
4712 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004713 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004714 }
4715
4716 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4717 void generateYield(Value *val)
4718 {
4719 // ... <REACTOR CODE> ...
4720 //
4721 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004722 // if (!coro::suspend(handle)) {
4723 // return false; // coroutine has been stopped by the caller.
4724 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004725 //
4726 // ... <REACTOR CODE> ...
4727
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004728 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004729 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004730
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004731 // if (!coro::suspend(handle)) {
4732 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4733 auto doneBlock = Nucleus::createBasicBlock();
4734 auto resumeBlock = Nucleus::createBasicBlock();
4735 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4736
4737 // return false; // coroutine has been stopped by the caller.
4738 ::basicBlock = doneBlock;
4739 Nucleus::createRetVoid(); // coroutine return value is ignored.
4740
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004741 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004742 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004743 }
4744
4745 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4746
4747 // Generates the await function for the current coroutine.
4748 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4749 static FunctionUniquePtr generateAwaitFunction()
4750 {
4751 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4752 // {
4753 // if (coro::isDone())
4754 // {
4755 // return false;
4756 // }
4757 // else // resume
4758 // {
4759 // YieldType* promise = coro::getPromisePtr(handle);
4760 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004761 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004762 // return true;
4763 // }
4764 // }
4765
4766 // Subzero doesn't support bool types (IceType_i1) as return type
4767 const Ice::Type ReturnType = Ice::IceType_i32;
4768 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4769 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4770
4771 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4772 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4773
4774 Ice::Variable *handle = awaitFunc->getArgs()[0];
4775 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4776
4777 auto doneBlock = awaitFunc->makeNode();
4778 {
4779 // return false;
4780 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4781 doneBlock->appendInst(ret);
4782 }
4783
4784 auto resumeBlock = awaitFunc->makeNode();
4785 {
4786 // YieldType* promise = coro::getPromisePtr(handle);
4787 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4788
4789 // *out = *promise;
4790 // Load promise value
4791 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4792 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4793 resumeBlock->appendInst(load);
4794 // Then store it in output param
4795 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4796 resumeBlock->appendInst(store);
4797
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004798 // coro::resume(handle);
4799 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004800
4801 // return true;
4802 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4803 resumeBlock->appendInst(ret);
4804 }
4805
4806 // if (coro::isDone())
4807 // {
4808 // <doneBlock>
4809 // }
4810 // else // resume
4811 // {
4812 // <resumeBlock>
4813 // }
4814 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004815 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004816 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4817 bb->appendInst(br);
4818
4819 return FunctionUniquePtr{ awaitFunc };
4820 }
4821
4822 // Generates the destroy function for the current coroutine.
4823 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4824 static FunctionUniquePtr generateDestroyFunction()
4825 {
4826 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4827 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004828 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004829 // return;
4830 // }
4831
4832 const Ice::Type ReturnType = Ice::IceType_void;
4833 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4834
4835 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4836 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4837
4838 Ice::Variable *handle = destroyFunc->getArgs()[0];
4839
4840 auto *bb = destroyFunc->getEntryNode();
4841
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004842 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4843 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004844
4845 // return;
4846 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4847 bb->appendInst(ret);
4848
4849 return FunctionUniquePtr{ destroyFunc };
4850 }
4851
4852private:
4853 Ice::Variable *handle{};
4854 Ice::Variable *promise{};
4855};
4856
4857static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4858{
4859 // This doubles up as our coroutine handle
4860 auto coroData = coro::createCoroutineData();
4861
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004862 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4863 if(coroData->useInternalScheduler)
4864 {
4865 ::getOrCreateScheduler().bind();
4866 }
4867
Ben Clayton76e9e532020-03-16 20:35:04 +00004868 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004869 // Store handle in TLS so that the coroutine can grab it right away, before
4870 // any fiber switch occurs.
4871 coro::setHandleParam(coroData);
4872
Ben Claytonc3466532020-03-24 11:54:05 +00004873 ASSERT(!coroData->routineFiber);
4874 coroData->routineFiber = marl::Scheduler::Fiber::current();
4875
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004876 beginFunc();
4877
Ben Claytonc3466532020-03-24 11:54:05 +00004878 ASSERT(coroData->inRoutine);
4879 coroData->done = true; // coroutine is done.
4880 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4881 coroData->inRoutine = false;
4882 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004883 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004884
Ben Claytonc3466532020-03-24 11:54:05 +00004885 ASSERT(!coroData->mainFiber);
4886 coroData->mainFiber = marl::Scheduler::Fiber::current();
4887
4888 // block until the first yield or coroutine end
4889 ASSERT(!coroData->inRoutine);
4890 coroData->inRoutine = true;
4891 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4892 while(coroData->inRoutine)
4893 {
4894 coroData->mainFiber->wait();
4895 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004896
4897 return coroData;
4898}
4899
4900void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4901{
4902 // Start by creating a regular function
4903 createFunction(yieldType, params);
4904
4905 // Save in case yield() is called
4906 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4907 ::coroYieldType = yieldType;
4908}
4909
4910void Nucleus::yield(Value *val)
4911{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004912 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004913 Variable::materializeAll();
4914
4915 // On first yield, we start generating coroutine functions
4916 if(!::coroGen)
4917 {
4918 ::coroGen = std::make_shared<CoroutineGenerator>();
4919 ::coroGen->generateCoroutineBegin();
4920 }
4921
4922 ASSERT(::coroGen);
4923 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004924}
4925
Ben Clayton713b8d32019-12-17 20:37:56 +00004926static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4927{
4928 return false;
4929}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004930
4931static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4932{
4933}
Nicolas Capens157ba262019-12-10 17:49:14 -05004934
4935std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4936{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004937 if(::coroGen)
4938 {
4939 // Finish generating coroutine functions
4940 {
4941 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004942 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004943 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004944
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004945 auto awaitFunc = ::coroGen->generateAwaitFunction();
4946 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004947
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004948 // At this point, we no longer need the CoroutineGenerator.
4949 ::coroGen.reset();
4950 ::coroYieldType = nullptr;
4951
4952 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4953 { name, "await", "destroy" },
4954 cfgEdit);
4955
4956 return routine;
4957 }
4958 else
4959 {
4960 {
4961 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004962 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004963 }
4964
4965 ::coroYieldType = nullptr;
4966
4967 // Not an actual coroutine (no yields), so return stubs for await and destroy
4968 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4969
4970 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4971 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4972 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4973 return routine;
4974 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004975}
4976
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004977Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004978{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004979 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4980
4981 if(isCoroutine)
4982 {
4983 return rr::invokeCoroutineBegin(func);
4984 }
4985 else
4986 {
4987 // For regular routines, just invoke the begin func directly
4988 return func();
4989 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004990}
Nicolas Capens157ba262019-12-10 17:49:14 -05004991
4992} // namespace rr