blob: cfd3d3a3d1a2ab33cb0bbe08cca4b2cd475e056b [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maiorano9c14bda2020-09-18 16:33:36 -040016#include "EmulatedIntrinsics.hpp"
17#include "OptimalIntrinsics.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050018#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000019#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050020#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040021
Nicolas Capens1a3ce872018-10-10 10:42:36 -040022#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000023#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040024
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040026#include "src/IceCfgNode.h"
27#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceELFStreamer.h"
29#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040030#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000031#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040032
Ben Clayton713b8d32019-12-17 20:37:56 +000033#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040034#include "llvm/Support/FileSystem.h"
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -050035#include "llvm/Support/ManagedStatic.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040036#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040037
Antonio Maiorano8bce0672020-02-28 13:13:45 -050038#include "marl/event.h"
39
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000041# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040042#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040043
Nicolas Capensbd65da92017-01-05 16:31:06 -050044#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000045# ifndef WIN32_LEAN_AND_MEAN
46# define WIN32_LEAN_AND_MEAN
47# endif // !WIN32_LEAN_AND_MEAN
48# ifndef NOMINMAX
49# define NOMINMAX
50# endif // !NOMINMAX
51# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050052#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040053
Ben Clayton683bad82020-02-10 23:57:09 +000054#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040055#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000056#include <limits>
57#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040058
// Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050062namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050063
64Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
65{
66 uint32_t sequenceNumber = 0;
Nicolas Capensff010f92021-02-01 12:22:53 -050067 auto *function = Ice::Cfg::create(context, sequenceNumber).release();
68
69 function->setStackSizeLimit(512 * 1024); // 512 KiB
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050070
71 Ice::CfgLocalAllocatorScope allocScope{ function };
72
73 for(auto type : paramTypes)
74 {
75 Ice::Variable *arg = function->makeVariable(type);
76 function->addArg(arg);
77 }
78
79 Ice::CfgNode *node = function->makeNode();
80 function->setEntryNode(node);
81
82 return function;
83}
84
85Ice::Type getPointerType(Ice::Type elementType)
86{
87 if(sizeof(void *) == 8)
88 {
89 return Ice::IceType_i64;
90 }
91 else
92 {
93 return Ice::IceType_i32;
94 }
95}
96
97Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
98{
99 int typeSize = Ice::typeWidthInBytes(type);
100 int totalSize = typeSize * (arraySize ? arraySize : 1);
101
102 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
103 auto address = function->makeVariable(getPointerType(type));
Nicolas Capens0cfc0432021-02-05 15:18:42 -0500104 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize); // SRoA depends on the alignment to match the type size.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500105 function->getEntryNode()->getInsts().push_front(alloca);
106
107 return address;
108}
109
110Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500111{
112 if(sizeof(void *) == 8)
113 {
114 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
115 }
116 else
117 {
118 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
119 }
120}
121
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400122// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
123Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500124
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400125// Wrapper for calls on C functions with Ice types
126Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
127{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500128 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400129
130 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
131 // then truncate result to bool.
132 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
133 const bool returningBool = (retTy == Ice::IceType_i1);
134 if(returningBool)
135 {
136 ret = function->makeVariable(Ice::IceType_i32);
137 }
138 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500139 {
140 ret = function->makeVariable(retTy);
141 }
142
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400143 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500144 for(auto arg : iceArgs)
145 {
146 call->addArg(arg);
147 }
148
149 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400150
151 if(returningBool)
152 {
153 // Truncate result to bool so that if any (lsb) bits were set, result will be true
154 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
155 }
156
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500157 return ret;
158}
159
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400160Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
161{
162 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
163 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
164}
165
Antonio Maiorano62427e02020-02-13 09:18:05 -0500166// Wrapper for calls on C functions with Ice types
167template<typename Return, typename... CArgs, typename... RArgs>
Nicolas Capens629bf952022-01-18 15:08:14 -0500168Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&...args)
Antonio Maiorano62427e02020-02-13 09:18:05 -0500169{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400170 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
171
Nicolas Capens519cf222020-05-08 15:27:19 -0400172 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500173 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500174 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500175}
176
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400177Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
178{
179 Ice::Variable *to = function->makeVariable(toType);
180 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
181 basicBlock->appendInst(cast);
182 return to;
183}
184
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500185Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500186{
Antonio Maiorano02a39532020-01-21 15:15:34 -0500187 Ice::Variable *result = function->makeVariable(type);
188 auto load = Ice::InstLoad::create(function, result, ptr, align);
189 basicBlock->appendInst(load);
190
191 return result;
192}
193
194} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500195} // namespace
196
Ben Clayton713b8d32019-12-17 20:37:56 +0000197namespace rr {
198class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500199class CoroutineGenerator;
200} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500201
202namespace {
203
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -0500204// Used to automatically invoke llvm_shutdown() when driver is unloaded
205llvm::llvm_shutdown_obj llvmShutdownObj;
206
Nicolas Capens157ba262019-12-10 17:49:14 -0500207// Default configuration settings. Must be accessed under mutex lock.
208std::mutex defaultConfigLock;
209rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000210{
Nicolas Capens157ba262019-12-10 17:49:14 -0500211 // This uses a static in a function to avoid the cost of a global static
212 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
213 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000214 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500215 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000216}
217
Nicolas Capens157ba262019-12-10 17:49:14 -0500218Ice::GlobalContext *context = nullptr;
219Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400220Ice::CfgNode *entryBlock = nullptr;
221Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500222Ice::CfgNode *basicBlock = nullptr;
223Ice::CfgLocalAllocatorScope *allocator = nullptr;
224rr::ELFMemoryStreamer *routine = nullptr;
225
226std::mutex codegenMutex;
227
228Ice::ELFFileStreamer *elfFile = nullptr;
229Ice::Fdstream *out = nullptr;
230
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500231// Coroutine globals
232rr::Type *coroYieldType = nullptr;
233std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500234marl::Scheduler &getOrCreateScheduler()
235{
236 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100237 marl::Scheduler::Config cfg;
238 cfg.setWorkerThreadCount(8);
239 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500240 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500241
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500242 return *scheduler;
243}
Nicolas Capens54313fb2021-02-19 14:26:27 -0500244
245rr::Nucleus::OptimizerCallback *optimizerCallback = nullptr;
246
Nicolas Capens157ba262019-12-10 17:49:14 -0500247} // Anonymous namespace
248
249namespace {
250
// Normalize MSVC architecture macros to the GCC/Clang spellings tested below.
#if defined(_M_IX86) && !defined(__i386__)
#	define __i386__ 1
#endif

#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
#	define __x86_64__ 1
#endif
258
Antonio Maiorano370cba52019-12-31 11:36:07 -0500259Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400260{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500261 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100262 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500263 // Note that Opt_0 and Opt_1 are not implemented by Subzero
264 case rr::Optimization::Level::None: return Ice::Opt_m1;
265 case rr::Optimization::Level::Less: return Ice::Opt_m1;
266 case rr::Optimization::Level::Default: return Ice::Opt_2;
267 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
268 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100269 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500270 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400271}
272
Antonio Maiorano370cba52019-12-31 11:36:07 -0500273Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
274{
275 switch(memoryOrder)
276 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500277 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
278 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
279 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
280 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
281 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
282 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
Antonio Maiorano370cba52019-12-31 11:36:07 -0500283 }
284 return Ice::Intrinsics::MemoryOrderInvalid;
285}
286
// Host CPU feature detection. The two public flags are defined outside the
// class from the private detect*() helpers.
class CPUID
{
public:
	static const bool ARM;
	static const bool SSE4_1;

private:
	// Executes the CPUID instruction for leaf `info` on x86 targets and stores
	// EAX, EBX, ECX, EDX in registers[0..3]. On other targets all four
	// entries are set to zero.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Compile-time check of whether the build targets ARM (32- or 64-bit).
	// Building for an architecture not listed here is a hard error.
	constexpr static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// On x86 targets, tests bit 19 of ECX from CPUID leaf 1; always false elsewhere.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		int regs[4];
		cpuid(regs, 1);

		const int featureBit = 0x00080000;
		return (regs[2] & featureBit) != 0;
#else
		return false;
#endif
	}
};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500336
Sean Risser46a649d2021-08-30 15:44:33 -0400337constexpr bool CPUID::ARM = CPUID::detectARM();
Nicolas Capens157ba262019-12-10 17:49:14 -0500338const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
Sean Risser46a649d2021-08-30 15:44:33 -0400339constexpr bool emulateIntrinsics = false;
340constexpr bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400341
Nicolas Capens157ba262019-12-10 17:49:14 -0500342constexpr bool subzeroDumpEnabled = false;
343constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500344
345#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500346static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
347static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500348#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500349
350} // anonymous namespace
351
352namespace rr {
353
// Returns the human-readable name of this Reactor backend.
std::string BackendName()
{
	const std::string name{ "Subzero" };
	return name;
}
358
Ben Clayton713b8d32019-12-17 20:37:56 +0000359const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500360 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500361};
362
363enum EmulatedType
364{
365 EmulatedShift = 16,
366 EmulatedV2 = 2 << EmulatedShift,
367 EmulatedV4 = 4 << EmulatedShift,
368 EmulatedV8 = 8 << EmulatedShift,
369 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
370
371 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
372 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
373 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000374 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
375 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500376 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
377};
378
Ben Clayton713b8d32019-12-17 20:37:56 +0000379class Value : public Ice::Operand
380{};
381class SwitchCases : public Ice::InstSwitch
382{};
383class BasicBlock : public Ice::CfgNode
384{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500385
386Ice::Type T(Type *t)
387{
388 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
389 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500390}
391
Nicolas Capens157ba262019-12-10 17:49:14 -0500392Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400393{
Ben Clayton713b8d32019-12-17 20:37:56 +0000394 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500395}
396
397Type *T(EmulatedType t)
398{
Ben Clayton713b8d32019-12-17 20:37:56 +0000399 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500400}
401
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500402std::vector<Ice::Type> T(const std::vector<Type *> &types)
403{
404 std::vector<Ice::Type> result;
405 result.reserve(types.size());
406 for(auto &t : types)
407 {
408 result.push_back(T(t));
409 }
410 return result;
411}
412
Nicolas Capens157ba262019-12-10 17:49:14 -0500413Value *V(Ice::Operand *v)
414{
Ben Clayton713b8d32019-12-17 20:37:56 +0000415 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500416}
417
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500418Ice::Operand *V(Value *v)
419{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500420 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500421}
422
Antonio Maiorano62427e02020-02-13 09:18:05 -0500423std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
424{
425 std::vector<Ice::Operand *> result;
426 result.reserve(values.size());
427 for(auto &v : values)
428 {
429 result.push_back(V(v));
430 }
431 return result;
432}
433
Nicolas Capens157ba262019-12-10 17:49:14 -0500434BasicBlock *B(Ice::CfgNode *b)
435{
Ben Clayton713b8d32019-12-17 20:37:56 +0000436 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500437}
438
439static size_t typeSize(Type *type)
440{
441 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400442 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500443 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800444 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500445 case Type_v2i32: return 8;
446 case Type_v4i16: return 8;
447 case Type_v2i16: return 4;
448 case Type_v8i8: return 8;
449 case Type_v4i8: return 4;
450 case Type_v2f32: return 8;
451 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500452 }
453 }
454
455 return Ice::typeWidthInBytes(T(type));
456}
457
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400458static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500459{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400460 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500461 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
462 {
463 Nucleus::createRetVoid();
464 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400465
466 // Connect the entry block to the top of the initial basic block
467 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
468 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500469}
470
Ben Clayton713b8d32019-12-17 20:37:56 +0000471using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
472using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500473
474inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
475{
Ben Clayton713b8d32019-12-17 20:37:56 +0000476 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500477}
478
479inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
480{
481 return &sectionHeader(elfHeader)[index];
482}
483
484static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
485{
486 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
487
488 uint32_t index = relocation.getSymbol();
489 int table = relocationTable.sh_link;
490 void *symbolValue = nullptr;
491
492 if(index != SHN_UNDEF)
493 {
494 if(table == SHN_UNDEF) return nullptr;
495 const SectionHeader *symbolTable = elfSection(elfHeader, table);
496
497 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
498 if(index >= symtab_entries)
499 {
500 ASSERT(index < symtab_entries && "Symbol Index out of range");
501 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800502 }
503
Nicolas Capens157ba262019-12-10 17:49:14 -0500504 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000505 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500506 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800507
Nicolas Capens157ba262019-12-10 17:49:14 -0500508 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400509 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500510 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000511 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400512 }
513 else
514 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500515 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400516 }
Nicolas Capens66478362016-10-13 15:36:36 -0400517 }
518
Nicolas Capens157ba262019-12-10 17:49:14 -0500519 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000520 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500521
522 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400523 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400524 switch(relocation.getType())
525 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500526 case R_ARM_NONE:
527 // No relocation
528 break;
529 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500530 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000531 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500532 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
533 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
534 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400535 break;
Nicolas Capens112faf42019-12-13 17:32:26 -0500536 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500537 {
538 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
539 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
540 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400541 break;
Nicolas Capens112faf42019-12-13 17:32:26 -0500542 default:
543 ASSERT(false && "Unsupported relocation type");
544 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400545 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500546 }
547 else
548 {
549 switch(relocation.getType())
550 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500551 case R_386_NONE:
552 // No relocation
553 break;
554 case R_386_32:
555 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
556 break;
557 case R_386_PC32:
558 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
559 break;
560 default:
561 ASSERT(false && "Unsupported relocation type");
562 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500563 }
Nicolas Capens66478362016-10-13 15:36:36 -0400564 }
565
Nicolas Capens157ba262019-12-10 17:49:14 -0500566 return symbolValue;
567}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400568
Nicolas Capens157ba262019-12-10 17:49:14 -0500569static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
570{
571 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
572
573 uint32_t index = relocation.getSymbol();
574 int table = relocationTable.sh_link;
575 void *symbolValue = nullptr;
576
577 if(index != SHN_UNDEF)
578 {
579 if(table == SHN_UNDEF) return nullptr;
580 const SectionHeader *symbolTable = elfSection(elfHeader, table);
581
582 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
583 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400584 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500585 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400586 return nullptr;
587 }
588
Nicolas Capens157ba262019-12-10 17:49:14 -0500589 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000590 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500591 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400592
Nicolas Capens157ba262019-12-10 17:49:14 -0500593 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400594 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500595 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000596 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 }
598 else
599 {
600 return nullptr;
601 }
602 }
Nicolas Capens66478362016-10-13 15:36:36 -0400603
Nicolas Capens157ba262019-12-10 17:49:14 -0500604 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000605 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
606 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400607
Nicolas Capens157ba262019-12-10 17:49:14 -0500608 switch(relocation.getType())
609 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500610 case R_X86_64_NONE:
611 // No relocation
612 break;
613 case R_X86_64_64:
614 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
615 break;
616 case R_X86_64_PC32:
617 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
618 break;
619 case R_X86_64_32S:
620 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
621 break;
622 default:
623 ASSERT(false && "Unsupported relocation type");
624 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500625 }
626
627 return symbolValue;
628}
629
// Location and size of one function's machine code inside a loaded ELF image.
struct EntryPoint
{
	// Address of the first byte of the function's code; null until resolved.
	// Initialized explicitly so a default-initialized EntryPoint never holds
	// an indeterminate pointer.
	const void *entry = nullptr;

	// Size of the code in bytes.
	size_t codeSize = 0;
};
635
636std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
637{
638 ASSERT(functionNames.size() > 0);
639 std::vector<EntryPoint> entryPoints(functionNames.size());
640
Ben Clayton713b8d32019-12-17 20:37:56 +0000641 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500642
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400643 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500644 if(!elfHeader->checkMagic())
645 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400646 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500647 }
648
649 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000650 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
651#if defined(__i386__)
652 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
653#elif defined(__x86_64__)
654 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
655#elif defined(__arm__)
656 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
657#elif defined(__aarch64__)
658 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
659#elif defined(__mips__)
660 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
661#else
662# error "Unsupported platform"
663#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500664
Ben Clayton713b8d32019-12-17 20:37:56 +0000665 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500666
667 for(int i = 0; i < elfHeader->e_shnum; i++)
668 {
669 if(sectionHeader[i].sh_type == SHT_PROGBITS)
670 {
671 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
672 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400673 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500674 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400675 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500676
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400677 for(size_t j = 0; j < functionNames.size(); ++j)
678 {
679 if(strstr(sectionName, functionNames[j]) != nullptr)
680 {
681 return j;
682 }
683 }
684
685 UNREACHABLE("Failed to find executable section that matches input function names");
686 return static_cast<size_t>(-1);
687 };
688
689 size_t index = findSectionNameEntryIndex();
690 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
691 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400692 }
693 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500694 else if(sectionHeader[i].sh_type == SHT_REL)
695 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000696 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400697
Nicolas Capens157ba262019-12-10 17:49:14 -0500698 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
699 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000700 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500701 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
702 }
703 }
704 else if(sectionHeader[i].sh_type == SHT_RELA)
705 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000706 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500707
708 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
709 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000710 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500711 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
712 }
713 }
714 }
715
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400716 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500717}
718
719template<typename T>
720struct ExecutableAllocator
721{
722 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000723 template<class U>
724 ExecutableAllocator(const ExecutableAllocator<U> &other)
725 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500726
727 using value_type = T;
728 using size_type = std::size_t;
729
730 T *allocate(size_type n)
731 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000732 return (T *)allocateMemoryPages(
733 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500734 }
735
736 void deallocate(T *p, size_type n)
737 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800738 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500739 }
740};
741
742class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
743{
744 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
745 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
746
747public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000748 ELFMemoryStreamer()
749 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500750 {
751 position = 0;
752 buffer.reserve(0x1000);
753 }
754
755 ~ELFMemoryStreamer() override
756 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500757 }
758
759 void write8(uint8_t Value) override
760 {
761 if(position == (uint64_t)buffer.size())
762 {
763 buffer.push_back(Value);
764 position++;
765 }
766 else if(position < (uint64_t)buffer.size())
767 {
768 buffer[position] = Value;
769 position++;
770 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000771 else
772 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500773 }
774
775 void writeBytes(llvm::StringRef Bytes) override
776 {
777 std::size_t oldSize = buffer.size();
778 buffer.resize(oldSize + Bytes.size());
779 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
780 position += Bytes.size();
781 }
782
783 uint64_t tell() const override { return position; }
784
785 void seek(uint64_t Off) override { position = Off; }
786
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400787 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500788 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400789 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500790
791#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500792 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
793#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400794 for(auto &entryPoint : entryPoints)
795 {
796 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
797 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500798#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500799
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400800 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400801 }
802
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500803 void finalize()
804 {
805 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
806
807 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
808 }
809
Ben Clayton713b8d32019-12-17 20:37:56 +0000810 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400811 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500812 ASSERT(func);
813 funcs[index] = func;
814 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400815
Nicolas Capens157ba262019-12-10 17:49:14 -0500816 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400817 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500818 ASSERT(funcs[index]);
819 return funcs[index];
820 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400821
Antonio Maiorano02a39532020-01-21 15:15:34 -0500822 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500823 {
Nicolas Capens4e75f452021-01-28 01:52:56 -0500824 // Check if we already have a suitable constant.
825 for(const auto &c : constantsPool)
826 {
827 void *ptr = c.data.get();
828 size_t space = c.space;
829
830 void *alignedPtr = std::align(alignment, size, ptr, space);
831
832 if(space < size)
833 {
834 continue;
835 }
836
837 if(memcmp(data, alignedPtr, size) == 0)
838 {
839 return alignedPtr;
840 }
841 }
842
Antonio Maiorano02a39532020-01-21 15:15:34 -0500843 // TODO(b/148086935): Replace with a buffer allocator.
844 size_t space = size + alignment;
845 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
846 void *ptr = buf.get();
847 void *alignedPtr = std::align(alignment, size, ptr, space);
848 ASSERT(alignedPtr);
849 memcpy(alignedPtr, data, size);
Nicolas Capens4e75f452021-01-28 01:52:56 -0500850 constantsPool.emplace_back(std::move(buf), space);
851
Antonio Maiorano02a39532020-01-21 15:15:34 -0500852 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500853 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400854
Nicolas Capens157ba262019-12-10 17:49:14 -0500855private:
Nicolas Capens4e75f452021-01-28 01:52:56 -0500856 struct Constant
857 {
858 Constant(std::unique_ptr<uint8_t[]> data, size_t space)
859 : data(std::move(data))
860 , space(space)
861 {}
862
863 std::unique_ptr<uint8_t[]> data;
864 size_t space;
865 };
866
Ben Clayton713b8d32019-12-17 20:37:56 +0000867 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500868 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
869 std::size_t position;
Nicolas Capens4e75f452021-01-28 01:52:56 -0500870 std::vector<Constant> constantsPool;
Nicolas Capens157ba262019-12-10 17:49:14 -0500871};
872
Antonio Maiorano62427e02020-02-13 09:18:05 -0500873#ifdef ENABLE_RR_PRINT
874void VPrintf(const std::vector<Value *> &vals)
875{
Antonio Maiorano8cbee412020-06-10 15:59:20 -0400876 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(rr::DebugPrintf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500877}
878#endif // ENABLE_RR_PRINT
879
Nicolas Capens157ba262019-12-10 17:49:14 -0500880Nucleus::Nucleus()
881{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400882 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500883
884 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
885 Ice::ClFlags::getParsedClFlags(Flags);
886
Ben Clayton713b8d32019-12-17 20:37:56 +0000887#if defined(__arm__)
888 Flags.setTargetArch(Ice::Target_ARM32);
889 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
890#elif defined(__mips__)
891 Flags.setTargetArch(Ice::Target_MIPS32);
892 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
893#else // x86
894 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
895 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
896#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500897 Flags.setOutFileType(Ice::FT_Elf);
898 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
Nicolas Capens157ba262019-12-10 17:49:14 -0500899 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
900 Flags.setDisableHybridAssembly(true);
901
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500902 // Emit functions into separate sections in the ELF so we can find them by name
903 Flags.setFunctionSections(true);
904
Nicolas Capens157ba262019-12-10 17:49:14 -0500905 static llvm::raw_os_ostream cout(std::cout);
906 static llvm::raw_os_ostream cerr(std::cerr);
907
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500908 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500909 {
910 // Decorate text asm with liveness info
911 Flags.setDecorateAsm(true);
912 }
913
Ben Clayton713b8d32019-12-17 20:37:56 +0000914 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500915 {
916 std::error_code errorCode;
917 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
918 ::elfFile = new Ice::ELFFileStreamer(*out);
919 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
920 }
921 else
922 {
923 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
924 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
925 ::routine = elfMemory;
926 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400927
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400928#if !__has_feature(memory_sanitizer)
929 // thread_local variables in shared libraries are initialized at load-time,
930 // but this is not observed by MemorySanitizer if the loader itself was not
Nicolas Capensaf907702021-05-14 11:10:49 -0400931 // instrumented, leading to false-positive uninitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400932 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400933#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500934 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500935}
936
937Nucleus::~Nucleus()
938{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400939 delete Variable::unmaterializedVariables;
940 Variable::unmaterializedVariables = nullptr;
941
Nicolas Capens157ba262019-12-10 17:49:14 -0500942 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500943 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500944
945 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500946 ::allocator = nullptr;
947
Nicolas Capens157ba262019-12-10 17:49:14 -0500948 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500949 ::function = nullptr;
950
Nicolas Capens157ba262019-12-10 17:49:14 -0500951 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500952 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500953
954 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500955 ::elfFile = nullptr;
956
Nicolas Capens157ba262019-12-10 17:49:14 -0500957 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500958 ::out = nullptr;
959
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400960 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500961 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400962 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500963
964 ::codegenMutex.unlock();
965}
966
967void Nucleus::setDefaultConfig(const Config &cfg)
968{
969 std::unique_lock<std::mutex> lock(::defaultConfigLock);
970 ::defaultConfig() = cfg;
971}
972
973void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
974{
975 std::unique_lock<std::mutex> lock(::defaultConfigLock);
976 auto &config = ::defaultConfig();
977 config = cfgEdit.apply(config);
978}
979
980Config Nucleus::getDefaultConfig()
981{
982 std::unique_lock<std::mutex> lock(::defaultConfigLock);
983 return ::defaultConfig();
984}
985
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500986// This function lowers and produces executable binary code in memory for the input functions,
987// and returns a Routine with the entry points to these functions.
988template<size_t Count>
Sean Risser705231f2021-08-19 18:17:24 -0400989static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit *cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500990{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500991 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
992 // and GlobalContext::emitItems.
993
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500994 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500995 {
996 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500997 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500998 }
999
1000 ::context->emitFileHeader();
1001
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001002 // Translate
1003
1004 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -05001005 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001006 Ice::Cfg *currFunc = functions[i];
1007
1008 // Install function allocator in TLS for Cfg-specific container allocators
1009 Ice::CfgLocalAllocatorScope allocScope(currFunc);
1010
1011 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
1012
Nicolas Capens54313fb2021-02-19 14:26:27 -05001013 if(::optimizerCallback)
1014 {
1015 Nucleus::OptimizerReport report;
1016 rr::optimize(currFunc, &report);
1017 ::optimizerCallback(&report);
1018 ::optimizerCallback = nullptr;
1019 }
1020 else
1021 {
1022 rr::optimize(currFunc);
1023 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001024
1025 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001026 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001027
1028 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001029 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001030
1031 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1032
1033 if(subzeroEmitTextAsm)
1034 {
1035 currFunc->emit();
1036 }
1037
1038 currFunc->emitIAS();
Nicolas Capensff010f92021-02-01 12:22:53 -05001039
1040 if(currFunc->hasError())
1041 {
1042 return nullptr;
1043 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001044 }
1045
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001046 // Emit items
1047
1048 ::context->lowerGlobals("");
1049
Nicolas Capens157ba262019-12-10 17:49:14 -05001050 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001051
1052 for(size_t i = 0; i < Count; ++i)
1053 {
1054 Ice::Cfg *currFunc = functions[i];
1055
1056 // Accumulate globals from functions to emit into the "last" section at the end
1057 auto globals = currFunc->getGlobalInits();
1058 if(globals && !globals->empty())
1059 {
1060 ::context->getGlobals()->merge(globals.get());
1061 }
1062
1063 auto assembler = currFunc->releaseAssembler();
1064 assembler->alignFunction();
1065 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1066 }
1067
Nicolas Capens157ba262019-12-10 17:49:14 -05001068 ::context->lowerGlobals("last");
1069 ::context->lowerConstants();
1070 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001071
Nicolas Capens157ba262019-12-10 17:49:14 -05001072 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001073 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001074 objectWriter->writeNonUserSections();
1075
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001076 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001077 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1078 ASSERT(entryPoints.size() == Count);
1079 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001080 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001081 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001082 }
1083
1084 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001085
1086 Routine *handoffRoutine = ::routine;
1087 ::routine = nullptr;
1088
1089 return std::shared_ptr<Routine>(handoffRoutine);
1090}
1091
Sean Risser705231f2021-08-19 18:17:24 -04001092std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit *cfgEdit /* = nullptr */)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001093{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001094 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001095 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1096}
1097
Nicolas Capens157ba262019-12-10 17:49:14 -05001098Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1099{
1100 Ice::Type type = T(t);
1101 int typeSize = Ice::typeWidthInBytes(type);
1102 int totalSize = typeSize * (arraySize ? arraySize : 1);
1103
1104 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1105 auto address = ::function->makeVariable(T(getPointerType(t)));
Nicolas Capens0cfc0432021-02-05 15:18:42 -05001106 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize); // SRoA depends on the alignment to match the type size.
Nicolas Capens157ba262019-12-10 17:49:14 -05001107 ::function->getEntryNode()->getInsts().push_front(alloca);
1108
1109 return V(address);
1110}
1111
1112BasicBlock *Nucleus::createBasicBlock()
1113{
1114 return B(::function->makeNode());
1115}
1116
1117BasicBlock *Nucleus::getInsertBlock()
1118{
1119 return B(::basicBlock);
1120}
1121
1122void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1123{
Nicolas Capens7c296ec2021-02-18 14:10:26 -05001124 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001125
1126 ::basicBlock = basicBlock;
1127}
1128
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001129void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001130{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001131 ASSERT(::function == nullptr);
1132 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001133 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001134 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001135 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001136
1137 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1138
1139 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1140 // becomes invalid if another one is created; for example, when creating await and destroy functions
1141 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1142 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001143 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1144
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001145 ::entryBlock = ::function->getEntryNode();
1146 ::basicBlock = ::function->makeNode();
1147 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001148}
1149
1150Value *Nucleus::getArgument(unsigned int index)
1151{
1152 return V(::function->getArgs()[index]);
1153}
1154
1155void Nucleus::createRetVoid()
1156{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001157 RR_DEBUG_INFO_UPDATE_LOC();
1158
Nicolas Capens157ba262019-12-10 17:49:14 -05001159 // Code generated after this point is unreachable, so any variables
1160 // being read can safely return an undefined value. We have to avoid
1161 // materializing variables after the terminator ret instruction.
1162 Variable::killUnmaterialized();
1163
1164 Ice::InstRet *ret = Ice::InstRet::create(::function);
1165 ::basicBlock->appendInst(ret);
1166}
1167
1168void Nucleus::createRet(Value *v)
1169{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001170 RR_DEBUG_INFO_UPDATE_LOC();
1171
Nicolas Capens157ba262019-12-10 17:49:14 -05001172 // Code generated after this point is unreachable, so any variables
1173 // being read can safely return an undefined value. We have to avoid
1174 // materializing variables after the terminator ret instruction.
1175 Variable::killUnmaterialized();
1176
1177 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1178 ::basicBlock->appendInst(ret);
1179}
1180
1181void Nucleus::createBr(BasicBlock *dest)
1182{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001183 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001184 Variable::materializeAll();
1185
1186 auto br = Ice::InstBr::create(::function, dest);
1187 ::basicBlock->appendInst(br);
1188}
1189
1190void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1191{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001192 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001193 Variable::materializeAll();
1194
1195 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1196 ::basicBlock->appendInst(br);
1197}
1198
1199static bool isCommutative(Ice::InstArithmetic::OpKind op)
1200{
1201 switch(op)
1202 {
Nicolas Capens112faf42019-12-13 17:32:26 -05001203 case Ice::InstArithmetic::Add:
1204 case Ice::InstArithmetic::Fadd:
1205 case Ice::InstArithmetic::Mul:
1206 case Ice::InstArithmetic::Fmul:
1207 case Ice::InstArithmetic::And:
1208 case Ice::InstArithmetic::Or:
1209 case Ice::InstArithmetic::Xor:
1210 return true;
1211 default:
1212 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001213 }
1214}
1215
1216static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1217{
1218 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1219
1220 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1221
1222 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1223 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1224 ::basicBlock->appendInst(arithmetic);
1225
1226 return V(result);
1227}
1228
1229Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1230{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001231 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001232 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1233}
1234
1235Value *Nucleus::createSub(Value *lhs, Value *rhs)
1236{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001237 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001238 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1239}
1240
1241Value *Nucleus::createMul(Value *lhs, Value *rhs)
1242{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001243 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001244 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1245}
1246
1247Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1248{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001249 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001250 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1251}
1252
1253Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1254{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001255 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001256 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1257}
1258
1259Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1260{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001261 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001262 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1263}
1264
1265Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1266{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001267 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001268 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1269}
1270
1271Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1272{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001273 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001274 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1275}
1276
1277Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1278{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001279 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001280 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1281}
1282
1283Value *Nucleus::createURem(Value *lhs, Value *rhs)
1284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001285 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001286 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1287}
1288
1289Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001292 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1293}
1294
1295Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1296{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001297 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001298 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1299 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001300 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001301 return nullptr;
1302}
1303
1304RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1305{
1306 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001307}
1308
1309Value *Nucleus::createShl(Value *lhs, Value *rhs)
1310{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001311 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001312 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1313}
1314
1315Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1316{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001317 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001318 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1319}
1320
1321Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1322{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001323 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001324 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1325}
1326
1327Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1328{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001329 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001330 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1331}
1332
1333Value *Nucleus::createOr(Value *lhs, Value *rhs)
1334{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001335 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001336 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1337}
1338
1339Value *Nucleus::createXor(Value *lhs, Value *rhs)
1340{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001341 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001342 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1343}
1344
1345Value *Nucleus::createNeg(Value *v)
1346{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001347 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001348 return createSub(createNullValue(T(v->getType())), v);
1349}
1350
1351Value *Nucleus::createFNeg(Value *v)
1352{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001353 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001354 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1355 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001356
1357 return createFSub(negativeZero, v);
1358}
1359
1360Value *Nucleus::createNot(Value *v)
1361{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001362 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001363 if(Ice::isScalarIntegerType(v->getType()))
1364 {
1365 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1366 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001367 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001368 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001369 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001370 return createXor(v, createConstantVector(c, T(v->getType())));
1371 }
1372}
1373
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001374static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1375{
1376#if defined(__i386__) || defined(__x86_64__)
1377 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1378 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1379 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1380#else
1381 if(atomic)
1382 {
1383 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1384 }
1385 if(memoryOrder != std::memory_order_relaxed)
1386 {
1387 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1388 }
1389#endif
1390
1391 // Vulkan doesn't allow sequential memory order
1392 ASSERT(memoryOrder != std::memory_order_seq_cst);
1393}
1394
Nicolas Capens157ba262019-12-10 17:49:14 -05001395Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1396{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001397 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001398 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001399
1400 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001401 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001402
Ben Clayton713b8d32019-12-17 20:37:56 +00001403 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001404 {
1405 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001406 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001407 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001408 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001409 auto pointer = RValue<Pointer<Byte>>(ptr);
1410 Int x = *Pointer<Int>(pointer);
1411
1412 Int4 vector;
1413 vector = Insert(vector, x, 0);
1414
Antonio Maiorano02a39532020-01-21 15:15:34 -05001415 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001416 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1417 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001418 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001419 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001420 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001421 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001422 auto pointer = RValue<Pointer<Byte>>(ptr);
1423 Int x = *Pointer<Int>(pointer);
1424 Int y = *Pointer<Int>(pointer + 4);
1425
1426 Int4 vector;
1427 vector = Insert(vector, x, 0);
1428 vector = Insert(vector, y, 1);
1429
Antonio Maiorano02a39532020-01-21 15:15:34 -05001430 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001431 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1432 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001433 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001434 else
1435 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001436 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001437 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001438 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001439 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Antonio Maiorano02a39532020-01-21 15:15:34 -05001440 result = ::function->makeVariable(T(type));
Nicolas Capens33a77f72021-02-08 15:04:38 -05001441 auto load = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001442 load->addArg(ptr);
1443 load->addArg(::context->getConstantInt32(typeSize(type)));
1444 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001445 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001446 }
1447 else
1448 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001449 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001450 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001451
Antonio Maiorano02a39532020-01-21 15:15:34 -05001452 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001453 return V(result);
1454}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001455
Nicolas Capens157ba262019-12-10 17:49:14 -05001456Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1457{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001458 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001459 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001460
Ben Clayton713b8d32019-12-17 20:37:56 +00001461#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001462 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001463 if(align != 0)
1464 {
1465 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1466 call->addArg(ptr);
1467 call->addArg(::context->getConstantInt64(typeSize(type)));
1468 ::basicBlock->appendInst(call);
1469 }
1470#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001471
Nicolas Capens157ba262019-12-10 17:49:14 -05001472 int valueType = (int)reinterpret_cast<intptr_t>(type);
1473
Ben Clayton713b8d32019-12-17 20:37:56 +00001474 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001475 {
1476 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001477 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001478 if(typeSize(type) == 4)
1479 {
1480 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1481 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1482 ::basicBlock->appendInst(bitcast);
1483
1484 RValue<Int4> v(V(vector));
1485
1486 auto pointer = RValue<Pointer<Byte>>(ptr);
1487 Int x = Extract(v, 0);
1488 *Pointer<Int>(pointer) = x;
1489 }
1490 else if(typeSize(type) == 8)
1491 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001492 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001493 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1494 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1495 ::basicBlock->appendInst(bitcast);
1496
1497 RValue<Int4> v(V(vector));
1498
1499 auto pointer = RValue<Pointer<Byte>>(ptr);
1500 Int x = Extract(v, 0);
1501 *Pointer<Int>(pointer) = x;
1502 Int y = Extract(v, 1);
1503 *Pointer<Int>(pointer + 4) = y;
1504 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001505 else
1506 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001507 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001508 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001509 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001510 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001511 auto store = Ice::InstIntrinsic::create(::function, 3, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001512 store->addArg(value);
1513 store->addArg(ptr);
1514 store->addArg(::context->getConstantInt32(typeSize(type)));
1515 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001516 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001517 }
1518 else
1519 {
1520 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001521
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001522 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001523 ::basicBlock->appendInst(store);
1524 }
1525
1526 return value;
1527}
1528
1529Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1530{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001531 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001532 ASSERT(index->getType() == Ice::IceType_i32);
1533
1534 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1535 {
1536 int32_t offset = constant->getValue() * (int)typeSize(type);
1537
1538 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001539 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001540 return ptr;
1541 }
1542
Nicolas Capens157ba262019-12-10 17:49:14 -05001543 return createAdd(ptr, createConstantInt(offset));
1544 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001545
Nicolas Capens157ba262019-12-10 17:49:14 -05001546 if(!Ice::isByteSizedType(T(type)))
1547 {
1548 index = createMul(index, createConstantInt((int)typeSize(type)));
1549 }
1550
Ben Clayton713b8d32019-12-17 20:37:56 +00001551 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001552 {
1553 if(unsignedIndex)
1554 {
1555 index = createZExt(index, T(Ice::IceType_i64));
1556 }
1557 else
1558 {
1559 index = createSExt(index, T(Ice::IceType_i64));
1560 }
1561 }
1562
1563 return createAdd(ptr, index);
1564}
1565
Antonio Maiorano370cba52019-12-31 11:36:07 -05001566static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1567{
1568 Ice::Variable *result = ::function->makeVariable(value->getType());
1569
1570 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001571 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001572 auto op = ::context->getConstantInt32(rmwOp);
1573 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1574 inst->addArg(op);
1575 inst->addArg(ptr);
1576 inst->addArg(value);
1577 inst->addArg(order);
1578 ::basicBlock->appendInst(inst);
1579
1580 return V(result);
1581}
1582
Nicolas Capens157ba262019-12-10 17:49:14 -05001583Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1584{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001585 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001586 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001587}
1588
1589Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1590{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001591 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001592 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001593}
1594
1595Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1596{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001597 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001598 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001599}
1600
1601Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1602{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001603 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001604 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001605}
1606
1607Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1608{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001609 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001610 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001611}
1612
1613Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1614{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001615 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001616 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001617}
1618
1619Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1620{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001621 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001622 Ice::Variable *result = ::function->makeVariable(value->getType());
1623
1624 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001625 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001626 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1627 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1628 inst->addArg(ptr);
1629 inst->addArg(compare);
1630 inst->addArg(value);
1631 inst->addArg(orderEq);
1632 inst->addArg(orderNeq);
1633 ::basicBlock->appendInst(inst);
1634
1635 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001636}
1637
1638static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1639{
1640 if(v->getType() == T(destType))
1641 {
1642 return v;
1643 }
1644
1645 Ice::Variable *result = ::function->makeVariable(T(destType));
1646 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1647 ::basicBlock->appendInst(cast);
1648
1649 return V(result);
1650}
1651
1652Value *Nucleus::createTrunc(Value *v, Type *destType)
1653{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001654 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001655 return createCast(Ice::InstCast::Trunc, v, destType);
1656}
1657
1658Value *Nucleus::createZExt(Value *v, Type *destType)
1659{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001660 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001661 return createCast(Ice::InstCast::Zext, v, destType);
1662}
1663
1664Value *Nucleus::createSExt(Value *v, Type *destType)
1665{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001666 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001667 return createCast(Ice::InstCast::Sext, v, destType);
1668}
1669
1670Value *Nucleus::createFPToUI(Value *v, Type *destType)
1671{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001672 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001673 return createCast(Ice::InstCast::Fptoui, v, destType);
1674}
1675
1676Value *Nucleus::createFPToSI(Value *v, Type *destType)
1677{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001678 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001679 return createCast(Ice::InstCast::Fptosi, v, destType);
1680}
1681
1682Value *Nucleus::createSIToFP(Value *v, Type *destType)
1683{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001684 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001685 return createCast(Ice::InstCast::Sitofp, v, destType);
1686}
1687
1688Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1689{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001690 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001691 return createCast(Ice::InstCast::Fptrunc, v, destType);
1692}
1693
1694Value *Nucleus::createFPExt(Value *v, Type *destType)
1695{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001696 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001697 return createCast(Ice::InstCast::Fpext, v, destType);
1698}
1699
1700Value *Nucleus::createBitCast(Value *v, Type *destType)
1701{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001702 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001703 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1704 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1705 // emulate them by writing to the stack and reading back as the destination type.
1706 if(emulateMismatchedBitCast)
1707 {
1708 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1709 {
1710 Value *address = allocateStackVariable(destType);
1711 createStore(v, address, T(v->getType()));
1712 return createLoad(address, destType);
1713 }
1714 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1715 {
1716 Value *address = allocateStackVariable(T(v->getType()));
1717 createStore(v, address, T(v->getType()));
1718 return createLoad(address, destType);
1719 }
1720 }
1721
1722 return createCast(Ice::InstCast::Bitcast, v, destType);
1723}
1724
1725static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1726{
1727 ASSERT(lhs->getType() == rhs->getType());
1728
1729 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1730 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1731 ::basicBlock->appendInst(cmp);
1732
1733 return V(result);
1734}
1735
Nicolas Capens157ba262019-12-10 17:49:14 -05001736Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1737{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001738 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001739 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1740}
1741
1742Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1743{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001744 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001745 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1746}
1747
1748Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1749{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001750 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001751 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1752}
1753
1754Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1755{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001756 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001757 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1758}
1759
1760Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1761{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001762 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001763 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1764}
1765
1766Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1767{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001768 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001769 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1770}
1771
1772Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1773{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001774 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001775 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1776}
1777
1778Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1779{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001780 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001781 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1782}
1783
1784Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1785{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001786 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001787 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1788}
1789
1790Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1791{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001792 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001793 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1794}
1795
1796static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1797{
1798 ASSERT(lhs->getType() == rhs->getType());
1799 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1800
1801 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1802 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1803 ::basicBlock->appendInst(cmp);
1804
1805 return V(result);
1806}
1807
1808Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1809{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001810 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001811 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1812}
1813
1814Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1815{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001816 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001817 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1818}
1819
1820Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1821{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001822 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001823 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1824}
1825
1826Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1827{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001828 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001829 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1830}
1831
1832Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1833{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001834 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001835 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1836}
1837
1838Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1839{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001840 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001841 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1842}
1843
1844Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1845{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001846 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001847 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1848}
1849
1850Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1851{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001852 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001853 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1854}
1855
1856Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1857{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001858 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001859 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1860}
1861
1862Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1863{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001864 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001865 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1866}
1867
1868Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1869{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001870 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001871 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1872}
1873
1874Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1875{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001876 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001877 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1878}
1879
1880Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1881{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001882 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001883 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1884}
1885
1886Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1887{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001888 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001889 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1890}
1891
1892Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1893{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001894 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001895 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001896 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001897 ::basicBlock->appendInst(extract);
1898
1899 return V(result);
1900}
1901
1902Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1903{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001904 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001905 auto result = ::function->makeVariable(vector->getType());
1906 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1907 ::basicBlock->appendInst(insert);
1908
1909 return V(result);
1910}
1911
1912Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1913{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001914 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001915 ASSERT(V1->getType() == V2->getType());
1916
1917 int size = Ice::typeNumElements(V1->getType());
1918 auto result = ::function->makeVariable(V1->getType());
1919 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1920
1921 for(int i = 0; i < size; i++)
1922 {
1923 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1924 }
1925
1926 ::basicBlock->appendInst(shuffle);
1927
1928 return V(result);
1929}
1930
1931Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1932{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001933 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001934 ASSERT(ifTrue->getType() == ifFalse->getType());
1935
1936 auto result = ::function->makeVariable(ifTrue->getType());
1937 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1938 ::basicBlock->appendInst(select);
1939
1940 return V(result);
1941}
1942
1943SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1944{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001945 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001946 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1947 ::basicBlock->appendInst(switchInst);
1948
Ben Clayton713b8d32019-12-17 20:37:56 +00001949 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001950}
1951
1952void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1953{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001954 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001955 switchCases->addBranch(label, label, branch);
1956}
1957
1958void Nucleus::createUnreachable()
1959{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001960 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001961 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1962 ::basicBlock->appendInst(unreachable);
1963}
1964
Antonio Maiorano62427e02020-02-13 09:18:05 -05001965Type *Nucleus::getType(Value *value)
1966{
1967 return T(V(value)->getType());
1968}
1969
1970Type *Nucleus::getContainedType(Type *vectorType)
1971{
1972 Ice::Type vecTy = T(vectorType);
1973 switch(vecTy)
1974 {
Nicolas Capens112faf42019-12-13 17:32:26 -05001975 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1976 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1977 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1978 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1979 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1980 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1981 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1982 default:
1983 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1984 return {};
Antonio Maiorano62427e02020-02-13 09:18:05 -05001985 }
1986}
1987
Nicolas Capens157ba262019-12-10 17:49:14 -05001988Type *Nucleus::getPointerType(Type *ElementType)
1989{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001990 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001991}
1992
Antonio Maiorano62427e02020-02-13 09:18:05 -05001993static constexpr Ice::Type getNaturalIntType()
1994{
1995 constexpr size_t intSize = sizeof(int);
1996 static_assert(intSize == 4 || intSize == 8, "");
1997 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1998}
1999
2000Type *Nucleus::getPrintfStorageType(Type *valueType)
2001{
2002 Ice::Type valueTy = T(valueType);
2003 switch(valueTy)
2004 {
Nicolas Capens112faf42019-12-13 17:32:26 -05002005 case Ice::IceType_i32:
2006 return T(getNaturalIntType());
Antonio Maiorano62427e02020-02-13 09:18:05 -05002007
Nicolas Capens112faf42019-12-13 17:32:26 -05002008 case Ice::IceType_f32:
2009 return T(Ice::IceType_f64);
Antonio Maiorano62427e02020-02-13 09:18:05 -05002010
Nicolas Capens112faf42019-12-13 17:32:26 -05002011 default:
2012 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
2013 return {};
Antonio Maiorano62427e02020-02-13 09:18:05 -05002014 }
2015}
2016
Nicolas Capens157ba262019-12-10 17:49:14 -05002017Value *Nucleus::createNullValue(Type *Ty)
2018{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002019 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002020 if(Ice::isVectorType(T(Ty)))
2021 {
2022 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002023 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002024 return createConstantVector(c, Ty);
2025 }
2026 else
2027 {
2028 return V(::context->getConstantZero(T(Ty)));
2029 }
2030}
2031
2032Value *Nucleus::createConstantLong(int64_t i)
2033{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002034 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002035 return V(::context->getConstantInt64(i));
2036}
2037
2038Value *Nucleus::createConstantInt(int i)
2039{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002040 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002041 return V(::context->getConstantInt32(i));
2042}
2043
2044Value *Nucleus::createConstantInt(unsigned int i)
2045{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002046 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002047 return V(::context->getConstantInt32(i));
2048}
2049
2050Value *Nucleus::createConstantBool(bool b)
2051{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002052 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002053 return V(::context->getConstantInt1(b));
2054}
2055
2056Value *Nucleus::createConstantByte(signed char i)
2057{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002058 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002059 return V(::context->getConstantInt8(i));
2060}
2061
2062Value *Nucleus::createConstantByte(unsigned char i)
2063{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002064 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002065 return V(::context->getConstantInt8(i));
2066}
2067
2068Value *Nucleus::createConstantShort(short i)
2069{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002070 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002071 return V(::context->getConstantInt16(i));
2072}
2073
2074Value *Nucleus::createConstantShort(unsigned short i)
2075{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002076 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002077 return V(::context->getConstantInt16(i));
2078}
2079
2080Value *Nucleus::createConstantFloat(float x)
2081{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002082 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002083 return V(::context->getConstantFloat(x));
2084}
2085
2086Value *Nucleus::createNullPointer(Type *Ty)
2087{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002088 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002089 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002090}
2091
Antonio Maiorano02a39532020-01-21 15:15:34 -05002092static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2093{
2094 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2095}
2096
Nicolas Capens157ba262019-12-10 17:49:14 -05002097Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2098{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002099 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002100 const int vectorSize = 16;
2101 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2102 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002103
2104 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002105 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002106
Antonio Maioranoa0957112020-03-04 15:06:19 -05002107 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002108 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002109
2110 switch((int)reinterpret_cast<intptr_t>(type))
2111 {
Nicolas Capens112faf42019-12-13 17:32:26 -05002112 case Ice::IceType_v4i32:
2113 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002114 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002115 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002116 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002117 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002118 }
2119 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002120 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002121 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002122 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002123 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002124 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002125 }
2126 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002127 case Ice::IceType_v8i16:
2128 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002129 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002130 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002131 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002132 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002133 }
2134 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002135 case Ice::IceType_v16i8:
2136 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002137 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002138 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002139 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002140 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002141 }
2142 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002143 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002144 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002145 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002146 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002147 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002148 }
2149 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002150 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002151 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002152 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002153 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002154 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002155 }
2156 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002157 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002158 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002159 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002160 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002161 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002162 }
2163 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002164 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002165 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002166 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002167 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002168 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002169 }
2170 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002171 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002172 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002173 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002174 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002175 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002176 }
2177 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002178 default:
2179 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002180 }
2181
Antonio Maiorano02a39532020-01-21 15:15:34 -05002182 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002183
Antonio Maiorano02a39532020-01-21 15:15:34 -05002184 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002185 return V(result);
2186}
2187
2188Value *Nucleus::createConstantVector(const double *constants, Type *type)
2189{
Ben Clayton713b8d32019-12-17 20:37:56 +00002190 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002191}
2192
Antonio Maiorano62427e02020-02-13 09:18:05 -05002193Value *Nucleus::createConstantString(const char *v)
2194{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002195 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002196 return V(IceConstantData(v, strlen(v) + 1));
2197}
2198
Nicolas Capens54313fb2021-02-19 14:26:27 -05002199void Nucleus::setOptimizerCallback(OptimizerCallback *callback)
2200{
2201 ::optimizerCallback = callback;
2202}
2203
Nicolas Capens519cf222020-05-08 15:27:19 -04002204Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002205{
2206 return T(Ice::IceType_void);
2207}
2208
Nicolas Capens519cf222020-05-08 15:27:19 -04002209Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002210{
2211 return T(Ice::IceType_i1);
2212}
2213
Nicolas Capens519cf222020-05-08 15:27:19 -04002214Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002215{
2216 return T(Ice::IceType_i8);
2217}
2218
Nicolas Capens519cf222020-05-08 15:27:19 -04002219Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002220{
2221 return T(Ice::IceType_i8);
2222}
2223
Nicolas Capens519cf222020-05-08 15:27:19 -04002224Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002225{
2226 return T(Ice::IceType_i16);
2227}
2228
Nicolas Capens519cf222020-05-08 15:27:19 -04002229Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002230{
2231 return T(Ice::IceType_i16);
2232}
2233
Nicolas Capens519cf222020-05-08 15:27:19 -04002234Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002235{
2236 return T(Type_v4i8);
2237}
2238
Nicolas Capens519cf222020-05-08 15:27:19 -04002239Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002240{
2241 return T(Type_v4i8);
2242}
2243
Ben Clayton713b8d32019-12-17 20:37:56 +00002244namespace {
2245RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002246{
Ben Clayton713b8d32019-12-17 20:37:56 +00002247 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002248}
2249
Ben Clayton713b8d32019-12-17 20:37:56 +00002250RValue<Byte> Extract(RValue<Byte8> val, int i)
2251{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002252 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002253}
2254
2255RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2256{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002257 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002258}
2259} // namespace
2260
Nicolas Capens157ba262019-12-10 17:49:14 -05002261RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2262{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002263 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002264 if(emulateIntrinsics)
2265 {
2266 Byte8 result;
2267 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2268 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2269 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2270 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2271 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2272 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2273 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2274 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2275
2276 return result;
2277 }
2278 else
2279 {
2280 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002281 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002282 auto paddusb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002283 paddusb->addArg(x.value());
2284 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002285 ::basicBlock->appendInst(paddusb);
2286
2287 return RValue<Byte8>(V(result));
2288 }
2289}
2290
2291RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2292{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002293 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002294 if(emulateIntrinsics)
2295 {
2296 Byte8 result;
2297 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2298 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2299 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2300 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2301 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2302 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2303 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2304 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2305
2306 return result;
2307 }
2308 else
2309 {
2310 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002311 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002312 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002313 psubusw->addArg(x.value());
2314 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002315 ::basicBlock->appendInst(psubusw);
2316
2317 return RValue<Byte8>(V(result));
2318 }
2319}
2320
2321RValue<SByte> Extract(RValue<SByte8> val, int i)
2322{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002323 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002324 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002325}
2326
2327RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2328{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002329 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002330 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002331}
2332
2333RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2334{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002335 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002336 if(emulateIntrinsics)
2337 {
2338 SByte8 result;
2339 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2340 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2341 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2342 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2343 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2344 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2345 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2346 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2347
2348 return result;
2349 }
2350 else
2351 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002352#if defined(__i386__) || defined(__x86_64__)
2353 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2354 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2355 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002356
Ben Clayton713b8d32019-12-17 20:37:56 +00002357 return As<SByte8>(hi | lo);
2358#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002359 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002360#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002361 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002362}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002363
Nicolas Capens157ba262019-12-10 17:49:14 -05002364RValue<Int> SignMask(RValue<Byte8> x)
2365{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002366 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002367 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002368 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002369 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2370 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002371 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002372 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002373 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002374 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002375 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002376 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002377 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002378 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002379
Nicolas Capens157ba262019-12-10 17:49:14 -05002380 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002381 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002382}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002383
2384// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2385// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002386// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002387// }
2388
Nicolas Capens157ba262019-12-10 17:49:14 -05002389RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2390{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002391 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002392 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002393}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002394
Nicolas Capens519cf222020-05-08 15:27:19 -04002395Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002396{
2397 return T(Type_v8i8);
2398}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002399
Nicolas Capens598f8d82016-09-26 15:09:10 -04002400// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2401// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002402// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002403// }
2404
2405// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2406// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002407// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002408// }
2409
Nicolas Capens157ba262019-12-10 17:49:14 -05002410RValue<SByte> SaturateSigned(RValue<Short> x)
2411{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002412 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002413 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2414}
2415
2416RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002418 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002419 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002420 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002421 SByte8 result;
2422 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2423 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2424 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2425 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2426 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2427 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2428 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2429 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002430
Nicolas Capens157ba262019-12-10 17:49:14 -05002431 return result;
2432 }
2433 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002434 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002435 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002436 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002437 auto paddsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002438 paddsb->addArg(x.value());
2439 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002440 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002441
Nicolas Capens157ba262019-12-10 17:49:14 -05002442 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002443 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002444}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002445
Nicolas Capens157ba262019-12-10 17:49:14 -05002446RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2447{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002448 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002449 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002450 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002451 SByte8 result;
2452 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2453 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2454 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2455 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2456 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2457 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2458 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2459 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002460
Nicolas Capens157ba262019-12-10 17:49:14 -05002461 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002462 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002463 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002464 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002465 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002466 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002467 auto psubsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002468 psubsb->addArg(x.value());
2469 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002470 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002471
Nicolas Capens157ba262019-12-10 17:49:14 -05002472 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002473 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002474}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002475
Nicolas Capens157ba262019-12-10 17:49:14 -05002476RValue<Int> SignMask(RValue<SByte8> x)
2477{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002478 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002479 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002480 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002481 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2482 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002483 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002484 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002485 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002486 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002487 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002488 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002489 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002490 ::basicBlock->appendInst(movmsk);
2491
2492 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002493 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002494}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002495
Nicolas Capens157ba262019-12-10 17:49:14 -05002496RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2497{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002498 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002499 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002500}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002501
Nicolas Capens157ba262019-12-10 17:49:14 -05002502RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2503{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002504 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002505 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002506}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002507
Nicolas Capens519cf222020-05-08 15:27:19 -04002508Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002509{
2510 return T(Type_v8i8);
2511}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002512
Nicolas Capens519cf222020-05-08 15:27:19 -04002513Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002514{
2515 return T(Ice::IceType_v16i8);
2516}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002517
Nicolas Capens519cf222020-05-08 15:27:19 -04002518Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002519{
2520 return T(Ice::IceType_v16i8);
2521}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002522
Nicolas Capens519cf222020-05-08 15:27:19 -04002523Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002524{
2525 return T(Type_v2i16);
2526}
Nicolas Capensd4227962016-11-09 14:24:25 -05002527
Nicolas Capens519cf222020-05-08 15:27:19 -04002528Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002529{
2530 return T(Type_v2i16);
2531}
Nicolas Capensd4227962016-11-09 14:24:25 -05002532
Nicolas Capens157ba262019-12-10 17:49:14 -05002533Short4::Short4(RValue<Int4> cast)
2534{
Ben Clayton713b8d32019-12-17 20:37:56 +00002535 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002536 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002537 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2538
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002539 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002540 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002541
2542 storeValue(short4);
2543}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002544
2545// Short4::Short4(RValue<Float> cast)
2546// {
2547// }
2548
Nicolas Capens157ba262019-12-10 17:49:14 -05002549Short4::Short4(RValue<Float4> cast)
2550{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002551 // TODO(b/150791192): Generalize and optimize
2552 auto smin = std::numeric_limits<short>::min();
2553 auto smax = std::numeric_limits<short>::max();
2554 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002555}
2556
2557RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2558{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002559 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002560 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002561 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002562 Short4 result;
2563 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2564 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2565 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2566 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002567
2568 return result;
2569 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002570 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002571 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002572 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002573 }
2574}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002575
Nicolas Capens157ba262019-12-10 17:49:14 -05002576RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2577{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002578 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002579 if(emulateIntrinsics)
2580 {
2581 Short4 result;
2582 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2583 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2584 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2585 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2586
2587 return result;
2588 }
2589 else
2590 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002591 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002592 }
2593}
2594
2595RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2596{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002597 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002598 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002599 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002600 ::basicBlock->appendInst(cmp);
2601
2602 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002603 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002604 ::basicBlock->appendInst(select);
2605
2606 return RValue<Short4>(V(result));
2607}
2608
2609RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2610{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002611 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002612 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002613 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002614 ::basicBlock->appendInst(cmp);
2615
2616 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002617 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002618 ::basicBlock->appendInst(select);
2619
2620 return RValue<Short4>(V(result));
2621}
2622
2623RValue<Short> SaturateSigned(RValue<Int> x)
2624{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002625 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002626 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2627}
2628
2629RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2630{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002631 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002632 if(emulateIntrinsics)
2633 {
2634 Short4 result;
2635 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2636 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2637 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2638 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2639
2640 return result;
2641 }
2642 else
2643 {
2644 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002645 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002646 auto paddsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002647 paddsw->addArg(x.value());
2648 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002649 ::basicBlock->appendInst(paddsw);
2650
2651 return RValue<Short4>(V(result));
2652 }
2653}
2654
2655RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2656{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002657 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002658 if(emulateIntrinsics)
2659 {
2660 Short4 result;
2661 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2662 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2663 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2664 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2665
2666 return result;
2667 }
2668 else
2669 {
2670 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002671 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002672 auto psubsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002673 psubsw->addArg(x.value());
2674 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002675 ::basicBlock->appendInst(psubsw);
2676
2677 return RValue<Short4>(V(result));
2678 }
2679}
2680
2681RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2682{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002683 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002684 if(emulateIntrinsics)
2685 {
2686 Short4 result;
2687 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2688 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2689 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2690 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2691
2692 return result;
2693 }
2694 else
2695 {
2696 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002697 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002698 auto pmulhw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002699 pmulhw->addArg(x.value());
2700 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002701 ::basicBlock->appendInst(pmulhw);
2702
2703 return RValue<Short4>(V(result));
2704 }
2705}
2706
2707RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2708{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002709 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002710 if(emulateIntrinsics)
2711 {
2712 Int2 result;
2713 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2714 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2715
2716 return result;
2717 }
2718 else
2719 {
2720 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002721 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002722 auto pmaddwd = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002723 pmaddwd->addArg(x.value());
2724 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002725 ::basicBlock->appendInst(pmaddwd);
2726
2727 return As<Int2>(V(result));
2728 }
2729}
2730
2731RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2732{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002733 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002734 if(emulateIntrinsics)
2735 {
2736 SByte8 result;
2737 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2738 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2739 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2740 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2741 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2742 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2743 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2744 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2745
2746 return result;
2747 }
2748 else
2749 {
2750 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002751 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002752 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002753 pack->addArg(x.value());
2754 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002755 ::basicBlock->appendInst(pack);
2756
2757 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2758 }
2759}
2760
2761RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2762{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002763 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002764 if(emulateIntrinsics)
2765 {
2766 Byte8 result;
2767 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2768 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2769 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2770 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2771 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2772 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2773 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2774 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2775
2776 return result;
2777 }
2778 else
2779 {
2780 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002781 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002782 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002783 pack->addArg(x.value());
2784 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002785 ::basicBlock->appendInst(pack);
2786
2787 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2788 }
2789}
2790
2791RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2792{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002793 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002794 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002795}
2796
2797RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2798{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002799 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002800 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002801}
2802
Nicolas Capens519cf222020-05-08 15:27:19 -04002803Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002804{
2805 return T(Type_v4i16);
2806}
2807
2808UShort4::UShort4(RValue<Float4> cast, bool saturate)
2809{
2810 if(saturate)
2811 {
2812 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002813 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002814 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2815 // PackUnsigned takes care of 0x0000 saturation.
2816 Int4 int4(Min(cast, Float4(0xFFFF)));
2817 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002818 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002819 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002820 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002821 // ARM saturates the 32-bit integer result on overflow/undeflow.
2822 Int4 int4(cast);
2823 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002824 }
2825 else
2826 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002827 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002828 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002829 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002830 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002831 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002832 *this = Short4(Int4(cast));
2833 }
2834}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002835
Nicolas Capens157ba262019-12-10 17:49:14 -05002836RValue<UShort> Extract(RValue<UShort4> val, int i)
2837{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002838 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002839}
2840
2841RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2842{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002843 return RValue<UShort4>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002844}
2845
2846RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2847{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002848 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002849 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002850
Nicolas Capens157ba262019-12-10 17:49:14 -05002851 {
2852 UShort4 result;
2853 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2854 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2855 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2856 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2857
2858 return result;
2859 }
2860 else
2861 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002862 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002863 }
2864}
2865
2866RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2867{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002868 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002869 if(emulateIntrinsics)
2870 {
2871 UShort4 result;
2872 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2873 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2874 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2875 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2876
2877 return result;
2878 }
2879 else
2880 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002881 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002882 }
2883}
2884
2885RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2886{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002887 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002888 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002889 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002890 ::basicBlock->appendInst(cmp);
2891
2892 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002893 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002894 ::basicBlock->appendInst(select);
2895
2896 return RValue<UShort4>(V(result));
2897}
2898
2899RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2900{
2901 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002902 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002903 ::basicBlock->appendInst(cmp);
2904
2905 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002906 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002907 ::basicBlock->appendInst(select);
2908
2909 return RValue<UShort4>(V(result));
2910}
2911
2912RValue<UShort> SaturateUnsigned(RValue<Int> x)
2913{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002914 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002915 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2916}
2917
2918RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2919{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002920 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002921 if(emulateIntrinsics)
2922 {
2923 UShort4 result;
2924 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2925 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2926 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2927 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2928
2929 return result;
2930 }
2931 else
2932 {
2933 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002934 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002935 auto paddusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002936 paddusw->addArg(x.value());
2937 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002938 ::basicBlock->appendInst(paddusw);
2939
2940 return RValue<UShort4>(V(result));
2941 }
2942}
2943
2944RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2945{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002946 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002947 if(emulateIntrinsics)
2948 {
2949 UShort4 result;
2950 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2951 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2952 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2953 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2954
2955 return result;
2956 }
2957 else
2958 {
2959 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002960 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002961 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002962 psubusw->addArg(x.value());
2963 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002964 ::basicBlock->appendInst(psubusw);
2965
2966 return RValue<UShort4>(V(result));
2967 }
2968}
2969
2970RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2971{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002972 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002973 if(emulateIntrinsics)
2974 {
2975 UShort4 result;
2976 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2977 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2978 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2979 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2980
2981 return result;
2982 }
2983 else
2984 {
2985 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002986 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002987 auto pmulhuw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002988 pmulhuw->addArg(x.value());
2989 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002990 ::basicBlock->appendInst(pmulhuw);
2991
2992 return RValue<UShort4>(V(result));
2993 }
2994}
2995
2996RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2997{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002998 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002999 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3000
3001 // Scalarized implementation.
3002 Int4 result;
3003 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
3004 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
3005 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
3006 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
3007
3008 return result;
3009}
3010
3011RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3012{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003013 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003014 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3015
3016 if(false) // Partial product based implementation.
3017 {
3018 auto xh = x >> 16;
3019 auto yh = y >> 16;
3020 auto xl = x & UInt4(0x0000FFFF);
3021 auto yl = y & UInt4(0x0000FFFF);
3022 auto xlyh = xl * yh;
3023 auto xhyl = xh * yl;
3024 auto xlyhh = xlyh >> 16;
3025 auto xhylh = xhyl >> 16;
3026 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3027 auto xhyll = xhyl & UInt4(0x0000FFFF);
3028 auto xlylh = (xl * yl) >> 16;
3029 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3030
3031 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003032 }
3033
Nicolas Capens157ba262019-12-10 17:49:14 -05003034 // Scalarized implementation.
3035 Int4 result;
3036 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3037 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3038 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3039 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3040
3041 return As<UInt4>(result);
3042}
3043
3044RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3045{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003046 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003047 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003048 return UShort4(0);
3049}
3050
Nicolas Capens519cf222020-05-08 15:27:19 -04003051Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003052{
3053 return T(Type_v4i16);
3054}
3055
3056RValue<Short> Extract(RValue<Short8> val, int i)
3057{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003058 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003059 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003060}
3061
3062RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3063{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003064 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003065 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003066}
3067
3068RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3069{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003070 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003071 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003072 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003073 Short8 result;
3074 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3075 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3076 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3077 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3078 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3079 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3080 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3081 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003082
Nicolas Capens157ba262019-12-10 17:49:14 -05003083 return result;
3084 }
3085 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003086 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003087 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003088 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003089}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003090
Nicolas Capens157ba262019-12-10 17:49:14 -05003091RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3092{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003093 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003094 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003095 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003096 Short8 result;
3097 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3098 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3099 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3100 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3101 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3102 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3103 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3104 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003105
Nicolas Capens157ba262019-12-10 17:49:14 -05003106 return result;
3107 }
3108 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003109 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003110 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003111 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003112}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003113
Nicolas Capens157ba262019-12-10 17:49:14 -05003114RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3115{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003116 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003117 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003118 return Int4(0);
3119}
3120
3121RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3122{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003123 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003124 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003125 return Short8(0);
3126}
3127
Nicolas Capens519cf222020-05-08 15:27:19 -04003128Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003129{
3130 return T(Ice::IceType_v8i16);
3131}
3132
3133RValue<UShort> Extract(RValue<UShort8> val, int i)
3134{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003135 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003136 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003137}
3138
3139RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3140{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003141 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003142 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003143}
3144
3145RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3146{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003147 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003148 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003149 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003150 UShort8 result;
3151 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3152 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3153 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3154 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3155 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3156 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3157 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3158 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003159
Nicolas Capens157ba262019-12-10 17:49:14 -05003160 return result;
3161 }
3162 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003163 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003164 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003165 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003166}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003167
Nicolas Capens157ba262019-12-10 17:49:14 -05003168RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3169{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003170 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003171 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003172 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003173 UShort8 result;
3174 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3175 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3176 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3177 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3178 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3179 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3180 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3181 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003182
Nicolas Capens157ba262019-12-10 17:49:14 -05003183 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003184 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003185 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003186 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003187 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003188 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003189}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003190
Nicolas Capens157ba262019-12-10 17:49:14 -05003191RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3192{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003193 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003194 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003195 return UShort8(0);
3196}
3197
Nicolas Capens519cf222020-05-08 15:27:19 -04003198Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003199{
3200 return T(Ice::IceType_v8i16);
3201}
3202
Ben Clayton713b8d32019-12-17 20:37:56 +00003203RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003204{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003205 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003206 RValue<Int> res = val;
3207 val += 1;
3208 return res;
3209}
3210
Ben Clayton713b8d32019-12-17 20:37:56 +00003211const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003212{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003213 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003214 val += 1;
3215 return val;
3216}
3217
Ben Clayton713b8d32019-12-17 20:37:56 +00003218RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003219{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003220 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003221 RValue<Int> res = val;
3222 val -= 1;
3223 return res;
3224}
3225
Ben Clayton713b8d32019-12-17 20:37:56 +00003226const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003227{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003228 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003229 val -= 1;
3230 return val;
3231}
3232
3233RValue<Int> RoundInt(RValue<Float> cast)
3234{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003235 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003236 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003237 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003238 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3239 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003240 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003241 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003242 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003243 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003244 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003245 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003246 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003247 ::basicBlock->appendInst(nearbyint);
3248
3249 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003250 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003251}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003252
Nicolas Capens519cf222020-05-08 15:27:19 -04003253Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003254{
3255 return T(Ice::IceType_i32);
3256}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003257
Nicolas Capens519cf222020-05-08 15:27:19 -04003258Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003259{
3260 return T(Ice::IceType_i64);
3261}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003262
Nicolas Capens157ba262019-12-10 17:49:14 -05003263UInt::UInt(RValue<Float> cast)
3264{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003265 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003266 // Smallest positive value representable in UInt, but not in Int
3267 const unsigned int ustart = 0x80000000u;
3268 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003269
Nicolas Capens157ba262019-12-10 17:49:14 -05003270 // If the value is negative, store 0, otherwise store the result of the conversion
3271 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003272 // Check if the value can be represented as an Int
3273 IfThenElse(cast >= ustartf,
3274 // If the value is too large, subtract ustart and re-add it after conversion.
3275 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3276 // Otherwise, just convert normally
3277 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003278 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003279}
Nicolas Capensa8086512016-11-07 17:32:17 -05003280
Ben Clayton713b8d32019-12-17 20:37:56 +00003281RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003282{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003283 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003284 RValue<UInt> res = val;
3285 val += 1;
3286 return res;
3287}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003288
Ben Clayton713b8d32019-12-17 20:37:56 +00003289const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003292 val += 1;
3293 return val;
3294}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003295
Ben Clayton713b8d32019-12-17 20:37:56 +00003296RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003297{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003298 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003299 RValue<UInt> res = val;
3300 val -= 1;
3301 return res;
3302}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003303
Ben Clayton713b8d32019-12-17 20:37:56 +00003304const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003307 val -= 1;
3308 return val;
3309}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003310
//	RValue<UInt> RoundUInt(RValue<Float> cast)
//	{
//		ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
//	}
3315
Nicolas Capens519cf222020-05-08 15:27:19 -04003316Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003317{
3318 return T(Ice::IceType_i32);
3319}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003320
//	Int2::Int2(RValue<Int> cast)
//	{
//		Value *extend = Nucleus::createZExt(cast.value(), Long::type());
//		Value *vector = Nucleus::createBitCast(extend, Int2::type());
//
//		Constant *shuffle[2];
//		shuffle[0] = Nucleus::createConstantInt(0);
//		shuffle[1] = Nucleus::createConstantInt(0);
//
//		Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
//
//		storeValue(replicate);
//	}
3334
Nicolas Capens157ba262019-12-10 17:49:14 -05003335RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3336{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003337 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003338 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003339 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003340 Int2 result;
3341 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3342 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003343
Nicolas Capens157ba262019-12-10 17:49:14 -05003344 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003345 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003346 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003347 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003348 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003349 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003350}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003351
Nicolas Capens157ba262019-12-10 17:49:14 -05003352RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003354 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003355 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003356 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003357 Int2 result;
3358 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3359 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3360
3361 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003362 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003363 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003364 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003365 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003366 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003367}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003368
Nicolas Capens519cf222020-05-08 15:27:19 -04003369Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003370{
3371 return T(Type_v2i32);
3372}
3373
3374RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3375{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003376 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003377 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003378 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003379 UInt2 result;
3380 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3381 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003382
Nicolas Capens157ba262019-12-10 17:49:14 -05003383 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003384 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003385 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003386 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003387 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003388 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003389}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003390
Nicolas Capens157ba262019-12-10 17:49:14 -05003391RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3392{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003393 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003394 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003395 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003396 UInt2 result;
3397 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3398 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003399
Nicolas Capens157ba262019-12-10 17:49:14 -05003400 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003401 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003402 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003403 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003404 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003405 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003406}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003407
Nicolas Capens519cf222020-05-08 15:27:19 -04003408Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003409{
3410 return T(Type_v2i32);
3411}
3412
Ben Clayton713b8d32019-12-17 20:37:56 +00003413Int4::Int4(RValue<Byte4> cast)
3414 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003415{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003416 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003417 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003418 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3419
3420 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003421 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003422 Value *b = Nucleus::createBitCast(a, Byte16::type());
3423 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003424
Ben Clayton713b8d32019-12-17 20:37:56 +00003425 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003426 Value *d = Nucleus::createBitCast(c, Short8::type());
3427 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003428
Nicolas Capens519cf222020-05-08 15:27:19 -04003429 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003430 storeValue(f);
3431}
3432
Ben Clayton713b8d32019-12-17 20:37:56 +00003433Int4::Int4(RValue<SByte4> cast)
3434 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003435{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003436 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003437 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003438 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3439
Ben Clayton713b8d32019-12-17 20:37:56 +00003440 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003441 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003442 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3443
Ben Clayton713b8d32019-12-17 20:37:56 +00003444 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003445 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003446 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3447
3448 *this = As<Int4>(e) >> 24;
3449}
3450
Ben Clayton713b8d32019-12-17 20:37:56 +00003451Int4::Int4(RValue<Short4> cast)
3452 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003453{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003454 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003455 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003456 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003457
3458 *this = As<Int4>(c) >> 16;
3459}
3460
Ben Clayton713b8d32019-12-17 20:37:56 +00003461Int4::Int4(RValue<UShort4> cast)
3462 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003463{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003464 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003465 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003466 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003467 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003468 storeValue(d);
3469}
3470
Ben Clayton713b8d32019-12-17 20:37:56 +00003471Int4::Int4(RValue<Int> rhs)
3472 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003473{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003474 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003475 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003476
Ben Clayton713b8d32019-12-17 20:37:56 +00003477 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003478 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3479
3480 storeValue(replicate);
3481}
3482
3483RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3484{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003485 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003486 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003487 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003488 Int4 result;
3489 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3490 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3491 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3492 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003493
Nicolas Capens157ba262019-12-10 17:49:14 -05003494 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003495 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003496 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003497 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003498 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003499 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003500}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003501
Nicolas Capens157ba262019-12-10 17:49:14 -05003502RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3503{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003504 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003505 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003506 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003507 Int4 result;
3508 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3509 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3510 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3511 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003512
Nicolas Capens157ba262019-12-10 17:49:14 -05003513 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003514 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003515 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003516 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003517 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003518 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003519}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003520
Nicolas Capens157ba262019-12-10 17:49:14 -05003521RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3522{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003523 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003524 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003525}
3526
3527RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3528{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003529 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003530 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003531}
3532
3533RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3534{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003535 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003536 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003537}
3538
3539RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3540{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003541 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003542 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003543}
3544
3545RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3546{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003547 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003548 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003549}
3550
3551RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3552{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003553 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003554 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003555}
3556
Nicolas Capens629bf952022-01-18 15:08:14 -05003557RValue<Int4> Abs(RValue<Int4> x)
3558{
3559 // TODO: Optimize.
3560 auto negative = x >> 31;
3561 return (x ^ negative) - negative;
3562}
3563
Nicolas Capens157ba262019-12-10 17:49:14 -05003564RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3565{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003566 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003567 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003568 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003569 ::basicBlock->appendInst(cmp);
3570
3571 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003572 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003573 ::basicBlock->appendInst(select);
3574
3575 return RValue<Int4>(V(result));
3576}
3577
3578RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3579{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003580 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003581 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003582 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003583 ::basicBlock->appendInst(cmp);
3584
3585 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003586 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003587 ::basicBlock->appendInst(select);
3588
3589 return RValue<Int4>(V(result));
3590}
3591
3592RValue<Int4> RoundInt(RValue<Float4> cast)
3593{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003594 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003595 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003596 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003597 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3598 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003599 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003600 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003601 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003602 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003603 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003604 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003605 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003606 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003607
3608 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003609 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003610}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003611
Nicolas Capenseeb81842021-01-12 17:44:40 -05003612RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3613{
3614 RR_DEBUG_INFO_UPDATE_LOC();
3615
3616 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3617 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3618 // saturate to 0x80000000.
3619 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3620
3621 if(emulateIntrinsics || CPUID::ARM)
3622 {
3623 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3624 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3625 }
3626 else
3627 {
3628 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3629 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003630 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capenseeb81842021-01-12 17:44:40 -05003631 nearbyint->addArg(clamped.value());
3632 ::basicBlock->appendInst(nearbyint);
3633
3634 return RValue<Int4>(V(result));
3635 }
3636}
3637
Nicolas Capens157ba262019-12-10 17:49:14 -05003638RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3639{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003640 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003641 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003642 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003643 Short8 result;
3644 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3645 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3646 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3647 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3648 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3649 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3650 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3651 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003652
Nicolas Capens157ba262019-12-10 17:49:14 -05003653 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003654 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003655 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003656 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003657 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003658 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003659 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003660 pack->addArg(x.value());
3661 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003662 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003663
Nicolas Capens157ba262019-12-10 17:49:14 -05003664 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003665 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003666}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003667
Nicolas Capens157ba262019-12-10 17:49:14 -05003668RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3669{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003670 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003671 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003672 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003673 RValue<Int4> sx = As<Int4>(x);
3674 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003675
Nicolas Capens157ba262019-12-10 17:49:14 -05003676 RValue<Int4> sy = As<Int4>(y);
3677 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003678
Nicolas Capens157ba262019-12-10 17:49:14 -05003679 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003680 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003681 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003682 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003683 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003684 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003685 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003686 pack->addArg(x.value());
3687 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003688 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003689
Nicolas Capens157ba262019-12-10 17:49:14 -05003690 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003691 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003692}
Nicolas Capens33438a62017-09-27 11:47:35 -04003693
Nicolas Capens157ba262019-12-10 17:49:14 -05003694RValue<Int> SignMask(RValue<Int4> x)
3695{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003696 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003697 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003698 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003699 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3700 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003701 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003702 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003703 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003704 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003705 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003706 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003707 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003708 ::basicBlock->appendInst(movmsk);
3709
3710 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003711 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003712}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003713
Nicolas Capens519cf222020-05-08 15:27:19 -04003714Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003715{
3716 return T(Ice::IceType_v4i32);
3717}
3718
Ben Clayton713b8d32019-12-17 20:37:56 +00003719UInt4::UInt4(RValue<Float4> cast)
3720 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003721{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003722 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003723 // Smallest positive value representable in UInt, but not in Int
3724 const unsigned int ustart = 0x80000000u;
3725 const float ustartf = float(ustart);
3726
3727 // Check if the value can be represented as an Int
3728 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3729 // If the value is too large, subtract ustart and re-add it after conversion.
3730 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003731 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003732 (~uiValue & Int4(cast));
3733 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003734 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003735}
3736
Ben Clayton713b8d32019-12-17 20:37:56 +00003737UInt4::UInt4(RValue<UInt> rhs)
3738 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003739{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003740 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003741 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003742
Ben Clayton713b8d32019-12-17 20:37:56 +00003743 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003744 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3745
3746 storeValue(replicate);
3747}
3748
3749RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3750{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003751 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003752 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003753 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003754 UInt4 result;
3755 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3756 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3757 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3758 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003759
Nicolas Capens157ba262019-12-10 17:49:14 -05003760 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003761 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003762 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003763 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003764 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003765 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003766}
Ben Clayton88816fa2019-05-15 17:08:14 +01003767
Nicolas Capens157ba262019-12-10 17:49:14 -05003768RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3769{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003770 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003771 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003772 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003773 UInt4 result;
3774 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3775 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3776 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3777 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003778
Nicolas Capens157ba262019-12-10 17:49:14 -05003779 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003780 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003781 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003782 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003783 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003784 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003785}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003786
Nicolas Capens157ba262019-12-10 17:49:14 -05003787RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3788{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003789 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003790 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003791}
3792
3793RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3794{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003795 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003796 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003797}
3798
3799RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3800{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003801 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003802 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003803}
3804
3805RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3806{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003807 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003808 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003809}
3810
3811RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3812{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003813 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003814 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003815}
3816
3817RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3818{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003819 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003820 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003821}
3822
3823RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3824{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003825 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003826 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003827 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003828 ::basicBlock->appendInst(cmp);
3829
3830 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003831 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003832 ::basicBlock->appendInst(select);
3833
3834 return RValue<UInt4>(V(result));
3835}
3836
3837RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003840 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003841 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003842 ::basicBlock->appendInst(cmp);
3843
3844 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003845 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003846 ::basicBlock->appendInst(select);
3847
3848 return RValue<UInt4>(V(result));
3849}
3850
Nicolas Capens519cf222020-05-08 15:27:19 -04003851Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003852{
3853 return T(Ice::IceType_v4i32);
3854}
3855
Nicolas Capens519cf222020-05-08 15:27:19 -04003856Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003857{
3858 return T(Ice::IceType_i16);
3859}
3860
3861RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003864 return 1.0f / x;
3865}
3866
3867RValue<Float> RcpSqrt_pp(RValue<Float> x)
3868{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003869 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003870 return Rcp_pp(Sqrt(x));
3871}
3872
3873RValue<Float> Sqrt(RValue<Float> x)
3874{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003875 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003876 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003877 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003878 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003879 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003880 ::basicBlock->appendInst(sqrt);
3881
3882 return RValue<Float>(V(result));
3883}
3884
3885RValue<Float> Round(RValue<Float> x)
3886{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003887 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003888 return Float4(Round(Float4(x))).x;
3889}
3890
3891RValue<Float> Trunc(RValue<Float> x)
3892{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003893 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003894 return Float4(Trunc(Float4(x))).x;
3895}
3896
3897RValue<Float> Frac(RValue<Float> x)
3898{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003899 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003900 return Float4(Frac(Float4(x))).x;
3901}
3902
3903RValue<Float> Floor(RValue<Float> x)
3904{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003905 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003906 return Float4(Floor(Float4(x))).x;
3907}
3908
3909RValue<Float> Ceil(RValue<Float> x)
3910{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003911 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003912 return Float4(Ceil(Float4(x))).x;
3913}
3914
Nicolas Capens519cf222020-05-08 15:27:19 -04003915Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003916{
3917 return T(Ice::IceType_f32);
3918}
3919
Nicolas Capens519cf222020-05-08 15:27:19 -04003920Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003921{
3922 return T(Type_v2f32);
3923}
3924
Ben Clayton713b8d32019-12-17 20:37:56 +00003925Float4::Float4(RValue<Float> rhs)
3926 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003927{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003928 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003929 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003930
Ben Clayton713b8d32019-12-17 20:37:56 +00003931 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003932 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3933
3934 storeValue(replicate);
3935}
3936
Nicolas Capens629bf952022-01-18 15:08:14 -05003937RValue<Float4> Abs(RValue<Float4> x)
3938{
3939 // TODO: Optimize.
3940 Value *vector = Nucleus::createBitCast(x.value(), Int4::type());
3941 int64_t constantVector[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
3942 Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, Int4::type()));
3943
3944 return As<Float4>(result);
3945}
3946
Nicolas Capens157ba262019-12-10 17:49:14 -05003947RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3948{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003949 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003950 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003951 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003952 ::basicBlock->appendInst(cmp);
3953
3954 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003955 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003956 ::basicBlock->appendInst(select);
3957
3958 return RValue<Float4>(V(result));
3959}
3960
3961RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3962{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003963 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003964 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003965 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003966 ::basicBlock->appendInst(cmp);
3967
3968 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003969 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003970 ::basicBlock->appendInst(select);
3971
3972 return RValue<Float4>(V(result));
3973}
3974
3975RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3976{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003977 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003978 return Float4(1.0f) / x;
3979}
3980
3981RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3982{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003983 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003984 return Rcp_pp(Sqrt(x));
3985}
3986
// Reports whether RcpApprox() is available in this backend. Always false here.
bool HasRcpApprox()
{
    // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
    constexpr bool available = false;
    return available;
}
3992
3993RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
3994{
3995 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3996 UNREACHABLE("RValue<Float4> RcpApprox()");
3997 return { 0.0f };
3998}
3999
4000RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
4001{
4002 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4003 UNREACHABLE("RValue<Float> RcpApprox()");
4004 return { 0.0f };
4005}
4006
// Reports whether RcpSqrtApprox() is available in this backend. Always false here.
bool HasRcpSqrtApprox()
{
    constexpr bool available = false;
    return available;
}
4011
4012RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
4013{
4014 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4015 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
4016 return { 0.0f };
4017}
4018
4019RValue<Float> RcpSqrtApprox(RValue<Float> x)
4020{
4021 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4022 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
4023 return { 0.0f };
4024}
4025
Nicolas Capens157ba262019-12-10 17:49:14 -05004026RValue<Float4> Sqrt(RValue<Float4> x)
4027{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004028 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004029 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04004030 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004031 Float4 result;
4032 result.x = Sqrt(Float(Float4(x).x));
4033 result.y = Sqrt(Float(Float4(x).y));
4034 result.z = Sqrt(Float(Float4(x).z));
4035 result.w = Sqrt(Float(Float4(x).w));
4036
4037 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04004038 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004039 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04004040 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004041 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004042 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004043 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004044 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04004045 ::basicBlock->appendInst(sqrt);
4046
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04004047 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04004048 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04004049}
Nicolas Capens157ba262019-12-10 17:49:14 -05004050
4051RValue<Int> SignMask(RValue<Float4> x)
4052{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004053 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004054 if(emulateIntrinsics || CPUID::ARM)
4055 {
4056 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
4057 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
4058 }
4059 else
4060 {
4061 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004062 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004063 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004064 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004065 ::basicBlock->appendInst(movmsk);
4066
4067 return RValue<Int>(V(result));
4068 }
4069}
4070
4071RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
4072{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004073 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004074 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004075}
4076
4077RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
4078{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004079 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004080 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004081}
4082
4083RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4084{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004085 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004086 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004087}
4088
4089RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4090{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004091 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004092 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004093}
4094
4095RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4096{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004097 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004098 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004099}
4100
4101RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4102{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004103 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004104 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004105}
4106
4107RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4108{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004109 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004110 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004111}
4112
4113RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4114{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004115 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004116 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004117}
4118
4119RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4120{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004121 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004122 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004123}
4124
4125RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4126{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004127 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004128 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004129}
4130
4131RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4132{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004133 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004134 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004135}
4136
4137RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4138{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004139 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004140 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004141}
4142
4143RValue<Float4> Round(RValue<Float4> x)
4144{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004145 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004146 if(emulateIntrinsics || CPUID::ARM)
4147 {
4148 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4149 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4150 }
4151 else if(CPUID::SSE4_1)
4152 {
4153 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004154 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004155 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004156 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004157 round->addArg(::context->getConstantInt32(0));
4158 ::basicBlock->appendInst(round);
4159
4160 return RValue<Float4>(V(result));
4161 }
4162 else
4163 {
4164 return Float4(RoundInt(x));
4165 }
4166}
4167
4168RValue<Float4> Trunc(RValue<Float4> x)
4169{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004170 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004171 if(CPUID::SSE4_1)
4172 {
4173 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004174 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004175 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004176 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004177 round->addArg(::context->getConstantInt32(3));
4178 ::basicBlock->appendInst(round);
4179
4180 return RValue<Float4>(V(result));
4181 }
4182 else
4183 {
4184 return Float4(Int4(x));
4185 }
4186}
4187
4188RValue<Float4> Frac(RValue<Float4> x)
4189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004190 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004191 Float4 frc;
4192
4193 if(CPUID::SSE4_1)
4194 {
4195 frc = x - Floor(x);
4196 }
4197 else
4198 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004199 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004200
Ben Clayton713b8d32019-12-17 20:37:56 +00004201 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004202 }
4203
4204 // x - floor(x) can be 1.0 for very small negative x.
4205 // Clamp against the value just below 1.0.
4206 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4207}
4208
4209RValue<Float4> Floor(RValue<Float4> x)
4210{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004211 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004212 if(CPUID::SSE4_1)
4213 {
4214 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004215 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004216 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004217 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004218 round->addArg(::context->getConstantInt32(1));
4219 ::basicBlock->appendInst(round);
4220
4221 return RValue<Float4>(V(result));
4222 }
4223 else
4224 {
4225 return x - Frac(x);
4226 }
4227}
4228
4229RValue<Float4> Ceil(RValue<Float4> x)
4230{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004231 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004232 if(CPUID::SSE4_1)
4233 {
4234 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004235 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004236 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004237 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004238 round->addArg(::context->getConstantInt32(2));
4239 ::basicBlock->appendInst(round);
4240
4241 return RValue<Float4>(V(result));
4242 }
4243 else
4244 {
4245 return -Floor(-x);
4246 }
4247}
4248
Nicolas Capens519cf222020-05-08 15:27:19 -04004249Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004250{
4251 return T(Ice::IceType_v4f32);
4252}
4253
4254RValue<Long> Ticks()
4255{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004256 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004257 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004258 return Long(Int(0));
4259}
4260
Ben Clayton713b8d32019-12-17 20:37:56 +00004261RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004262{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004263 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004264 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004265}
4266
Ben Clayton713b8d32019-12-17 20:37:56 +00004267RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004268{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004269 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004270 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004271}
4272
Ben Clayton713b8d32019-12-17 20:37:56 +00004273Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004274{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004275 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004276 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004277}
4278
4279void Breakpoint()
4280{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004281 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004282 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004283 auto trap = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05004284 ::basicBlock->appendInst(trap);
4285}
4286
Ben Clayton713b8d32019-12-17 20:37:56 +00004287void Nucleus::createFence(std::memory_order memoryOrder)
4288{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004289 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004290 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004291 auto inst = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004292 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4293 inst->addArg(order);
4294 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004295}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004296
Ben Clayton713b8d32019-12-17 20:37:56 +00004297Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4298{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004299 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense4b77942021-08-03 17:09:41 -04004300 UNIMPLEMENTED("b/155867273 Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004301 return nullptr;
4302}
Nicolas Capense4b77942021-08-03 17:09:41 -04004303
Ben Clayton713b8d32019-12-17 20:37:56 +00004304void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense4b77942021-08-03 17:09:41 -04004307 UNIMPLEMENTED("b/155867273 Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004308}
Nicolas Capens157ba262019-12-10 17:49:14 -05004309
4310RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004313 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4314}
4315
4316RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004319 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4320}
4321
4322void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004325 return emulated::Scatter(base, val, offsets, mask, alignment);
4326}
4327
4328void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004331 return emulated::Scatter(base, val, offsets, mask, alignment);
4332}
4333
4334RValue<Float> Exp2(RValue<Float> x)
4335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004337 return emulated::Exp2(x);
4338}
4339
4340RValue<Float> Log2(RValue<Float> x)
4341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004343 return emulated::Log2(x);
4344}
4345
4346RValue<Float4> Sin(RValue<Float4> x)
4347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004348 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004349 return optimal::Sin(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004350}
4351
4352RValue<Float4> Cos(RValue<Float4> x)
4353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004354 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004355 return optimal::Cos(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004356}
4357
4358RValue<Float4> Tan(RValue<Float4> x)
4359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004360 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004361 return optimal::Tan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004362}
4363
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004364RValue<Float4> Asin(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004365{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004366 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004367 if(p == Precision::Full)
4368 {
4369 return emulated::Asin(x);
4370 }
4371 return optimal::Asin_8_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004372}
4373
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004374RValue<Float4> Acos(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004375{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004376 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004377 // Surprisingly, deqp-vk's precision.acos.highp/mediump tests pass when using the 4-term polynomial approximation
4378 // version of acos, unlike for Asin, which requires higher precision algorithms.
4379 return optimal::Acos_4_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004380}
4381
4382RValue<Float4> Atan(RValue<Float4> x)
4383{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004384 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004385 return optimal::Atan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004386}
4387
4388RValue<Float4> Sinh(RValue<Float4> x)
4389{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004390 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004391 return optimal::Sinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004392}
4393
4394RValue<Float4> Cosh(RValue<Float4> x)
4395{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004396 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004397 return optimal::Cosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004398}
4399
4400RValue<Float4> Tanh(RValue<Float4> x)
4401{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004402 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004403 return optimal::Tanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004404}
4405
4406RValue<Float4> Asinh(RValue<Float4> x)
4407{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004408 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004409 return optimal::Asinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004410}
4411
4412RValue<Float4> Acosh(RValue<Float4> x)
4413{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004414 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004415 return optimal::Acosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004416}
4417
4418RValue<Float4> Atanh(RValue<Float4> x)
4419{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004420 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004421 return optimal::Atanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004422}
4423
4424RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4425{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004426 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004427 return optimal::Atan2(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004428}
4429
4430RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4431{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004432 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004433 return optimal::Pow(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004434}
4435
4436RValue<Float4> Exp(RValue<Float4> x)
4437{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004438 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004439 return optimal::Exp(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004440}
4441
4442RValue<Float4> Log(RValue<Float4> x)
4443{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004444 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004445 return optimal::Log(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004446}
4447
4448RValue<Float4> Exp2(RValue<Float4> x)
4449{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004450 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004451 return optimal::Exp2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004452}
4453
4454RValue<Float4> Log2(RValue<Float4> x)
4455{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004456 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004457 return optimal::Log2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004458}
4459
4460RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4461{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004462 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004463 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004464 {
Ben Claytonce54c592020-02-07 11:30:51 +00004465 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004466 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004467 }
4468 else
4469 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004470 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004471 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004472 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004473 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004474 ::basicBlock->appendInst(ctlz);
4475
4476 return RValue<UInt>(V(result));
4477 }
4478}
4479
4480RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4481{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004482 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004483 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004484 {
Ben Claytonce54c592020-02-07 11:30:51 +00004485 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004486 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004487 }
4488 else
4489 {
4490 // TODO: implement vectorized version in Subzero
4491 UInt4 result;
4492 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4493 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4494 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4495 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4496 return result;
4497 }
4498}
4499
4500RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4501{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004502 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004503 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004504 {
Ben Claytonce54c592020-02-07 11:30:51 +00004505 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004506 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004507 }
4508 else
4509 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004510 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004511 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004512 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004513 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004514 ::basicBlock->appendInst(ctlz);
4515
4516 return RValue<UInt>(V(result));
4517 }
4518}
4519
4520RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4521{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004522 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004523 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004524 {
Ben Claytonce54c592020-02-07 11:30:51 +00004525 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004526 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004527 }
4528 else
4529 {
4530 // TODO: implement vectorized version in Subzero
4531 UInt4 result;
4532 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4533 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4534 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4535 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4536 return result;
4537 }
4538}
4539
Antonio Maiorano370cba52019-12-31 11:36:07 -05004540RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4541{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004542 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004543 return emulated::MinAtomic(x, y, memoryOrder);
4544}
4545
4546RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4547{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004548 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004549 return emulated::MinAtomic(x, y, memoryOrder);
4550}
4551
4552RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4553{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004554 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004555 return emulated::MaxAtomic(x, y, memoryOrder);
4556}
4557
4558RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4559{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004560 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004561 return emulated::MaxAtomic(x, y, memoryOrder);
4562}
4563
// Emits a print of the caller's backtrace location when the build defines
// ENABLE_RR_DEBUG_INFO; otherwise this is a no-op.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO)
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004570void EmitDebugVariable(Value *value) {}
// Intentionally empty: there is nothing to flush here.
void FlushDebug()
{
}
4572
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004573namespace {
4574namespace coro {
4575
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004576// Instance data per generated coroutine
4577// This is the "handle" type used for Coroutine functions
4578// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4579struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004580{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004581 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004582 bool done = false; // the coroutine should stop at the next yield()
4583 bool terminated = false; // the coroutine has finished.
4584 bool inRoutine = false; // is the coroutine currently executing?
4585 marl::Scheduler::Fiber *mainFiber = nullptr;
4586 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004587 void *promisePtr = nullptr;
4588};
4589
4590CoroutineData *createCoroutineData()
4591{
4592 return new CoroutineData{};
4593}
4594
4595void destroyCoroutineData(CoroutineData *coroData)
4596{
4597 delete coroData;
4598}
4599
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004600// suspend() pauses execution of the coroutine, and resumes execution from the
4601// caller's call to await().
4602// Returns true if await() is called again, or false if coroutine_destroy()
4603// is called.
4604bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004605{
Ben Claytonc3466532020-03-24 11:54:05 +00004606 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4607 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4608 ASSERT(coroData->inRoutine);
4609 coroData->inRoutine = false;
4610 coroData->mainFiber->notify();
4611 while(!coroData->inRoutine)
4612 {
4613 coroData->routineFiber->wait();
4614 }
4615 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004616}
4617
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004618// resume() is called by await(), blocking until the coroutine calls yield()
4619// or the coroutine terminates.
4620void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004621{
Ben Claytonc3466532020-03-24 11:54:05 +00004622 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4623 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4624 ASSERT(!coroData->inRoutine);
4625 coroData->inRoutine = true;
4626 coroData->routineFiber->notify();
4627 while(coroData->inRoutine)
4628 {
4629 coroData->mainFiber->wait();
4630 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004631}
4632
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004633// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4634// until the coroutine ends, and deletes the coroutine data.
4635void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004636{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004637 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004638 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4639 ASSERT(!coroData->inRoutine);
4640 if(!coroData->terminated)
4641 {
4642 coroData->done = true;
4643 coroData->inRoutine = true;
4644 coroData->routineFiber->notify();
4645 while(!coroData->terminated)
4646 {
4647 coroData->mainFiber->wait();
4648 }
4649 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004650 if(coroData->useInternalScheduler)
4651 {
4652 ::getOrCreateScheduler().unbind();
4653 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004654 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004655}
4656
4657namespace detail {
4658thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4659} // namespace detail
4660
4661void setHandleParam(Nucleus::CoroutineHandle handle)
4662{
4663 ASSERT(!detail::coroHandle);
4664 detail::coroHandle = handle;
4665}
4666
4667Nucleus::CoroutineHandle getHandleParam()
4668{
4669 ASSERT(detail::coroHandle);
4670 auto handle = detail::coroHandle;
4671 detail::coroHandle = {};
4672 return handle;
4673}
4674
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004675bool isDone(Nucleus::CoroutineHandle handle)
4676{
4677 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004678 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004679}
4680
4681void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4682{
4683 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4684 coroData->promisePtr = promisePtr;
4685}
4686
4687void *getPromisePtr(Nucleus::CoroutineHandle handle)
4688{
4689 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4690 return coroData->promisePtr;
4691}
4692
4693} // namespace coro
4694} // namespace
4695
4696// Used to generate coroutines.
4697// Lifetime: from yield to acquireCoroutine
4698class CoroutineGenerator
4699{
4700public:
4701 CoroutineGenerator()
4702 {
4703 }
4704
4705 // Inserts instructions at the top of the current function to make it a coroutine.
4706 void generateCoroutineBegin()
4707 {
4708 // Begin building the main coroutine_begin() function.
4709 // We insert these instructions at the top of the entry node,
4710 // before existing reactor-generated instructions.
4711
4712 // CoroutineHandle coroutine_begin(<Arguments>)
4713 // {
4714 // this->handle = coro::getHandleParam();
4715 //
4716 // YieldType promise;
4717 // coro::setPromisePtr(handle, &promise); // For await
4718 //
4719 // ... <REACTOR CODE> ...
4720 //
4721
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004722 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004723 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004724
4725 // YieldType promise;
4726 // coro::setPromisePtr(handle, &promise); // For await
4727 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004728 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004729 }
4730
4731 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4732 void generateYield(Value *val)
4733 {
4734 // ... <REACTOR CODE> ...
4735 //
4736 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004737 // if (!coro::suspend(handle)) {
4738 // return false; // coroutine has been stopped by the caller.
4739 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004740 //
4741 // ... <REACTOR CODE> ...
4742
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004743 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004744 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004745
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004746 // if (!coro::suspend(handle)) {
4747 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4748 auto doneBlock = Nucleus::createBasicBlock();
4749 auto resumeBlock = Nucleus::createBasicBlock();
4750 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4751
4752 // return false; // coroutine has been stopped by the caller.
4753 ::basicBlock = doneBlock;
4754 Nucleus::createRetVoid(); // coroutine return value is ignored.
4755
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004756 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004757 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004758 }
4759
4760 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4761
4762 // Generates the await function for the current coroutine.
4763 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4764 static FunctionUniquePtr generateAwaitFunction()
4765 {
4766 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4767 // {
4768 // if (coro::isDone())
4769 // {
4770 // return false;
4771 // }
4772 // else // resume
4773 // {
4774 // YieldType* promise = coro::getPromisePtr(handle);
4775 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004776 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004777 // return true;
4778 // }
4779 // }
4780
4781 // Subzero doesn't support bool types (IceType_i1) as return type
4782 const Ice::Type ReturnType = Ice::IceType_i32;
4783 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4784 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4785
4786 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4787 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4788
4789 Ice::Variable *handle = awaitFunc->getArgs()[0];
4790 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4791
4792 auto doneBlock = awaitFunc->makeNode();
4793 {
4794 // return false;
4795 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4796 doneBlock->appendInst(ret);
4797 }
4798
4799 auto resumeBlock = awaitFunc->makeNode();
4800 {
4801 // YieldType* promise = coro::getPromisePtr(handle);
4802 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4803
4804 // *out = *promise;
4805 // Load promise value
4806 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4807 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4808 resumeBlock->appendInst(load);
4809 // Then store it in output param
4810 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4811 resumeBlock->appendInst(store);
4812
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004813 // coro::resume(handle);
4814 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004815
4816 // return true;
4817 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4818 resumeBlock->appendInst(ret);
4819 }
4820
4821 // if (coro::isDone())
4822 // {
4823 // <doneBlock>
4824 // }
4825 // else // resume
4826 // {
4827 // <resumeBlock>
4828 // }
4829 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004830 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004831 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4832 bb->appendInst(br);
4833
4834 return FunctionUniquePtr{ awaitFunc };
4835 }
4836
4837 // Generates the destroy function for the current coroutine.
4838 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4839 static FunctionUniquePtr generateDestroyFunction()
4840 {
4841 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4842 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004843 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004844 // return;
4845 // }
4846
4847 const Ice::Type ReturnType = Ice::IceType_void;
4848 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4849
4850 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4851 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4852
4853 Ice::Variable *handle = destroyFunc->getArgs()[0];
4854
4855 auto *bb = destroyFunc->getEntryNode();
4856
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004857 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4858 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004859
4860 // return;
4861 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4862 bb->appendInst(ret);
4863
4864 return FunctionUniquePtr{ destroyFunc };
4865 }
4866
4867private:
4868 Ice::Variable *handle{};
4869 Ice::Variable *promise{};
4870};
4871
4872static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4873{
4874 // This doubles up as our coroutine handle
4875 auto coroData = coro::createCoroutineData();
4876
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004877 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4878 if(coroData->useInternalScheduler)
4879 {
4880 ::getOrCreateScheduler().bind();
4881 }
4882
Ben Clayton76e9e532020-03-16 20:35:04 +00004883 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004884 // Store handle in TLS so that the coroutine can grab it right away, before
4885 // any fiber switch occurs.
4886 coro::setHandleParam(coroData);
4887
Ben Claytonc3466532020-03-24 11:54:05 +00004888 ASSERT(!coroData->routineFiber);
4889 coroData->routineFiber = marl::Scheduler::Fiber::current();
4890
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004891 beginFunc();
4892
Ben Claytonc3466532020-03-24 11:54:05 +00004893 ASSERT(coroData->inRoutine);
4894 coroData->done = true; // coroutine is done.
4895 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4896 coroData->inRoutine = false;
4897 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004898 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004899
Ben Claytonc3466532020-03-24 11:54:05 +00004900 ASSERT(!coroData->mainFiber);
4901 coroData->mainFiber = marl::Scheduler::Fiber::current();
4902
4903 // block until the first yield or coroutine end
4904 ASSERT(!coroData->inRoutine);
4905 coroData->inRoutine = true;
4906 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4907 while(coroData->inRoutine)
4908 {
4909 coroData->mainFiber->wait();
4910 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004911
4912 return coroData;
4913}
4914
4915void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4916{
4917 // Start by creating a regular function
4918 createFunction(yieldType, params);
4919
4920 // Save in case yield() is called
4921 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4922 ::coroYieldType = yieldType;
4923}
4924
4925void Nucleus::yield(Value *val)
4926{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004927 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004928 Variable::materializeAll();
4929
4930 // On first yield, we start generating coroutine functions
4931 if(!::coroGen)
4932 {
4933 ::coroGen = std::make_shared<CoroutineGenerator>();
4934 ::coroGen->generateCoroutineBegin();
4935 }
4936
4937 ASSERT(::coroGen);
4938 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004939}
4940
Ben Clayton713b8d32019-12-17 20:37:56 +00004941static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4942{
4943 return false;
4944}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004945
4946static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4947{
4948}
Nicolas Capens157ba262019-12-10 17:49:14 -05004949
Sean Risser705231f2021-08-19 18:17:24 -04004950std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit *cfgEdit /* = nullptr */)
Nicolas Capens157ba262019-12-10 17:49:14 -05004951{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004952 if(::coroGen)
4953 {
4954 // Finish generating coroutine functions
4955 {
4956 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004957 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004958 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004959
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004960 auto awaitFunc = ::coroGen->generateAwaitFunction();
4961 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004962
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004963 // At this point, we no longer need the CoroutineGenerator.
4964 ::coroGen.reset();
4965 ::coroYieldType = nullptr;
4966
4967 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4968 { name, "await", "destroy" },
4969 cfgEdit);
4970
4971 return routine;
4972 }
4973 else
4974 {
4975 {
4976 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004977 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004978 }
4979
4980 ::coroYieldType = nullptr;
4981
4982 // Not an actual coroutine (no yields), so return stubs for await and destroy
4983 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4984
4985 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4986 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4987 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4988 return routine;
4989 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004990}
4991
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004992Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004993{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004994 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4995
4996 if(isCoroutine)
4997 {
4998 return rr::invokeCoroutineBegin(func);
4999 }
5000 else
5001 {
5002 // For regular routines, just invoke the begin func directly
5003 return func();
5004 }
Ben Clayton713b8d32019-12-17 20:37:56 +00005005}
Nicolas Capens157ba262019-12-10 17:49:14 -05005006
5007} // namespace rr