blob: 8fe7e31e2103d8444688cf412d828be2126b963f [file] [log] [blame]
Nicolas Capens598f8d82016-09-26 15:09:10 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050016#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000017#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050018#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040019
Nicolas Capens1a3ce872018-10-10 10:42:36 -040020#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000021#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040022
Nicolas Capens598f8d82016-09-26 15:09:10 -040023#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040024#include "src/IceCfgNode.h"
25#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000026#include "src/IceELFStreamer.h"
27#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040028#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000029#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040030
Ben Clayton713b8d32019-12-17 20:37:56 +000031#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040032#include "llvm/Support/FileSystem.h"
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -050033#include "llvm/Support/ManagedStatic.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040034#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040035
Antonio Maiorano8bce0672020-02-28 13:13:45 -050036#include "marl/event.h"
37
Nicolas Capens6a990f82018-07-06 15:54:07 -040038#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040041
Nicolas Capensbd65da92017-01-05 16:31:06 -050042#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000043# ifndef WIN32_LEAN_AND_MEAN
44# define WIN32_LEAN_AND_MEAN
45# endif // !WIN32_LEAN_AND_MEAN
46# ifndef NOMINMAX
47# define NOMINMAX
48# endif // !NOMINMAX
49# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050050#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
Ben Clayton683bad82020-02-10 23:57:09 +000052#include <array>
Nicolas Capens4ee53092022-02-05 01:53:12 -050053#include <cmath>
Nicolas Capens598f8d82016-09-26 15:09:10 -040054#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000055#include <limits>
56#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040057
Antonio Maiorano02a39532020-01-21 15:15:34 -050058// Subzero utility functions
59// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050060namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050061namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050062
63Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
64{
65 uint32_t sequenceNumber = 0;
Nicolas Capensff010f92021-02-01 12:22:53 -050066 auto *function = Ice::Cfg::create(context, sequenceNumber).release();
67
68 function->setStackSizeLimit(512 * 1024); // 512 KiB
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050069
70 Ice::CfgLocalAllocatorScope allocScope{ function };
71
72 for(auto type : paramTypes)
73 {
74 Ice::Variable *arg = function->makeVariable(type);
75 function->addArg(arg);
76 }
77
78 Ice::CfgNode *node = function->makeNode();
79 function->setEntryNode(node);
80
81 return function;
82}
83
84Ice::Type getPointerType(Ice::Type elementType)
85{
86 if(sizeof(void *) == 8)
87 {
88 return Ice::IceType_i64;
89 }
90 else
91 {
92 return Ice::IceType_i32;
93 }
94}
95
96Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
97{
98 int typeSize = Ice::typeWidthInBytes(type);
99 int totalSize = typeSize * (arraySize ? arraySize : 1);
100
101 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
102 auto address = function->makeVariable(getPointerType(type));
Nicolas Capens0cfc0432021-02-05 15:18:42 -0500103 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize); // SRoA depends on the alignment to match the type size.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500104 function->getEntryNode()->getInsts().push_front(alloca);
105
106 return address;
107}
108
109Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500110{
111 if(sizeof(void *) == 8)
112 {
113 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
114 }
115 else
116 {
117 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
118 }
119}
120
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400121// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
122Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500123
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400124// Wrapper for calls on C functions with Ice types
125Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
126{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500127 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400128
129 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
130 // then truncate result to bool.
131 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
132 const bool returningBool = (retTy == Ice::IceType_i1);
133 if(returningBool)
134 {
135 ret = function->makeVariable(Ice::IceType_i32);
136 }
137 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500138 {
139 ret = function->makeVariable(retTy);
140 }
141
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400142 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500143 for(auto arg : iceArgs)
144 {
145 call->addArg(arg);
146 }
147
148 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400149
150 if(returningBool)
151 {
152 // Truncate result to bool so that if any (lsb) bits were set, result will be true
153 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
154 }
155
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500156 return ret;
157}
158
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400159Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
160{
161 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
162 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
163}
164
Antonio Maiorano62427e02020-02-13 09:18:05 -0500165// Wrapper for calls on C functions with Ice types
166template<typename Return, typename... CArgs, typename... RArgs>
Nicolas Capens629bf952022-01-18 15:08:14 -0500167Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&...args)
Antonio Maiorano62427e02020-02-13 09:18:05 -0500168{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400169 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
170
Nicolas Capens519cf222020-05-08 15:27:19 -0400171 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500172 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500173 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500174}
175
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400176Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
177{
178 Ice::Variable *to = function->makeVariable(toType);
179 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
180 basicBlock->appendInst(cast);
181 return to;
182}
183
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500184Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500185{
Antonio Maiorano02a39532020-01-21 15:15:34 -0500186 Ice::Variable *result = function->makeVariable(type);
187 auto load = Ice::InstLoad::create(function, result, ptr, align);
188 basicBlock->appendInst(load);
189
190 return result;
191}
192
193} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500194} // namespace
195
Ben Clayton713b8d32019-12-17 20:37:56 +0000196namespace rr {
197class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500198class CoroutineGenerator;
199} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500200
201namespace {
202
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -0500203// Used to automatically invoke llvm_shutdown() when driver is unloaded
204llvm::llvm_shutdown_obj llvmShutdownObj;
205
Nicolas Capens157ba262019-12-10 17:49:14 -0500206// Default configuration settings. Must be accessed under mutex lock.
207std::mutex defaultConfigLock;
208rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000209{
Nicolas Capens157ba262019-12-10 17:49:14 -0500210 // This uses a static in a function to avoid the cost of a global static
211 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
212 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000213 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500214 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000215}
216
Nicolas Capens157ba262019-12-10 17:49:14 -0500217Ice::GlobalContext *context = nullptr;
218Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400219Ice::CfgNode *entryBlock = nullptr;
220Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500221Ice::CfgNode *basicBlock = nullptr;
222Ice::CfgLocalAllocatorScope *allocator = nullptr;
223rr::ELFMemoryStreamer *routine = nullptr;
224
225std::mutex codegenMutex;
226
227Ice::ELFFileStreamer *elfFile = nullptr;
228Ice::Fdstream *out = nullptr;
229
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500230// Coroutine globals
231rr::Type *coroYieldType = nullptr;
232std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500233marl::Scheduler &getOrCreateScheduler()
234{
235 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100236 marl::Scheduler::Config cfg;
237 cfg.setWorkerThreadCount(8);
238 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500239 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500240
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500241 return *scheduler;
242}
Nicolas Capens54313fb2021-02-19 14:26:27 -0500243
244rr::Nucleus::OptimizerCallback *optimizerCallback = nullptr;
245
Nicolas Capens157ba262019-12-10 17:49:14 -0500246} // Anonymous namespace
247
248namespace {
249
250#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +0000251# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500252#endif
253
Ben Clayton713b8d32019-12-17 20:37:56 +0000254#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
255# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500256#endif
257
Antonio Maiorano370cba52019-12-31 11:36:07 -0500258Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400259{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500260 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100261 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500262 // Note that Opt_0 and Opt_1 are not implemented by Subzero
263 case rr::Optimization::Level::None: return Ice::Opt_m1;
264 case rr::Optimization::Level::Less: return Ice::Opt_m1;
265 case rr::Optimization::Level::Default: return Ice::Opt_2;
266 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
267 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100268 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500269 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400270}
271
Antonio Maiorano370cba52019-12-31 11:36:07 -0500272Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
273{
274 switch(memoryOrder)
275 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500276 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
277 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
278 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
279 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
280 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
281 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
Antonio Maiorano370cba52019-12-31 11:36:07 -0500282 }
283 return Ice::Intrinsics::MemoryOrderInvalid;
284}
285
// Host CPU feature queries.
//
// ARM is fixed at compile time from the target architecture macros.
// SSE4_1 is read from the CPUID instruction during static initialization on
// x86 targets and is false on every other target.
class CPUID
{
public:
	const static bool ARM;
	const static bool SSE4_1;

private:
	// Executes CPUID for leaf |info| and stores EAX, EBX, ECX, EDX in
	// |registers|[0..3]. On non-x86 targets all four entries are zeroed.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// True only when compiling for 32-bit or 64-bit ARM. Targets other than
	// ARM, x86 and MIPS are rejected at compile time.
	constexpr static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Tests bit 19 of ECX returned by CPUID leaf 1.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		constexpr int kSSE4_1Mask = 0x00080000;

		int info[4];
		cpuid(info, 1);
		return (info[2] & kSSE4_1Mask) != 0;
#else
		return false;
#endif
	}
};

constexpr bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
Sean Risser46a649d2021-08-30 15:44:33 -0400338constexpr bool emulateIntrinsics = false;
339constexpr bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400340
Nicolas Capens157ba262019-12-10 17:49:14 -0500341constexpr bool subzeroDumpEnabled = false;
342constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500343
344#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500345static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
346static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500347#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500348
349} // anonymous namespace
350
351namespace rr {
352
Nicolas Capens70505b42022-01-31 22:29:48 -0500353std::string Caps::backendName()
Antonio Maioranoab210f92019-12-13 16:26:24 -0500354{
355 return "Subzero";
356}
357
Nicolas Capens70505b42022-01-31 22:29:48 -0500358bool Caps::coroutinesSupported()
359{
360 return true;
361}
362
363bool Caps::fmaIsFast()
364{
365 // TODO(b/214591655): Subzero currently never emits FMA instructions. std::fma() is called instead.
366 return false;
367}
Nicolas Capens157ba262019-12-10 17:49:14 -0500368
369enum EmulatedType
370{
371 EmulatedShift = 16,
372 EmulatedV2 = 2 << EmulatedShift,
373 EmulatedV4 = 4 << EmulatedShift,
374 EmulatedV8 = 8 << EmulatedShift,
375 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
376
377 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
378 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
379 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000380 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
381 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500382 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
383};
384
Ben Clayton713b8d32019-12-17 20:37:56 +0000385class Value : public Ice::Operand
386{};
387class SwitchCases : public Ice::InstSwitch
388{};
389class BasicBlock : public Ice::CfgNode
390{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500391
392Ice::Type T(Type *t)
393{
394 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
395 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500396}
397
Nicolas Capens157ba262019-12-10 17:49:14 -0500398Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400399{
Ben Clayton713b8d32019-12-17 20:37:56 +0000400 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500401}
402
403Type *T(EmulatedType t)
404{
Ben Clayton713b8d32019-12-17 20:37:56 +0000405 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500406}
407
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500408std::vector<Ice::Type> T(const std::vector<Type *> &types)
409{
410 std::vector<Ice::Type> result;
411 result.reserve(types.size());
412 for(auto &t : types)
413 {
414 result.push_back(T(t));
415 }
416 return result;
417}
418
Nicolas Capens157ba262019-12-10 17:49:14 -0500419Value *V(Ice::Operand *v)
420{
Ben Clayton713b8d32019-12-17 20:37:56 +0000421 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500422}
423
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500424Ice::Operand *V(Value *v)
425{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500426 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500427}
428
Antonio Maiorano62427e02020-02-13 09:18:05 -0500429std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
430{
431 std::vector<Ice::Operand *> result;
432 result.reserve(values.size());
433 for(auto &v : values)
434 {
435 result.push_back(V(v));
436 }
437 return result;
438}
439
Nicolas Capens157ba262019-12-10 17:49:14 -0500440BasicBlock *B(Ice::CfgNode *b)
441{
Ben Clayton713b8d32019-12-17 20:37:56 +0000442 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500443}
444
445static size_t typeSize(Type *type)
446{
447 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400448 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500449 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800450 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500451 case Type_v2i32: return 8;
452 case Type_v4i16: return 8;
453 case Type_v2i16: return 4;
454 case Type_v8i8: return 8;
455 case Type_v4i8: return 4;
456 case Type_v2f32: return 8;
457 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500458 }
459 }
460
461 return Ice::typeWidthInBytes(T(type));
462}
463
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400464static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500465{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400466 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500467 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
468 {
469 Nucleus::createRetVoid();
470 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400471
472 // Connect the entry block to the top of the initial basic block
473 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
474 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500475}
476
Ben Clayton713b8d32019-12-17 20:37:56 +0000477using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
478using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500479
480inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
481{
Ben Clayton713b8d32019-12-17 20:37:56 +0000482 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500483}
484
485inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
486{
487 return &sectionHeader(elfHeader)[index];
488}
489
490static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
491{
492 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
493
494 uint32_t index = relocation.getSymbol();
495 int table = relocationTable.sh_link;
496 void *symbolValue = nullptr;
497
498 if(index != SHN_UNDEF)
499 {
500 if(table == SHN_UNDEF) return nullptr;
501 const SectionHeader *symbolTable = elfSection(elfHeader, table);
502
503 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
504 if(index >= symtab_entries)
505 {
506 ASSERT(index < symtab_entries && "Symbol Index out of range");
507 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800508 }
509
Nicolas Capens157ba262019-12-10 17:49:14 -0500510 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000511 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500512 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800513
Nicolas Capens157ba262019-12-10 17:49:14 -0500514 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400515 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500516 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000517 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400518 }
519 else
520 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500521 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400522 }
Nicolas Capens66478362016-10-13 15:36:36 -0400523 }
524
Nicolas Capens157ba262019-12-10 17:49:14 -0500525 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000526 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500527
528 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400529 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400530 switch(relocation.getType())
531 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500532 case R_ARM_NONE:
533 // No relocation
534 break;
535 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500536 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000537 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500538 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
539 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
540 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400541 break;
Nicolas Capens112faf42019-12-13 17:32:26 -0500542 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500543 {
544 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
545 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
546 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400547 break;
Nicolas Capens112faf42019-12-13 17:32:26 -0500548 default:
549 ASSERT(false && "Unsupported relocation type");
550 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400551 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500552 }
553 else
554 {
555 switch(relocation.getType())
556 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500557 case R_386_NONE:
558 // No relocation
559 break;
560 case R_386_32:
561 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
562 break;
563 case R_386_PC32:
564 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
565 break;
566 default:
567 ASSERT(false && "Unsupported relocation type");
568 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500569 }
Nicolas Capens66478362016-10-13 15:36:36 -0400570 }
571
Nicolas Capens157ba262019-12-10 17:49:14 -0500572 return symbolValue;
573}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400574
Nicolas Capens157ba262019-12-10 17:49:14 -0500575static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
576{
577 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
578
579 uint32_t index = relocation.getSymbol();
580 int table = relocationTable.sh_link;
581 void *symbolValue = nullptr;
582
583 if(index != SHN_UNDEF)
584 {
585 if(table == SHN_UNDEF) return nullptr;
586 const SectionHeader *symbolTable = elfSection(elfHeader, table);
587
588 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
589 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400590 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500591 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400592 return nullptr;
593 }
594
Nicolas Capens157ba262019-12-10 17:49:14 -0500595 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000596 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400598
Nicolas Capens157ba262019-12-10 17:49:14 -0500599 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400600 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500601 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000602 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 }
604 else
605 {
606 return nullptr;
607 }
608 }
Nicolas Capens66478362016-10-13 15:36:36 -0400609
Nicolas Capens157ba262019-12-10 17:49:14 -0500610 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000611 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
612 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400613
Nicolas Capens157ba262019-12-10 17:49:14 -0500614 switch(relocation.getType())
615 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500616 case R_X86_64_NONE:
617 // No relocation
618 break;
619 case R_X86_64_64:
620 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
621 break;
622 case R_X86_64_PC32:
623 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
624 break;
625 case R_X86_64_32S:
626 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
627 break;
628 default:
629 ASSERT(false && "Unsupported relocation type");
630 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500631 }
632
633 return symbolValue;
634}
635
// Location and size of one function's machine code inside a loaded ELF image.
struct EntryPoint
{
	// Address of the first byte of the function's code. Initialized to
	// nullptr so that a default-constructed EntryPoint never carries an
	// indeterminate pointer.
	const void *entry = nullptr;

	// Size in bytes of the section holding the code.
	size_t codeSize = 0;
};
641
642std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
643{
644 ASSERT(functionNames.size() > 0);
645 std::vector<EntryPoint> entryPoints(functionNames.size());
646
Ben Clayton713b8d32019-12-17 20:37:56 +0000647 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500648
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400649 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500650 if(!elfHeader->checkMagic())
651 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400652 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500653 }
654
655 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000656 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
657#if defined(__i386__)
658 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
659#elif defined(__x86_64__)
660 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
661#elif defined(__arm__)
662 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
663#elif defined(__aarch64__)
664 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
665#elif defined(__mips__)
666 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
667#else
668# error "Unsupported platform"
669#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500670
Ben Clayton713b8d32019-12-17 20:37:56 +0000671 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500672
673 for(int i = 0; i < elfHeader->e_shnum; i++)
674 {
675 if(sectionHeader[i].sh_type == SHT_PROGBITS)
676 {
677 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
678 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400679 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500680 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400681 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500682
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400683 for(size_t j = 0; j < functionNames.size(); ++j)
684 {
685 if(strstr(sectionName, functionNames[j]) != nullptr)
686 {
687 return j;
688 }
689 }
690
691 UNREACHABLE("Failed to find executable section that matches input function names");
692 return static_cast<size_t>(-1);
693 };
694
695 size_t index = findSectionNameEntryIndex();
696 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
697 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400698 }
699 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500700 else if(sectionHeader[i].sh_type == SHT_REL)
701 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000702 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400703
Nicolas Capens157ba262019-12-10 17:49:14 -0500704 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
705 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000706 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500707 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
708 }
709 }
710 else if(sectionHeader[i].sh_type == SHT_RELA)
711 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000712 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500713
714 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
715 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000716 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500717 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
718 }
719 }
720 }
721
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400722 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500723}
724
725template<typename T>
726struct ExecutableAllocator
727{
728 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000729 template<class U>
730 ExecutableAllocator(const ExecutableAllocator<U> &other)
731 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500732
733 using value_type = T;
734 using size_type = std::size_t;
735
736 T *allocate(size_type n)
737 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000738 return (T *)allocateMemoryPages(
739 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500740 }
741
742 void deallocate(T *p, size_type n)
743 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800744 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500745 }
746};
747
748class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
749{
750 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
751 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
752
753public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000754 ELFMemoryStreamer()
755 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500756 {
757 position = 0;
758 buffer.reserve(0x1000);
759 }
760
761 ~ELFMemoryStreamer() override
762 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500763 }
764
765 void write8(uint8_t Value) override
766 {
767 if(position == (uint64_t)buffer.size())
768 {
769 buffer.push_back(Value);
770 position++;
771 }
772 else if(position < (uint64_t)buffer.size())
773 {
774 buffer[position] = Value;
775 position++;
776 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000777 else
778 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500779 }
780
781 void writeBytes(llvm::StringRef Bytes) override
782 {
783 std::size_t oldSize = buffer.size();
784 buffer.resize(oldSize + Bytes.size());
785 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
786 position += Bytes.size();
787 }
788
Jason Macnak0587e072022-02-11 16:49:02 -0800789 uint64_t tell() const override
790 {
791 return position;
792 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500793
Jason Macnak0587e072022-02-11 16:49:02 -0800794 void seek(uint64_t Off) override
795 {
796 position = Off;
797 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500798
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400799 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500800 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400801 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500802
803#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500804 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
805#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400806 for(auto &entryPoint : entryPoints)
807 {
808 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
809 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500810#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500811
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400812 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400813 }
814
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500815 void finalize()
816 {
817 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
818
819 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
820 }
821
Ben Clayton713b8d32019-12-17 20:37:56 +0000822 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400823 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500824 ASSERT(func);
825 funcs[index] = func;
826 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400827
Nicolas Capens157ba262019-12-10 17:49:14 -0500828 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400829 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500830 ASSERT(funcs[index]);
831 return funcs[index];
832 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400833
Antonio Maiorano02a39532020-01-21 15:15:34 -0500834 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500835 {
Nicolas Capens4e75f452021-01-28 01:52:56 -0500836 // Check if we already have a suitable constant.
837 for(const auto &c : constantsPool)
838 {
839 void *ptr = c.data.get();
840 size_t space = c.space;
841
842 void *alignedPtr = std::align(alignment, size, ptr, space);
843
844 if(space < size)
845 {
846 continue;
847 }
848
849 if(memcmp(data, alignedPtr, size) == 0)
850 {
851 return alignedPtr;
852 }
853 }
854
Antonio Maiorano02a39532020-01-21 15:15:34 -0500855 // TODO(b/148086935): Replace with a buffer allocator.
856 size_t space = size + alignment;
857 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
858 void *ptr = buf.get();
859 void *alignedPtr = std::align(alignment, size, ptr, space);
860 ASSERT(alignedPtr);
861 memcpy(alignedPtr, data, size);
Nicolas Capens4e75f452021-01-28 01:52:56 -0500862 constantsPool.emplace_back(std::move(buf), space);
863
Antonio Maiorano02a39532020-01-21 15:15:34 -0500864 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500865 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400866
Nicolas Capens157ba262019-12-10 17:49:14 -0500867private:
Nicolas Capens4e75f452021-01-28 01:52:56 -0500868 struct Constant
869 {
870 Constant(std::unique_ptr<uint8_t[]> data, size_t space)
871 : data(std::move(data))
872 , space(space)
873 {}
874
875 std::unique_ptr<uint8_t[]> data;
876 size_t space;
877 };
878
Ben Clayton713b8d32019-12-17 20:37:56 +0000879 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500880 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
881 std::size_t position;
Nicolas Capens4e75f452021-01-28 01:52:56 -0500882 std::vector<Constant> constantsPool;
Nicolas Capens157ba262019-12-10 17:49:14 -0500883};
884
Antonio Maiorano62427e02020-02-13 09:18:05 -0500885#ifdef ENABLE_RR_PRINT
886void VPrintf(const std::vector<Value *> &vals)
887{
Antonio Maiorano8cbee412020-06-10 15:59:20 -0400888 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(rr::DebugPrintf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500889}
890#endif // ENABLE_RR_PRINT
891
Nicolas Capens157ba262019-12-10 17:49:14 -0500892Nucleus::Nucleus()
893{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400894 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500895
896 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
897 Ice::ClFlags::getParsedClFlags(Flags);
898
Ben Clayton713b8d32019-12-17 20:37:56 +0000899#if defined(__arm__)
900 Flags.setTargetArch(Ice::Target_ARM32);
901 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
902#elif defined(__mips__)
903 Flags.setTargetArch(Ice::Target_MIPS32);
904 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
905#else // x86
906 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
907 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
908#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500909 Flags.setOutFileType(Ice::FT_Elf);
910 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
Nicolas Capens157ba262019-12-10 17:49:14 -0500911 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
912 Flags.setDisableHybridAssembly(true);
913
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500914 // Emit functions into separate sections in the ELF so we can find them by name
915 Flags.setFunctionSections(true);
916
Nicolas Capens157ba262019-12-10 17:49:14 -0500917 static llvm::raw_os_ostream cout(std::cout);
918 static llvm::raw_os_ostream cerr(std::cerr);
919
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500920 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500921 {
922 // Decorate text asm with liveness info
923 Flags.setDecorateAsm(true);
924 }
925
Ben Clayton713b8d32019-12-17 20:37:56 +0000926 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500927 {
928 std::error_code errorCode;
929 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
930 ::elfFile = new Ice::ELFFileStreamer(*out);
931 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
932 }
933 else
934 {
935 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
936 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
937 ::routine = elfMemory;
938 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400939
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400940#if !__has_feature(memory_sanitizer)
941 // thread_local variables in shared libraries are initialized at load-time,
942 // but this is not observed by MemorySanitizer if the loader itself was not
Nicolas Capensaf907702021-05-14 11:10:49 -0400943 // instrumented, leading to false-positive uninitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400944 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400945#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500946 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500947}
948
949Nucleus::~Nucleus()
950{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400951 delete Variable::unmaterializedVariables;
952 Variable::unmaterializedVariables = nullptr;
953
Nicolas Capens157ba262019-12-10 17:49:14 -0500954 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500955 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500956
957 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500958 ::allocator = nullptr;
959
Nicolas Capens157ba262019-12-10 17:49:14 -0500960 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500961 ::function = nullptr;
962
Nicolas Capens157ba262019-12-10 17:49:14 -0500963 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500964 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500965
966 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500967 ::elfFile = nullptr;
968
Nicolas Capens157ba262019-12-10 17:49:14 -0500969 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500970 ::out = nullptr;
971
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400972 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500973 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400974 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500975
976 ::codegenMutex.unlock();
977}
978
979void Nucleus::setDefaultConfig(const Config &cfg)
980{
981 std::unique_lock<std::mutex> lock(::defaultConfigLock);
982 ::defaultConfig() = cfg;
983}
984
985void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
986{
987 std::unique_lock<std::mutex> lock(::defaultConfigLock);
988 auto &config = ::defaultConfig();
989 config = cfgEdit.apply(config);
990}
991
992Config Nucleus::getDefaultConfig()
993{
994 std::unique_lock<std::mutex> lock(::defaultConfigLock);
995 return ::defaultConfig();
996}
997
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500998// This function lowers and produces executable binary code in memory for the input functions,
999// and returns a Routine with the entry points to these functions.
1000template<size_t Count>
Sean Risser705231f2021-08-19 18:17:24 -04001001static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit *cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -05001002{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001003 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
1004 // and GlobalContext::emitItems.
1005
Nicolas Capens81bc9d92019-12-16 15:05:57 -05001006 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -05001007 {
1008 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001009 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -05001010 }
1011
1012 ::context->emitFileHeader();
1013
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001014 // Translate
1015
1016 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -05001017 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001018 Ice::Cfg *currFunc = functions[i];
1019
1020 // Install function allocator in TLS for Cfg-specific container allocators
1021 Ice::CfgLocalAllocatorScope allocScope(currFunc);
1022
1023 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
1024
Nicolas Capens54313fb2021-02-19 14:26:27 -05001025 if(::optimizerCallback)
1026 {
1027 Nucleus::OptimizerReport report;
1028 rr::optimize(currFunc, &report);
1029 ::optimizerCallback(&report);
1030 ::optimizerCallback = nullptr;
1031 }
1032 else
1033 {
1034 rr::optimize(currFunc);
1035 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001036
1037 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001038 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001039
1040 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001041 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001042
1043 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1044
1045 if(subzeroEmitTextAsm)
1046 {
1047 currFunc->emit();
1048 }
1049
1050 currFunc->emitIAS();
Nicolas Capensff010f92021-02-01 12:22:53 -05001051
1052 if(currFunc->hasError())
1053 {
1054 return nullptr;
1055 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001056 }
1057
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001058 // Emit items
1059
1060 ::context->lowerGlobals("");
1061
Nicolas Capens157ba262019-12-10 17:49:14 -05001062 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001063
1064 for(size_t i = 0; i < Count; ++i)
1065 {
1066 Ice::Cfg *currFunc = functions[i];
1067
1068 // Accumulate globals from functions to emit into the "last" section at the end
1069 auto globals = currFunc->getGlobalInits();
1070 if(globals && !globals->empty())
1071 {
1072 ::context->getGlobals()->merge(globals.get());
1073 }
1074
1075 auto assembler = currFunc->releaseAssembler();
1076 assembler->alignFunction();
1077 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1078 }
1079
Nicolas Capens157ba262019-12-10 17:49:14 -05001080 ::context->lowerGlobals("last");
1081 ::context->lowerConstants();
1082 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001083
Nicolas Capens157ba262019-12-10 17:49:14 -05001084 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001085 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001086 objectWriter->writeNonUserSections();
1087
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001088 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001089 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1090 ASSERT(entryPoints.size() == Count);
1091 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001092 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001093 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001094 }
1095
1096 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001097
1098 Routine *handoffRoutine = ::routine;
1099 ::routine = nullptr;
1100
1101 return std::shared_ptr<Routine>(handoffRoutine);
1102}
1103
Sean Risser705231f2021-08-19 18:17:24 -04001104std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit *cfgEdit /* = nullptr */)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001105{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001106 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001107 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1108}
1109
Nicolas Capens157ba262019-12-10 17:49:14 -05001110Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1111{
1112 Ice::Type type = T(t);
1113 int typeSize = Ice::typeWidthInBytes(type);
1114 int totalSize = typeSize * (arraySize ? arraySize : 1);
1115
1116 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1117 auto address = ::function->makeVariable(T(getPointerType(t)));
Nicolas Capens0cfc0432021-02-05 15:18:42 -05001118 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize); // SRoA depends on the alignment to match the type size.
Nicolas Capens157ba262019-12-10 17:49:14 -05001119 ::function->getEntryNode()->getInsts().push_front(alloca);
1120
1121 return V(address);
1122}
1123
1124BasicBlock *Nucleus::createBasicBlock()
1125{
1126 return B(::function->makeNode());
1127}
1128
1129BasicBlock *Nucleus::getInsertBlock()
1130{
1131 return B(::basicBlock);
1132}
1133
1134void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1135{
Nicolas Capens7c296ec2021-02-18 14:10:26 -05001136 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001137
1138 ::basicBlock = basicBlock;
1139}
1140
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001141void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001142{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001143 ASSERT(::function == nullptr);
1144 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001145 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001146 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001147 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001148
1149 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1150
1151 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1152 // becomes invalid if another one is created; for example, when creating await and destroy functions
1153 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1154 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001155 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1156
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001157 ::entryBlock = ::function->getEntryNode();
1158 ::basicBlock = ::function->makeNode();
1159 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001160}
1161
1162Value *Nucleus::getArgument(unsigned int index)
1163{
1164 return V(::function->getArgs()[index]);
1165}
1166
1167void Nucleus::createRetVoid()
1168{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001169 RR_DEBUG_INFO_UPDATE_LOC();
1170
Nicolas Capens157ba262019-12-10 17:49:14 -05001171 // Code generated after this point is unreachable, so any variables
1172 // being read can safely return an undefined value. We have to avoid
1173 // materializing variables after the terminator ret instruction.
1174 Variable::killUnmaterialized();
1175
1176 Ice::InstRet *ret = Ice::InstRet::create(::function);
1177 ::basicBlock->appendInst(ret);
1178}
1179
1180void Nucleus::createRet(Value *v)
1181{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001182 RR_DEBUG_INFO_UPDATE_LOC();
1183
Nicolas Capens157ba262019-12-10 17:49:14 -05001184 // Code generated after this point is unreachable, so any variables
1185 // being read can safely return an undefined value. We have to avoid
1186 // materializing variables after the terminator ret instruction.
1187 Variable::killUnmaterialized();
1188
1189 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1190 ::basicBlock->appendInst(ret);
1191}
1192
1193void Nucleus::createBr(BasicBlock *dest)
1194{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001195 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001196 Variable::materializeAll();
1197
1198 auto br = Ice::InstBr::create(::function, dest);
1199 ::basicBlock->appendInst(br);
1200}
1201
1202void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1203{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001204 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001205 Variable::materializeAll();
1206
1207 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1208 ::basicBlock->appendInst(br);
1209}
1210
1211static bool isCommutative(Ice::InstArithmetic::OpKind op)
1212{
1213 switch(op)
1214 {
Nicolas Capens112faf42019-12-13 17:32:26 -05001215 case Ice::InstArithmetic::Add:
1216 case Ice::InstArithmetic::Fadd:
1217 case Ice::InstArithmetic::Mul:
1218 case Ice::InstArithmetic::Fmul:
1219 case Ice::InstArithmetic::And:
1220 case Ice::InstArithmetic::Or:
1221 case Ice::InstArithmetic::Xor:
1222 return true;
1223 default:
1224 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001225 }
1226}
1227
1228static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1229{
1230 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1231
1232 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1233
1234 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1235 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1236 ::basicBlock->appendInst(arithmetic);
1237
1238 return V(result);
1239}
1240
1241Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1242{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001243 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001244 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1245}
1246
1247Value *Nucleus::createSub(Value *lhs, Value *rhs)
1248{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001249 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001250 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1251}
1252
1253Value *Nucleus::createMul(Value *lhs, Value *rhs)
1254{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001255 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001256 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1257}
1258
1259Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1260{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001261 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001262 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1263}
1264
1265Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1266{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001267 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001268 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1269}
1270
1271Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1272{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001273 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001274 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1275}
1276
1277Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1278{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001279 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001280 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1281}
1282
1283Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001285 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001286 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1287}
1288
1289Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001292 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1293}
1294
1295Value *Nucleus::createURem(Value *lhs, Value *rhs)
1296{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001297 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001298 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1299}
1300
1301Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1302{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001303 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001304 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1305}
1306
1307Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1308{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001309 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001310 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1311 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001312 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001313 return nullptr;
1314}
1315
Nicolas Capens157ba262019-12-10 17:49:14 -05001316Value *Nucleus::createShl(Value *lhs, Value *rhs)
1317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001319 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1320}
1321
1322Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001325 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1326}
1327
1328Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001331 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1332}
1333
1334Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001337 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1338}
1339
1340Value *Nucleus::createOr(Value *lhs, Value *rhs)
1341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001343 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1344}
1345
1346Value *Nucleus::createXor(Value *lhs, Value *rhs)
1347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001348 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001349 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1350}
1351
1352Value *Nucleus::createNeg(Value *v)
1353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001354 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001355 return createSub(createNullValue(T(v->getType())), v);
1356}
1357
1358Value *Nucleus::createFNeg(Value *v)
1359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001360 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001361 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1362 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001363
1364 return createFSub(negativeZero, v);
1365}
1366
1367Value *Nucleus::createNot(Value *v)
1368{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001369 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001370 if(Ice::isScalarIntegerType(v->getType()))
1371 {
1372 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1373 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001374 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001375 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001376 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001377 return createXor(v, createConstantVector(c, T(v->getType())));
1378 }
1379}
1380
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001381static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1382{
1383#if defined(__i386__) || defined(__x86_64__)
1384 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1385 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1386 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1387#else
1388 if(atomic)
1389 {
1390 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1391 }
1392 if(memoryOrder != std::memory_order_relaxed)
1393 {
1394 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1395 }
1396#endif
1397
1398 // Vulkan doesn't allow sequential memory order
1399 ASSERT(memoryOrder != std::memory_order_seq_cst);
1400}
1401
Nicolas Capens157ba262019-12-10 17:49:14 -05001402Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1403{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001404 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001405 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001406
1407 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001408 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001409
Ben Clayton713b8d32019-12-17 20:37:56 +00001410 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001411 {
1412 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001413 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001414 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001415 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001416 auto pointer = RValue<Pointer<Byte>>(ptr);
1417 Int x = *Pointer<Int>(pointer);
1418
1419 Int4 vector;
1420 vector = Insert(vector, x, 0);
1421
Antonio Maiorano02a39532020-01-21 15:15:34 -05001422 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001423 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1424 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001425 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001426 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001427 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001428 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001429 auto pointer = RValue<Pointer<Byte>>(ptr);
1430 Int x = *Pointer<Int>(pointer);
1431 Int y = *Pointer<Int>(pointer + 4);
1432
1433 Int4 vector;
1434 vector = Insert(vector, x, 0);
1435 vector = Insert(vector, y, 1);
1436
Antonio Maiorano02a39532020-01-21 15:15:34 -05001437 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001438 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1439 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001440 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001441 else
1442 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001443 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001444 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001445 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001446 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Antonio Maiorano02a39532020-01-21 15:15:34 -05001447 result = ::function->makeVariable(T(type));
Nicolas Capens33a77f72021-02-08 15:04:38 -05001448 auto load = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001449 load->addArg(ptr);
1450 load->addArg(::context->getConstantInt32(typeSize(type)));
1451 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001452 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001453 }
1454 else
1455 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001456 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001457 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001458
Antonio Maiorano02a39532020-01-21 15:15:34 -05001459 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001460 return V(result);
1461}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001462
Nicolas Capens157ba262019-12-10 17:49:14 -05001463Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1464{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001465 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001466 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001467
Ben Clayton713b8d32019-12-17 20:37:56 +00001468#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001469 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001470 if(align != 0)
1471 {
1472 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1473 call->addArg(ptr);
1474 call->addArg(::context->getConstantInt64(typeSize(type)));
1475 ::basicBlock->appendInst(call);
1476 }
1477#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001478
Nicolas Capens157ba262019-12-10 17:49:14 -05001479 int valueType = (int)reinterpret_cast<intptr_t>(type);
1480
Ben Clayton713b8d32019-12-17 20:37:56 +00001481 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001482 {
1483 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001484 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001485 if(typeSize(type) == 4)
1486 {
1487 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1488 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1489 ::basicBlock->appendInst(bitcast);
1490
1491 RValue<Int4> v(V(vector));
1492
1493 auto pointer = RValue<Pointer<Byte>>(ptr);
1494 Int x = Extract(v, 0);
1495 *Pointer<Int>(pointer) = x;
1496 }
1497 else if(typeSize(type) == 8)
1498 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001499 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001500 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1501 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1502 ::basicBlock->appendInst(bitcast);
1503
1504 RValue<Int4> v(V(vector));
1505
1506 auto pointer = RValue<Pointer<Byte>>(ptr);
1507 Int x = Extract(v, 0);
1508 *Pointer<Int>(pointer) = x;
1509 Int y = Extract(v, 1);
1510 *Pointer<Int>(pointer + 4) = y;
1511 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001512 else
1513 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001514 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001515 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001516 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001517 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001518 auto store = Ice::InstIntrinsic::create(::function, 3, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001519 store->addArg(value);
1520 store->addArg(ptr);
1521 store->addArg(::context->getConstantInt32(typeSize(type)));
1522 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001523 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001524 }
1525 else
1526 {
1527 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001528
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001529 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001530 ::basicBlock->appendInst(store);
1531 }
1532
1533 return value;
1534}
1535
1536Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1537{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001538 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001539 ASSERT(index->getType() == Ice::IceType_i32);
1540
1541 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1542 {
1543 int32_t offset = constant->getValue() * (int)typeSize(type);
1544
1545 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001546 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001547 return ptr;
1548 }
1549
Nicolas Capens157ba262019-12-10 17:49:14 -05001550 return createAdd(ptr, createConstantInt(offset));
1551 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001552
Nicolas Capens157ba262019-12-10 17:49:14 -05001553 if(!Ice::isByteSizedType(T(type)))
1554 {
1555 index = createMul(index, createConstantInt((int)typeSize(type)));
1556 }
1557
Ben Clayton713b8d32019-12-17 20:37:56 +00001558 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001559 {
1560 if(unsignedIndex)
1561 {
1562 index = createZExt(index, T(Ice::IceType_i64));
1563 }
1564 else
1565 {
1566 index = createSExt(index, T(Ice::IceType_i64));
1567 }
1568 }
1569
1570 return createAdd(ptr, index);
1571}
1572
Antonio Maiorano370cba52019-12-31 11:36:07 -05001573static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1574{
1575 Ice::Variable *result = ::function->makeVariable(value->getType());
1576
1577 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001578 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001579 auto op = ::context->getConstantInt32(rmwOp);
1580 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1581 inst->addArg(op);
1582 inst->addArg(ptr);
1583 inst->addArg(value);
1584 inst->addArg(order);
1585 ::basicBlock->appendInst(inst);
1586
1587 return V(result);
1588}
1589
Nicolas Capens157ba262019-12-10 17:49:14 -05001590Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1591{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001592 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001593 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001594}
1595
1596Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1597{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001598 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001599 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001600}
1601
1602Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1603{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001604 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001605 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001606}
1607
1608Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1609{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001610 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001611 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001612}
1613
1614Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1615{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001616 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001617 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001618}
1619
1620Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1621{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001622 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001623 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001624}
1625
1626Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1627{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001628 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001629 Ice::Variable *result = ::function->makeVariable(value->getType());
1630
1631 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001632 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001633 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1634 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1635 inst->addArg(ptr);
1636 inst->addArg(compare);
1637 inst->addArg(value);
1638 inst->addArg(orderEq);
1639 inst->addArg(orderNeq);
1640 ::basicBlock->appendInst(inst);
1641
1642 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001643}
1644
1645static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1646{
1647 if(v->getType() == T(destType))
1648 {
1649 return v;
1650 }
1651
1652 Ice::Variable *result = ::function->makeVariable(T(destType));
1653 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1654 ::basicBlock->appendInst(cast);
1655
1656 return V(result);
1657}
1658
1659Value *Nucleus::createTrunc(Value *v, Type *destType)
1660{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001661 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001662 return createCast(Ice::InstCast::Trunc, v, destType);
1663}
1664
1665Value *Nucleus::createZExt(Value *v, Type *destType)
1666{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001667 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001668 return createCast(Ice::InstCast::Zext, v, destType);
1669}
1670
1671Value *Nucleus::createSExt(Value *v, Type *destType)
1672{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001673 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001674 return createCast(Ice::InstCast::Sext, v, destType);
1675}
1676
1677Value *Nucleus::createFPToUI(Value *v, Type *destType)
1678{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001679 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001680 return createCast(Ice::InstCast::Fptoui, v, destType);
1681}
1682
1683Value *Nucleus::createFPToSI(Value *v, Type *destType)
1684{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001685 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001686 return createCast(Ice::InstCast::Fptosi, v, destType);
1687}
1688
1689Value *Nucleus::createSIToFP(Value *v, Type *destType)
1690{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001691 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001692 return createCast(Ice::InstCast::Sitofp, v, destType);
1693}
1694
1695Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1696{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001697 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001698 return createCast(Ice::InstCast::Fptrunc, v, destType);
1699}
1700
1701Value *Nucleus::createFPExt(Value *v, Type *destType)
1702{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001703 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001704 return createCast(Ice::InstCast::Fpext, v, destType);
1705}
1706
1707Value *Nucleus::createBitCast(Value *v, Type *destType)
1708{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001709 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001710 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1711 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1712 // emulate them by writing to the stack and reading back as the destination type.
1713 if(emulateMismatchedBitCast)
1714 {
1715 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1716 {
1717 Value *address = allocateStackVariable(destType);
1718 createStore(v, address, T(v->getType()));
1719 return createLoad(address, destType);
1720 }
1721 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1722 {
1723 Value *address = allocateStackVariable(T(v->getType()));
1724 createStore(v, address, T(v->getType()));
1725 return createLoad(address, destType);
1726 }
1727 }
1728
1729 return createCast(Ice::InstCast::Bitcast, v, destType);
1730}
1731
1732static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1733{
1734 ASSERT(lhs->getType() == rhs->getType());
1735
1736 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1737 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1738 ::basicBlock->appendInst(cmp);
1739
1740 return V(result);
1741}
1742
Nicolas Capens157ba262019-12-10 17:49:14 -05001743Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1744{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001745 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001746 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1747}
1748
1749Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1750{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001751 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001752 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1753}
1754
1755Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1756{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001757 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001758 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1759}
1760
1761Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1762{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001763 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001764 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1765}
1766
1767Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1768{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001769 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001770 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1771}
1772
1773Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1774{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001775 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001776 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1777}
1778
1779Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1780{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001781 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001782 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1783}
1784
1785Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1786{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001787 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001788 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1789}
1790
1791Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1792{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001793 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001794 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1795}
1796
1797Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1798{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001799 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001800 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1801}
1802
1803static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1804{
1805 ASSERT(lhs->getType() == rhs->getType());
1806 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1807
1808 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1809 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1810 ::basicBlock->appendInst(cmp);
1811
1812 return V(result);
1813}
1814
1815Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1816{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001817 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001818 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1819}
1820
1821Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1822{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001823 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001824 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1825}
1826
1827Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1828{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001829 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001830 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1831}
1832
1833Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1834{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001835 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001836 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1837}
1838
1839Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1840{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001841 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001842 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1843}
1844
1845Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1846{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001847 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001848 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1849}
1850
1851Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001854 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1855}
1856
1857Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1858{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001859 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001860 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1861}
1862
1863Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001866 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1867}
1868
1869Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1870{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001871 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001872 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1873}
1874
1875Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1876{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001877 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001878 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1879}
1880
1881Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1882{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001883 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001884 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1885}
1886
1887Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1888{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001889 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001890 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1891}
1892
1893Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1894{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001895 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001896 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1897}
1898
1899Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1900{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001901 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001902 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001903 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001904 ::basicBlock->appendInst(extract);
1905
1906 return V(result);
1907}
1908
1909Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1910{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001911 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001912 auto result = ::function->makeVariable(vector->getType());
1913 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1914 ::basicBlock->appendInst(insert);
1915
1916 return V(result);
1917}
1918
1919Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1920{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001921 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001922 ASSERT(V1->getType() == V2->getType());
1923
1924 int size = Ice::typeNumElements(V1->getType());
1925 auto result = ::function->makeVariable(V1->getType());
1926 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1927
1928 for(int i = 0; i < size; i++)
1929 {
1930 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1931 }
1932
1933 ::basicBlock->appendInst(shuffle);
1934
1935 return V(result);
1936}
1937
1938Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1939{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001940 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001941 ASSERT(ifTrue->getType() == ifFalse->getType());
1942
1943 auto result = ::function->makeVariable(ifTrue->getType());
1944 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1945 ::basicBlock->appendInst(select);
1946
1947 return V(result);
1948}
1949
1950SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1951{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001952 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001953 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1954 ::basicBlock->appendInst(switchInst);
1955
Ben Clayton713b8d32019-12-17 20:37:56 +00001956 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001957}
1958
1959void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1960{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001961 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001962 switchCases->addBranch(label, label, branch);
1963}
1964
1965void Nucleus::createUnreachable()
1966{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001967 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001968 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1969 ::basicBlock->appendInst(unreachable);
1970}
1971
Antonio Maiorano62427e02020-02-13 09:18:05 -05001972Type *Nucleus::getType(Value *value)
1973{
1974 return T(V(value)->getType());
1975}
1976
1977Type *Nucleus::getContainedType(Type *vectorType)
1978{
1979 Ice::Type vecTy = T(vectorType);
1980 switch(vecTy)
1981 {
Nicolas Capens112faf42019-12-13 17:32:26 -05001982 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1983 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1984 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1985 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1986 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1987 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1988 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1989 default:
1990 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1991 return {};
Antonio Maiorano62427e02020-02-13 09:18:05 -05001992 }
1993}
1994
Nicolas Capens157ba262019-12-10 17:49:14 -05001995Type *Nucleus::getPointerType(Type *ElementType)
1996{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001997 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001998}
1999
Antonio Maiorano62427e02020-02-13 09:18:05 -05002000static constexpr Ice::Type getNaturalIntType()
2001{
2002 constexpr size_t intSize = sizeof(int);
2003 static_assert(intSize == 4 || intSize == 8, "");
2004 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
2005}
2006
2007Type *Nucleus::getPrintfStorageType(Type *valueType)
2008{
2009 Ice::Type valueTy = T(valueType);
2010 switch(valueTy)
2011 {
Nicolas Capens112faf42019-12-13 17:32:26 -05002012 case Ice::IceType_i32:
2013 return T(getNaturalIntType());
Antonio Maiorano62427e02020-02-13 09:18:05 -05002014
Nicolas Capens112faf42019-12-13 17:32:26 -05002015 case Ice::IceType_f32:
2016 return T(Ice::IceType_f64);
Antonio Maiorano62427e02020-02-13 09:18:05 -05002017
Nicolas Capens112faf42019-12-13 17:32:26 -05002018 default:
2019 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
2020 return {};
Antonio Maiorano62427e02020-02-13 09:18:05 -05002021 }
2022}
2023
Nicolas Capens157ba262019-12-10 17:49:14 -05002024Value *Nucleus::createNullValue(Type *Ty)
2025{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002026 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002027 if(Ice::isVectorType(T(Ty)))
2028 {
2029 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002030 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002031 return createConstantVector(c, Ty);
2032 }
2033 else
2034 {
2035 return V(::context->getConstantZero(T(Ty)));
2036 }
2037}
2038
2039Value *Nucleus::createConstantLong(int64_t i)
2040{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002041 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002042 return V(::context->getConstantInt64(i));
2043}
2044
2045Value *Nucleus::createConstantInt(int i)
2046{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002047 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002048 return V(::context->getConstantInt32(i));
2049}
2050
2051Value *Nucleus::createConstantInt(unsigned int i)
2052{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002053 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002054 return V(::context->getConstantInt32(i));
2055}
2056
2057Value *Nucleus::createConstantBool(bool b)
2058{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002059 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002060 return V(::context->getConstantInt1(b));
2061}
2062
2063Value *Nucleus::createConstantByte(signed char i)
2064{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002065 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002066 return V(::context->getConstantInt8(i));
2067}
2068
2069Value *Nucleus::createConstantByte(unsigned char i)
2070{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002071 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002072 return V(::context->getConstantInt8(i));
2073}
2074
2075Value *Nucleus::createConstantShort(short i)
2076{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002077 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002078 return V(::context->getConstantInt16(i));
2079}
2080
2081Value *Nucleus::createConstantShort(unsigned short i)
2082{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002083 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002084 return V(::context->getConstantInt16(i));
2085}
2086
2087Value *Nucleus::createConstantFloat(float x)
2088{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002089 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002090 return V(::context->getConstantFloat(x));
2091}
2092
2093Value *Nucleus::createNullPointer(Type *Ty)
2094{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002095 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002096 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002097}
2098
Antonio Maiorano02a39532020-01-21 15:15:34 -05002099static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2100{
2101 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2102}
2103
Nicolas Capens157ba262019-12-10 17:49:14 -05002104Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2105{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002106 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002107 const int vectorSize = 16;
2108 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2109 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002110
2111 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002112 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002113
Antonio Maioranoa0957112020-03-04 15:06:19 -05002114 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002115 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002116
2117 switch((int)reinterpret_cast<intptr_t>(type))
2118 {
Nicolas Capens112faf42019-12-13 17:32:26 -05002119 case Ice::IceType_v4i32:
2120 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002121 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002122 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002123 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002124 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002125 }
2126 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002127 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002128 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002129 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002130 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002131 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002132 }
2133 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002134 case Ice::IceType_v8i16:
2135 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002136 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002137 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002138 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002139 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002140 }
2141 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002142 case Ice::IceType_v16i8:
2143 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002144 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002145 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002146 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002147 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002148 }
2149 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002150 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002151 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002152 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002153 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002154 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002155 }
2156 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002157 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002158 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002159 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002160 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002161 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002162 }
2163 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002164 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002165 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002166 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002167 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002168 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002169 }
2170 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002171 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002172 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002173 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002174 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002175 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002176 }
2177 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002178 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002179 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002180 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002181 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002182 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002183 }
2184 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002185 default:
2186 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002187 }
2188
Antonio Maiorano02a39532020-01-21 15:15:34 -05002189 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002190
Antonio Maiorano02a39532020-01-21 15:15:34 -05002191 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002192 return V(result);
2193}
2194
2195Value *Nucleus::createConstantVector(const double *constants, Type *type)
2196{
Ben Clayton713b8d32019-12-17 20:37:56 +00002197 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002198}
2199
Antonio Maiorano62427e02020-02-13 09:18:05 -05002200Value *Nucleus::createConstantString(const char *v)
2201{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002202 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002203 return V(IceConstantData(v, strlen(v) + 1));
2204}
2205
Nicolas Capens54313fb2021-02-19 14:26:27 -05002206void Nucleus::setOptimizerCallback(OptimizerCallback *callback)
2207{
2208 ::optimizerCallback = callback;
2209}
2210
Nicolas Capens519cf222020-05-08 15:27:19 -04002211Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002212{
2213 return T(Ice::IceType_void);
2214}
2215
Nicolas Capens519cf222020-05-08 15:27:19 -04002216Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002217{
2218 return T(Ice::IceType_i1);
2219}
2220
Nicolas Capens519cf222020-05-08 15:27:19 -04002221Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002222{
2223 return T(Ice::IceType_i8);
2224}
2225
Nicolas Capens519cf222020-05-08 15:27:19 -04002226Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002227{
2228 return T(Ice::IceType_i8);
2229}
2230
Nicolas Capens519cf222020-05-08 15:27:19 -04002231Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002232{
2233 return T(Ice::IceType_i16);
2234}
2235
Nicolas Capens519cf222020-05-08 15:27:19 -04002236Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002237{
2238 return T(Ice::IceType_i16);
2239}
2240
Nicolas Capens519cf222020-05-08 15:27:19 -04002241Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002242{
2243 return T(Type_v4i8);
2244}
2245
Nicolas Capens519cf222020-05-08 15:27:19 -04002246Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002247{
2248 return T(Type_v4i8);
2249}
2250
Ben Clayton713b8d32019-12-17 20:37:56 +00002251namespace {
2252RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002253{
Ben Clayton713b8d32019-12-17 20:37:56 +00002254 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002255}
2256
Ben Clayton713b8d32019-12-17 20:37:56 +00002257RValue<Byte> Extract(RValue<Byte8> val, int i)
2258{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002259 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002260}
2261
2262RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2263{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002264 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002265}
2266} // namespace
2267
Nicolas Capens157ba262019-12-10 17:49:14 -05002268RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002271 if(emulateIntrinsics)
2272 {
2273 Byte8 result;
2274 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2275 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2276 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2277 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2278 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2279 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2280 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2281 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2282
2283 return result;
2284 }
2285 else
2286 {
2287 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002288 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002289 auto paddusb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002290 paddusb->addArg(x.value());
2291 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002292 ::basicBlock->appendInst(paddusb);
2293
2294 return RValue<Byte8>(V(result));
2295 }
2296}
2297
2298RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002301 if(emulateIntrinsics)
2302 {
2303 Byte8 result;
2304 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2305 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2306 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2307 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2308 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2309 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2310 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2311 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2312
2313 return result;
2314 }
2315 else
2316 {
2317 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002318 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002319 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002320 psubusw->addArg(x.value());
2321 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002322 ::basicBlock->appendInst(psubusw);
2323
2324 return RValue<Byte8>(V(result));
2325 }
2326}
2327
2328RValue<SByte> Extract(RValue<SByte8> val, int i)
2329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002331 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002332}
2333
2334RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002337 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002338}
2339
2340RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002343 if(emulateIntrinsics)
2344 {
2345 SByte8 result;
2346 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2347 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2348 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2349 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2350 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2351 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2352 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2353 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2354
2355 return result;
2356 }
2357 else
2358 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002359#if defined(__i386__) || defined(__x86_64__)
2360 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2361 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2362 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002363
Ben Clayton713b8d32019-12-17 20:37:56 +00002364 return As<SByte8>(hi | lo);
2365#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002366 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002367#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002368 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002369}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002370
Nicolas Capens157ba262019-12-10 17:49:14 -05002371RValue<Int> SignMask(RValue<Byte8> x)
2372{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002373 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002374 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002375 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002376 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2377 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002378 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002379 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002380 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002381 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002382 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002383 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002384 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002385 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002386
Nicolas Capens157ba262019-12-10 17:49:14 -05002387 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002388 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002389}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002390
2391// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2392// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002393// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002394// }
2395
Nicolas Capens157ba262019-12-10 17:49:14 -05002396RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2397{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002398 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002399 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002400}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002401
Nicolas Capens519cf222020-05-08 15:27:19 -04002402Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002403{
2404 return T(Type_v8i8);
2405}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002406
Nicolas Capens598f8d82016-09-26 15:09:10 -04002407// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2408// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002409// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002410// }
2411
2412// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2413// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002414// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002415// }
2416
Nicolas Capens157ba262019-12-10 17:49:14 -05002417RValue<SByte> SaturateSigned(RValue<Short> x)
2418{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002419 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002420 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2421}
2422
2423RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2424{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002425 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002426 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002427 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002428 SByte8 result;
2429 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2430 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2431 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2432 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2433 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2434 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2435 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2436 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002437
Nicolas Capens157ba262019-12-10 17:49:14 -05002438 return result;
2439 }
2440 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002441 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002442 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002443 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002444 auto paddsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002445 paddsb->addArg(x.value());
2446 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002447 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002448
Nicolas Capens157ba262019-12-10 17:49:14 -05002449 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002450 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002451}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002452
Nicolas Capens157ba262019-12-10 17:49:14 -05002453RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2454{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002455 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002456 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002457 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002458 SByte8 result;
2459 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2460 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2461 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2462 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2463 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2464 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2465 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2466 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002467
Nicolas Capens157ba262019-12-10 17:49:14 -05002468 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002469 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002470 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002471 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002472 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002473 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002474 auto psubsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002475 psubsb->addArg(x.value());
2476 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002477 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002478
Nicolas Capens157ba262019-12-10 17:49:14 -05002479 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002480 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002481}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002482
Nicolas Capens157ba262019-12-10 17:49:14 -05002483RValue<Int> SignMask(RValue<SByte8> x)
2484{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002485 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002486 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002487 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002488 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2489 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002490 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002491 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002492 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002493 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002494 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002495 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002496 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002497 ::basicBlock->appendInst(movmsk);
2498
2499 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002500 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002501}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002502
Nicolas Capens157ba262019-12-10 17:49:14 -05002503RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2504{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002505 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002506 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002507}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002508
Nicolas Capens157ba262019-12-10 17:49:14 -05002509RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2510{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002511 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002512 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002513}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002514
Nicolas Capens519cf222020-05-08 15:27:19 -04002515Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002516{
2517 return T(Type_v8i8);
2518}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002519
Nicolas Capens519cf222020-05-08 15:27:19 -04002520Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002521{
2522 return T(Ice::IceType_v16i8);
2523}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002524
Nicolas Capens519cf222020-05-08 15:27:19 -04002525Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002526{
2527 return T(Ice::IceType_v16i8);
2528}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002529
Nicolas Capens519cf222020-05-08 15:27:19 -04002530Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002531{
2532 return T(Type_v2i16);
2533}
Nicolas Capensd4227962016-11-09 14:24:25 -05002534
Nicolas Capens519cf222020-05-08 15:27:19 -04002535Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002536{
2537 return T(Type_v2i16);
2538}
Nicolas Capensd4227962016-11-09 14:24:25 -05002539
Nicolas Capens157ba262019-12-10 17:49:14 -05002540Short4::Short4(RValue<Int4> cast)
2541{
Ben Clayton713b8d32019-12-17 20:37:56 +00002542 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002543 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002544 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2545
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002546 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002547 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002548
2549 storeValue(short4);
2550}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002551
2552// Short4::Short4(RValue<Float> cast)
2553// {
2554// }
2555
Nicolas Capens157ba262019-12-10 17:49:14 -05002556Short4::Short4(RValue<Float4> cast)
2557{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002558 // TODO(b/150791192): Generalize and optimize
2559 auto smin = std::numeric_limits<short>::min();
2560 auto smax = std::numeric_limits<short>::max();
2561 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002562}
2563
2564RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2565{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002566 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002567 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002568 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002569 Short4 result;
2570 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2571 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2572 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2573 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002574
2575 return result;
2576 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002577 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002578 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002579 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002580 }
2581}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002582
Nicolas Capens157ba262019-12-10 17:49:14 -05002583RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2584{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002585 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002586 if(emulateIntrinsics)
2587 {
2588 Short4 result;
2589 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2590 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2591 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2592 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2593
2594 return result;
2595 }
2596 else
2597 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002598 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002599 }
2600}
2601
2602RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2603{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002604 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002605 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002606 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002607 ::basicBlock->appendInst(cmp);
2608
2609 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002610 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002611 ::basicBlock->appendInst(select);
2612
2613 return RValue<Short4>(V(result));
2614}
2615
2616RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2617{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002618 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002619 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002620 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002621 ::basicBlock->appendInst(cmp);
2622
2623 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002624 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002625 ::basicBlock->appendInst(select);
2626
2627 return RValue<Short4>(V(result));
2628}
2629
2630RValue<Short> SaturateSigned(RValue<Int> x)
2631{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002632 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002633 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2634}
2635
2636RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2637{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002638 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002639 if(emulateIntrinsics)
2640 {
2641 Short4 result;
2642 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2643 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2644 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2645 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2646
2647 return result;
2648 }
2649 else
2650 {
2651 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002652 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002653 auto paddsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002654 paddsw->addArg(x.value());
2655 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002656 ::basicBlock->appendInst(paddsw);
2657
2658 return RValue<Short4>(V(result));
2659 }
2660}
2661
2662RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2663{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002664 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002665 if(emulateIntrinsics)
2666 {
2667 Short4 result;
2668 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2669 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2670 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2671 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2672
2673 return result;
2674 }
2675 else
2676 {
2677 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002678 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002679 auto psubsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002680 psubsw->addArg(x.value());
2681 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002682 ::basicBlock->appendInst(psubsw);
2683
2684 return RValue<Short4>(V(result));
2685 }
2686}
2687
2688RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2689{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002690 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002691 if(emulateIntrinsics)
2692 {
2693 Short4 result;
2694 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2695 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2696 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2697 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2698
2699 return result;
2700 }
2701 else
2702 {
2703 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002704 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002705 auto pmulhw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002706 pmulhw->addArg(x.value());
2707 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002708 ::basicBlock->appendInst(pmulhw);
2709
2710 return RValue<Short4>(V(result));
2711 }
2712}
2713
2714RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2715{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002716 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002717 if(emulateIntrinsics)
2718 {
2719 Int2 result;
2720 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2721 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2722
2723 return result;
2724 }
2725 else
2726 {
2727 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002728 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002729 auto pmaddwd = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002730 pmaddwd->addArg(x.value());
2731 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002732 ::basicBlock->appendInst(pmaddwd);
2733
2734 return As<Int2>(V(result));
2735 }
2736}
2737
2738RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2739{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002740 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002741 if(emulateIntrinsics)
2742 {
2743 SByte8 result;
2744 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2745 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2746 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2747 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2748 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2749 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2750 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2751 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2752
2753 return result;
2754 }
2755 else
2756 {
2757 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002758 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002759 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002760 pack->addArg(x.value());
2761 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002762 ::basicBlock->appendInst(pack);
2763
2764 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2765 }
2766}
2767
2768RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2769{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002770 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002771 if(emulateIntrinsics)
2772 {
2773 Byte8 result;
2774 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2775 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2776 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2777 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2778 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2779 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2780 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2781 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2782
2783 return result;
2784 }
2785 else
2786 {
2787 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002788 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002789 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002790 pack->addArg(x.value());
2791 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002792 ::basicBlock->appendInst(pack);
2793
2794 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2795 }
2796}
2797
2798RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2799{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002800 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002801 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002802}
2803
2804RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2805{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002806 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002807 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002808}
2809
Nicolas Capens519cf222020-05-08 15:27:19 -04002810Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002811{
2812 return T(Type_v4i16);
2813}
2814
2815UShort4::UShort4(RValue<Float4> cast, bool saturate)
2816{
2817 if(saturate)
2818 {
2819 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002820 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002821 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2822 // PackUnsigned takes care of 0x0000 saturation.
2823 Int4 int4(Min(cast, Float4(0xFFFF)));
2824 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002825 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002826 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002827 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002828 // ARM saturates the 32-bit integer result on overflow/undeflow.
2829 Int4 int4(cast);
2830 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002831 }
2832 else
2833 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002834 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002835 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002836 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002837 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002838 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002839 *this = Short4(Int4(cast));
2840 }
2841}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002842
Nicolas Capens157ba262019-12-10 17:49:14 -05002843RValue<UShort> Extract(RValue<UShort4> val, int i)
2844{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002845 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002846}
2847
Nicolas Capens157ba262019-12-10 17:49:14 -05002848RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2849{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002850 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002851 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002852
Nicolas Capens157ba262019-12-10 17:49:14 -05002853 {
2854 UShort4 result;
2855 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2856 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2857 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2858 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2859
2860 return result;
2861 }
2862 else
2863 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002864 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002865 }
2866}
2867
2868RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2869{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002870 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002871 if(emulateIntrinsics)
2872 {
2873 UShort4 result;
2874 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2875 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2876 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2877 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2878
2879 return result;
2880 }
2881 else
2882 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002883 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002884 }
2885}
2886
2887RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2888{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002889 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002890 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002891 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002892 ::basicBlock->appendInst(cmp);
2893
2894 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002895 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002896 ::basicBlock->appendInst(select);
2897
2898 return RValue<UShort4>(V(result));
2899}
2900
2901RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2902{
2903 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002904 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002905 ::basicBlock->appendInst(cmp);
2906
2907 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002908 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002909 ::basicBlock->appendInst(select);
2910
2911 return RValue<UShort4>(V(result));
2912}
2913
2914RValue<UShort> SaturateUnsigned(RValue<Int> x)
2915{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002916 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002917 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2918}
2919
2920RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2921{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002922 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002923 if(emulateIntrinsics)
2924 {
2925 UShort4 result;
2926 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2927 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2928 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2929 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2930
2931 return result;
2932 }
2933 else
2934 {
2935 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002936 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002937 auto paddusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002938 paddusw->addArg(x.value());
2939 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002940 ::basicBlock->appendInst(paddusw);
2941
2942 return RValue<UShort4>(V(result));
2943 }
2944}
2945
2946RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2947{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002948 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002949 if(emulateIntrinsics)
2950 {
2951 UShort4 result;
2952 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2953 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2954 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2955 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2956
2957 return result;
2958 }
2959 else
2960 {
2961 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002962 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002963 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002964 psubusw->addArg(x.value());
2965 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002966 ::basicBlock->appendInst(psubusw);
2967
2968 return RValue<UShort4>(V(result));
2969 }
2970}
2971
2972RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2973{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002974 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002975 if(emulateIntrinsics)
2976 {
2977 UShort4 result;
2978 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2979 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2980 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2981 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2982
2983 return result;
2984 }
2985 else
2986 {
2987 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002988 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002989 auto pmulhuw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002990 pmulhuw->addArg(x.value());
2991 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002992 ::basicBlock->appendInst(pmulhuw);
2993
2994 return RValue<UShort4>(V(result));
2995 }
2996}
2997
2998RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2999{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003000 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003001 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3002
3003 // Scalarized implementation.
3004 Int4 result;
3005 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
3006 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
3007 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
3008 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
3009
3010 return result;
3011}
3012
3013RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3014{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003015 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003016 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3017
3018 if(false) // Partial product based implementation.
3019 {
3020 auto xh = x >> 16;
3021 auto yh = y >> 16;
3022 auto xl = x & UInt4(0x0000FFFF);
3023 auto yl = y & UInt4(0x0000FFFF);
3024 auto xlyh = xl * yh;
3025 auto xhyl = xh * yl;
3026 auto xlyhh = xlyh >> 16;
3027 auto xhylh = xhyl >> 16;
3028 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3029 auto xhyll = xhyl & UInt4(0x0000FFFF);
3030 auto xlylh = (xl * yl) >> 16;
3031 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3032
3033 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003034 }
3035
Nicolas Capens157ba262019-12-10 17:49:14 -05003036 // Scalarized implementation.
3037 Int4 result;
3038 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3039 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3040 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3041 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3042
3043 return As<UInt4>(result);
3044}
3045
3046RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3047{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003048 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003049 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003050 return UShort4(0);
3051}
3052
Nicolas Capens519cf222020-05-08 15:27:19 -04003053Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003054{
3055 return T(Type_v4i16);
3056}
3057
3058RValue<Short> Extract(RValue<Short8> val, int i)
3059{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003060 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003061 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003062}
3063
3064RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3065{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003066 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003067 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003068}
3069
3070RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3071{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003072 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003073 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003074 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003075 Short8 result;
3076 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3077 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3078 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3079 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3080 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3081 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3082 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3083 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003084
Nicolas Capens157ba262019-12-10 17:49:14 -05003085 return result;
3086 }
3087 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003088 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003089 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003090 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003091}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003092
Nicolas Capens157ba262019-12-10 17:49:14 -05003093RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3094{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003095 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003096 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003097 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003098 Short8 result;
3099 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3100 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3101 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3102 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3103 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3104 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3105 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3106 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003107
Nicolas Capens157ba262019-12-10 17:49:14 -05003108 return result;
3109 }
3110 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003111 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003112 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003113 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003114}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003115
Nicolas Capens157ba262019-12-10 17:49:14 -05003116RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3117{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003118 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003119 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003120 return Int4(0);
3121}
3122
3123RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3124{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003125 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003126 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003127 return Short8(0);
3128}
3129
Nicolas Capens519cf222020-05-08 15:27:19 -04003130Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003131{
3132 return T(Ice::IceType_v8i16);
3133}
3134
3135RValue<UShort> Extract(RValue<UShort8> val, int i)
3136{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003137 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003138 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003139}
3140
3141RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3142{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003143 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003144 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003145}
3146
3147RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3148{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003149 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003150 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003151 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003152 UShort8 result;
3153 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3154 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3155 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3156 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3157 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3158 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3159 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3160 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003161
Nicolas Capens157ba262019-12-10 17:49:14 -05003162 return result;
3163 }
3164 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003165 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003166 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003167 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003168}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003169
Nicolas Capens157ba262019-12-10 17:49:14 -05003170RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3171{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003172 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003173 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003174 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003175 UShort8 result;
3176 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3177 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3178 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3179 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3180 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3181 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3182 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3183 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003184
Nicolas Capens157ba262019-12-10 17:49:14 -05003185 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003186 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003187 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003188 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003189 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003190 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003191}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003192
Nicolas Capens157ba262019-12-10 17:49:14 -05003193RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3194{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003195 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003196 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003197 return UShort8(0);
3198}
3199
Nicolas Capens519cf222020-05-08 15:27:19 -04003200Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003201{
3202 return T(Ice::IceType_v8i16);
3203}
3204
Ben Clayton713b8d32019-12-17 20:37:56 +00003205RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003206{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003207 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003208 RValue<Int> res = val;
3209 val += 1;
3210 return res;
3211}
3212
Ben Clayton713b8d32019-12-17 20:37:56 +00003213const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003214{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003215 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003216 val += 1;
3217 return val;
3218}
3219
Ben Clayton713b8d32019-12-17 20:37:56 +00003220RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003221{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003222 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003223 RValue<Int> res = val;
3224 val -= 1;
3225 return res;
3226}
3227
Ben Clayton713b8d32019-12-17 20:37:56 +00003228const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003229{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003230 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003231 val -= 1;
3232 return val;
3233}
3234
3235RValue<Int> RoundInt(RValue<Float> cast)
3236{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003237 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003238 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003239 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003240 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3241 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003242 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003243 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003244 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003245 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003246 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003247 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003248 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003249 ::basicBlock->appendInst(nearbyint);
3250
3251 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003252 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003253}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003254
Nicolas Capens519cf222020-05-08 15:27:19 -04003255Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003256{
3257 return T(Ice::IceType_i32);
3258}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003259
Nicolas Capens519cf222020-05-08 15:27:19 -04003260Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003261{
3262 return T(Ice::IceType_i64);
3263}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003264
Nicolas Capens157ba262019-12-10 17:49:14 -05003265UInt::UInt(RValue<Float> cast)
3266{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003267 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003268 // Smallest positive value representable in UInt, but not in Int
3269 const unsigned int ustart = 0x80000000u;
3270 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003271
Nicolas Capens157ba262019-12-10 17:49:14 -05003272 // If the value is negative, store 0, otherwise store the result of the conversion
3273 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003274 // Check if the value can be represented as an Int
3275 IfThenElse(cast >= ustartf,
3276 // If the value is too large, subtract ustart and re-add it after conversion.
3277 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3278 // Otherwise, just convert normally
3279 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003280 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003281}
Nicolas Capensa8086512016-11-07 17:32:17 -05003282
Ben Clayton713b8d32019-12-17 20:37:56 +00003283RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003285 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003286 RValue<UInt> res = val;
3287 val += 1;
3288 return res;
3289}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003290
Ben Clayton713b8d32019-12-17 20:37:56 +00003291const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003292{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003293 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003294 val += 1;
3295 return val;
3296}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003297
Ben Clayton713b8d32019-12-17 20:37:56 +00003298RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003301 RValue<UInt> res = val;
3302 val -= 1;
3303 return res;
3304}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003305
Ben Clayton713b8d32019-12-17 20:37:56 +00003306const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003307{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003308 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003309 val -= 1;
3310 return val;
3311}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003312
Nicolas Capens598f8d82016-09-26 15:09:10 -04003313// RValue<UInt> RoundUInt(RValue<Float> cast)
3314// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003315// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003316// }
3317
Nicolas Capens519cf222020-05-08 15:27:19 -04003318Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003319{
3320 return T(Ice::IceType_i32);
3321}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003322
3323// Int2::Int2(RValue<Int> cast)
3324// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003325// Value *extend = Nucleus::createZExt(cast.value(), Long::type());
Nicolas Capens519cf222020-05-08 15:27:19 -04003326// Value *vector = Nucleus::createBitCast(extend, Int2::type());
Nicolas Capens598f8d82016-09-26 15:09:10 -04003327//
3328// Constant *shuffle[2];
3329// shuffle[0] = Nucleus::createConstantInt(0);
3330// shuffle[1] = Nucleus::createConstantInt(0);
3331//
Nicolas Capens519cf222020-05-08 15:27:19 -04003332// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003333//
3334// storeValue(replicate);
3335// }
3336
Nicolas Capens157ba262019-12-10 17:49:14 -05003337RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3338{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003339 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003340 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003341 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003342 Int2 result;
3343 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3344 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003345
Nicolas Capens157ba262019-12-10 17:49:14 -05003346 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003347 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003348 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003349 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003350 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003351 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003352}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003353
Nicolas Capens157ba262019-12-10 17:49:14 -05003354RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3355{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003356 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003357 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003358 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003359 Int2 result;
3360 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3361 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3362
3363 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003364 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003365 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003366 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003367 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003368 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003369}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003370
Nicolas Capens519cf222020-05-08 15:27:19 -04003371Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003372{
3373 return T(Type_v2i32);
3374}
3375
3376RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3377{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003378 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003379 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003380 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003381 UInt2 result;
3382 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3383 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003384
Nicolas Capens157ba262019-12-10 17:49:14 -05003385 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003386 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003387 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003388 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003389 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003390 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003391}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003392
Nicolas Capens157ba262019-12-10 17:49:14 -05003393RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3394{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003395 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003396 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003397 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003398 UInt2 result;
3399 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3400 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003401
Nicolas Capens157ba262019-12-10 17:49:14 -05003402 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003403 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003404 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003405 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003406 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003407 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003408}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003409
Nicolas Capens519cf222020-05-08 15:27:19 -04003410Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003411{
3412 return T(Type_v2i32);
3413}
3414
Ben Clayton713b8d32019-12-17 20:37:56 +00003415Int4::Int4(RValue<Byte4> cast)
3416 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003418 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003419 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003420 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3421
3422 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003423 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003424 Value *b = Nucleus::createBitCast(a, Byte16::type());
3425 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003426
Ben Clayton713b8d32019-12-17 20:37:56 +00003427 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003428 Value *d = Nucleus::createBitCast(c, Short8::type());
3429 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003430
Nicolas Capens519cf222020-05-08 15:27:19 -04003431 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003432 storeValue(f);
3433}
3434
Ben Clayton713b8d32019-12-17 20:37:56 +00003435Int4::Int4(RValue<SByte4> cast)
3436 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003437{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003438 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003439 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003440 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3441
Ben Clayton713b8d32019-12-17 20:37:56 +00003442 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003443 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003444 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3445
Ben Clayton713b8d32019-12-17 20:37:56 +00003446 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003447 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003448 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3449
3450 *this = As<Int4>(e) >> 24;
3451}
3452
Ben Clayton713b8d32019-12-17 20:37:56 +00003453Int4::Int4(RValue<Short4> cast)
3454 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003455{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003456 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003457 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003458 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003459
3460 *this = As<Int4>(c) >> 16;
3461}
3462
Ben Clayton713b8d32019-12-17 20:37:56 +00003463Int4::Int4(RValue<UShort4> cast)
3464 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003465{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003466 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003467 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003468 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003469 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003470 storeValue(d);
3471}
3472
Ben Clayton713b8d32019-12-17 20:37:56 +00003473Int4::Int4(RValue<Int> rhs)
3474 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003475{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003476 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003477 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003478
Ben Clayton713b8d32019-12-17 20:37:56 +00003479 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003480 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3481
3482 storeValue(replicate);
3483}
3484
3485RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3486{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003487 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003488 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003489 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003490 Int4 result;
3491 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3492 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3493 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3494 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003495
Nicolas Capens157ba262019-12-10 17:49:14 -05003496 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003497 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003498 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003499 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003500 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003501 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003502}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003503
Nicolas Capens157ba262019-12-10 17:49:14 -05003504RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3505{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003506 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003507 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003508 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003509 Int4 result;
3510 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3511 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3512 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3513 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003514
Nicolas Capens157ba262019-12-10 17:49:14 -05003515 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003516 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003517 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003518 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003519 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003520 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003521}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003522
Nicolas Capens157ba262019-12-10 17:49:14 -05003523RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3524{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003525 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003526 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003527}
3528
3529RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3530{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003531 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003532 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003533}
3534
3535RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3536{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003537 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003538 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003539}
3540
3541RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3542{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003543 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003544 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003545}
3546
3547RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3548{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003549 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003550 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003551}
3552
3553RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3554{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003555 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003556 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003557}
3558
Nicolas Capens629bf952022-01-18 15:08:14 -05003559RValue<Int4> Abs(RValue<Int4> x)
3560{
3561 // TODO: Optimize.
3562 auto negative = x >> 31;
3563 return (x ^ negative) - negative;
3564}
3565
Nicolas Capens157ba262019-12-10 17:49:14 -05003566RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3567{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003568 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003569 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003570 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003571 ::basicBlock->appendInst(cmp);
3572
3573 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003574 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003575 ::basicBlock->appendInst(select);
3576
3577 return RValue<Int4>(V(result));
3578}
3579
3580RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3581{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003582 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003583 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003584 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003585 ::basicBlock->appendInst(cmp);
3586
3587 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003588 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003589 ::basicBlock->appendInst(select);
3590
3591 return RValue<Int4>(V(result));
3592}
3593
3594RValue<Int4> RoundInt(RValue<Float4> cast)
3595{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003596 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003597 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003598 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003599 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3600 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003601 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003602 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003603 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003604 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003605 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003606 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003607 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003608 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003609
3610 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003611 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003612}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003613
Nicolas Capenseeb81842021-01-12 17:44:40 -05003614RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3615{
3616 RR_DEBUG_INFO_UPDATE_LOC();
3617
3618 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3619 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3620 // saturate to 0x80000000.
3621 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3622
3623 if(emulateIntrinsics || CPUID::ARM)
3624 {
3625 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3626 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3627 }
3628 else
3629 {
3630 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3631 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003632 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capenseeb81842021-01-12 17:44:40 -05003633 nearbyint->addArg(clamped.value());
3634 ::basicBlock->appendInst(nearbyint);
3635
3636 return RValue<Int4>(V(result));
3637 }
3638}
3639
Nicolas Capens157ba262019-12-10 17:49:14 -05003640RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3641{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003642 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003643 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003644 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003645 Short8 result;
3646 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3647 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3648 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3649 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3650 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3651 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3652 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3653 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003654
Nicolas Capens157ba262019-12-10 17:49:14 -05003655 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003656 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003657 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003658 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003659 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003660 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003661 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003662 pack->addArg(x.value());
3663 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003664 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003665
Nicolas Capens157ba262019-12-10 17:49:14 -05003666 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003667 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003668}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003669
Nicolas Capens157ba262019-12-10 17:49:14 -05003670RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3671{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003672 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003673 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003674 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003675 RValue<Int4> sx = As<Int4>(x);
3676 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003677
Nicolas Capens157ba262019-12-10 17:49:14 -05003678 RValue<Int4> sy = As<Int4>(y);
3679 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003680
Nicolas Capens157ba262019-12-10 17:49:14 -05003681 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003682 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003683 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003684 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003685 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003686 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003687 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003688 pack->addArg(x.value());
3689 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003690 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003691
Nicolas Capens157ba262019-12-10 17:49:14 -05003692 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003693 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003694}
Nicolas Capens33438a62017-09-27 11:47:35 -04003695
Nicolas Capens157ba262019-12-10 17:49:14 -05003696RValue<Int> SignMask(RValue<Int4> x)
3697{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003698 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003699 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003700 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003701 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3702 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003703 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003704 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003705 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003706 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003707 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003708 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003709 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003710 ::basicBlock->appendInst(movmsk);
3711
3712 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003713 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003714}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003715
Nicolas Capens519cf222020-05-08 15:27:19 -04003716Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003717{
3718 return T(Ice::IceType_v4i32);
3719}
3720
Ben Clayton713b8d32019-12-17 20:37:56 +00003721UInt4::UInt4(RValue<Float4> cast)
3722 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003723{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003724 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003725 // Smallest positive value representable in UInt, but not in Int
3726 const unsigned int ustart = 0x80000000u;
3727 const float ustartf = float(ustart);
3728
3729 // Check if the value can be represented as an Int
3730 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3731 // If the value is too large, subtract ustart and re-add it after conversion.
3732 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003733 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003734 (~uiValue & Int4(cast));
3735 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003736 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003737}
3738
Ben Clayton713b8d32019-12-17 20:37:56 +00003739UInt4::UInt4(RValue<UInt> rhs)
3740 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003741{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003742 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003743 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003744
Ben Clayton713b8d32019-12-17 20:37:56 +00003745 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003746 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3747
3748 storeValue(replicate);
3749}
3750
3751RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3752{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003753 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003754 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003755 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003756 UInt4 result;
3757 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3758 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3759 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3760 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003761
Nicolas Capens157ba262019-12-10 17:49:14 -05003762 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003763 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003764 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003765 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003766 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003767 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003768}
Ben Clayton88816fa2019-05-15 17:08:14 +01003769
Nicolas Capens157ba262019-12-10 17:49:14 -05003770RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3771{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003772 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003773 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003774 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003775 UInt4 result;
3776 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3777 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3778 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3779 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003780
Nicolas Capens157ba262019-12-10 17:49:14 -05003781 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003782 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003783 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003784 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003785 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003786 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003787}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003788
Nicolas Capens157ba262019-12-10 17:49:14 -05003789RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003792 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003793}
3794
3795RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003798 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003799}
3800
3801RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3802{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003803 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003804 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003805}
3806
3807RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3808{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003809 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003810 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003811}
3812
3813RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3814{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003815 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003816 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003817}
3818
3819RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3820{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003821 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003822 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003823}
3824
3825RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3826{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003827 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003828 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003829 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003830 ::basicBlock->appendInst(cmp);
3831
3832 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003833 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003834 ::basicBlock->appendInst(select);
3835
3836 return RValue<UInt4>(V(result));
3837}
3838
3839RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3840{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003841 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003842 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003843 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003844 ::basicBlock->appendInst(cmp);
3845
3846 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003847 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003848 ::basicBlock->appendInst(select);
3849
3850 return RValue<UInt4>(V(result));
3851}
3852
Nicolas Capens519cf222020-05-08 15:27:19 -04003853Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003854{
3855 return T(Ice::IceType_v4i32);
3856}
3857
Nicolas Capens519cf222020-05-08 15:27:19 -04003858Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003859{
3860 return T(Ice::IceType_i16);
3861}
3862
3863RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003866 return 1.0f / x;
3867}
3868
3869RValue<Float> RcpSqrt_pp(RValue<Float> x)
3870{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003871 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003872 return Rcp_pp(Sqrt(x));
3873}
3874
3875RValue<Float> Sqrt(RValue<Float> x)
3876{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003877 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003878 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003879 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003880 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003881 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003882 ::basicBlock->appendInst(sqrt);
3883
3884 return RValue<Float>(V(result));
3885}
3886
3887RValue<Float> Round(RValue<Float> x)
3888{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003889 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003890 return Float4(Round(Float4(x))).x;
3891}
3892
3893RValue<Float> Trunc(RValue<Float> x)
3894{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003895 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003896 return Float4(Trunc(Float4(x))).x;
3897}
3898
3899RValue<Float> Frac(RValue<Float> x)
3900{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003901 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003902 return Float4(Frac(Float4(x))).x;
3903}
3904
3905RValue<Float> Floor(RValue<Float> x)
3906{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003907 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003908 return Float4(Floor(Float4(x))).x;
3909}
3910
3911RValue<Float> Ceil(RValue<Float> x)
3912{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003913 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003914 return Float4(Ceil(Float4(x))).x;
3915}
3916
Nicolas Capens519cf222020-05-08 15:27:19 -04003917Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003918{
3919 return T(Ice::IceType_f32);
3920}
3921
Nicolas Capens519cf222020-05-08 15:27:19 -04003922Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003923{
3924 return T(Type_v2f32);
3925}
3926
Ben Clayton713b8d32019-12-17 20:37:56 +00003927Float4::Float4(RValue<Float> rhs)
3928 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003929{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003930 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003931 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003932
Ben Clayton713b8d32019-12-17 20:37:56 +00003933 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003934 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3935
3936 storeValue(replicate);
3937}
3938
Nicolas Capens4ee53092022-02-05 01:53:12 -05003939// Call single arg function on a vector type
3940template<typename Func, typename T>
3941static RValue<T> call4(Func func, const RValue<T> &x)
3942{
3943 T result;
3944 result = Insert(result, Call(func, Extract(x, 0)), 0);
3945 result = Insert(result, Call(func, Extract(x, 1)), 1);
3946 result = Insert(result, Call(func, Extract(x, 2)), 2);
3947 result = Insert(result, Call(func, Extract(x, 3)), 3);
3948 return result;
3949}
3950
3951// Call two arg function on a vector type
3952template<typename Func, typename T>
3953static RValue<T> call4(Func func, const RValue<T> &x, const RValue<T> &y)
3954{
3955 T result;
3956 result = Insert(result, Call(func, Extract(x, 0), Extract(y, 0)), 0);
3957 result = Insert(result, Call(func, Extract(x, 1), Extract(y, 1)), 1);
3958 result = Insert(result, Call(func, Extract(x, 2), Extract(y, 2)), 2);
3959 result = Insert(result, Call(func, Extract(x, 3), Extract(y, 3)), 3);
3960 return result;
3961}
3962
3963// Call three arg function on a vector type
3964template<typename Func, typename T>
3965static RValue<T> call4(Func func, const RValue<T> &x, const RValue<T> &y, const RValue<T> &z)
3966{
3967 T result;
3968 result = Insert(result, Call(func, Extract(x, 0), Extract(y, 0), Extract(z, 0)), 0);
3969 result = Insert(result, Call(func, Extract(x, 1), Extract(y, 1), Extract(z, 1)), 1);
3970 result = Insert(result, Call(func, Extract(x, 2), Extract(y, 2), Extract(z, 2)), 2);
3971 result = Insert(result, Call(func, Extract(x, 3), Extract(y, 3), Extract(z, 3)), 3);
3972 return result;
3973}
3974
3975RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
3976{
3977 return call4(fmodf, lhs, rhs);
3978}
3979
Nicolas Capensbc74bc22022-01-26 10:47:00 -05003980RValue<Float4> MulAdd(RValue<Float4> x, RValue<Float4> y, RValue<Float4> z)
3981{
3982 // TODO(b/214591655): Use FMA when available.
3983 return x * y + z;
3984}
3985
Nicolas Capens75d79f22022-01-31 17:46:26 -05003986RValue<Float4> FMA(RValue<Float4> x, RValue<Float4> y, RValue<Float4> z)
3987{
3988 // TODO(b/214591655): Use FMA instructions when available.
Nicolas Capens4ee53092022-02-05 01:53:12 -05003989 return call4(fmaf, x, y, z);
Nicolas Capens75d79f22022-01-31 17:46:26 -05003990}
3991
Nicolas Capens629bf952022-01-18 15:08:14 -05003992RValue<Float4> Abs(RValue<Float4> x)
3993{
3994 // TODO: Optimize.
3995 Value *vector = Nucleus::createBitCast(x.value(), Int4::type());
3996 int64_t constantVector[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
3997 Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, Int4::type()));
3998
3999 return As<Float4>(result);
4000}
4001
Nicolas Capens157ba262019-12-10 17:49:14 -05004002RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
4003{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004004 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004005 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004006 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004007 ::basicBlock->appendInst(cmp);
4008
4009 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004010 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004011 ::basicBlock->appendInst(select);
4012
4013 return RValue<Float4>(V(result));
4014}
4015
4016RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
4017{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004018 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004019 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004020 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004021 ::basicBlock->appendInst(cmp);
4022
4023 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004024 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004025 ::basicBlock->appendInst(select);
4026
4027 return RValue<Float4>(V(result));
4028}
4029
4030RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
4031{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004032 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004033 return Float4(1.0f) / x;
4034}
4035
4036RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
4037{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004038 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004039 return Rcp_pp(Sqrt(x));
4040}
4041
// Reports whether RcpApprox() is available. This backend always answers false.
bool HasRcpApprox()
{
	// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
	constexpr bool supported = false;
	return supported;
}
4047
4048RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
4049{
4050 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4051 UNREACHABLE("RValue<Float4> RcpApprox()");
4052 return { 0.0f };
4053}
4054
4055RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
4056{
4057 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4058 UNREACHABLE("RValue<Float> RcpApprox()");
4059 return { 0.0f };
4060}
4061
// Reports whether RcpSqrtApprox() is available. This backend always answers false.
bool HasRcpSqrtApprox()
{
	constexpr bool supported = false;
	return supported;
}
4066
4067RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
4068{
4069 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4070 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
4071 return { 0.0f };
4072}
4073
4074RValue<Float> RcpSqrtApprox(RValue<Float> x)
4075{
4076 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4077 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
4078 return { 0.0f };
4079}
4080
Nicolas Capens157ba262019-12-10 17:49:14 -05004081RValue<Float4> Sqrt(RValue<Float4> x)
4082{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004083 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004084 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04004085 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004086 Float4 result;
4087 result.x = Sqrt(Float(Float4(x).x));
4088 result.y = Sqrt(Float(Float4(x).y));
4089 result.z = Sqrt(Float(Float4(x).z));
4090 result.w = Sqrt(Float(Float4(x).w));
4091
4092 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04004093 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004094 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04004095 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004096 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004097 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004098 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004099 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04004100 ::basicBlock->appendInst(sqrt);
4101
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04004102 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04004103 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04004104}
Nicolas Capens157ba262019-12-10 17:49:14 -05004105
4106RValue<Int> SignMask(RValue<Float4> x)
4107{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004108 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004109 if(emulateIntrinsics || CPUID::ARM)
4110 {
4111 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
4112 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
4113 }
4114 else
4115 {
4116 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004117 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004118 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004119 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004120 ::basicBlock->appendInst(movmsk);
4121
4122 return RValue<Int>(V(result));
4123 }
4124}
4125
4126RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
4127{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004128 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004129 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004130}
4131
4132RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
4133{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004134 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004135 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004136}
4137
4138RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4139{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004140 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004141 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004142}
4143
4144RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4145{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004146 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004147 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004148}
4149
4150RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4151{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004152 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004153 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004154}
4155
4156RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4157{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004158 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004159 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004160}
4161
4162RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4163{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004164 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004165 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004166}
4167
4168RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4169{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004170 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004171 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004172}
4173
4174RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4175{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004176 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004177 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004178}
4179
4180RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4181{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004182 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004183 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004184}
4185
4186RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4187{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004188 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004189 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004190}
4191
4192RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4193{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004194 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004195 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004196}
4197
4198RValue<Float4> Round(RValue<Float4> x)
4199{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004200 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004201 if(emulateIntrinsics || CPUID::ARM)
4202 {
4203 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4204 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4205 }
4206 else if(CPUID::SSE4_1)
4207 {
4208 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004209 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004210 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004211 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004212 round->addArg(::context->getConstantInt32(0));
4213 ::basicBlock->appendInst(round);
4214
4215 return RValue<Float4>(V(result));
4216 }
4217 else
4218 {
4219 return Float4(RoundInt(x));
4220 }
4221}
4222
4223RValue<Float4> Trunc(RValue<Float4> x)
4224{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004225 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004226 if(CPUID::SSE4_1)
4227 {
4228 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004229 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004230 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004231 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004232 round->addArg(::context->getConstantInt32(3));
4233 ::basicBlock->appendInst(round);
4234
4235 return RValue<Float4>(V(result));
4236 }
4237 else
4238 {
4239 return Float4(Int4(x));
4240 }
4241}
4242
4243RValue<Float4> Frac(RValue<Float4> x)
4244{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004245 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004246 Float4 frc;
4247
4248 if(CPUID::SSE4_1)
4249 {
4250 frc = x - Floor(x);
4251 }
4252 else
4253 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004254 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004255
Ben Clayton713b8d32019-12-17 20:37:56 +00004256 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004257 }
4258
4259 // x - floor(x) can be 1.0 for very small negative x.
4260 // Clamp against the value just below 1.0.
4261 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4262}
4263
4264RValue<Float4> Floor(RValue<Float4> x)
4265{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004266 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004267 if(CPUID::SSE4_1)
4268 {
4269 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004270 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004271 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004272 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004273 round->addArg(::context->getConstantInt32(1));
4274 ::basicBlock->appendInst(round);
4275
4276 return RValue<Float4>(V(result));
4277 }
4278 else
4279 {
4280 return x - Frac(x);
4281 }
4282}
4283
4284RValue<Float4> Ceil(RValue<Float4> x)
4285{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004286 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004287 if(CPUID::SSE4_1)
4288 {
4289 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004290 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004291 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004292 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004293 round->addArg(::context->getConstantInt32(2));
4294 ::basicBlock->appendInst(round);
4295
4296 return RValue<Float4>(V(result));
4297 }
4298 else
4299 {
4300 return -Floor(-x);
4301 }
4302}
4303
Nicolas Capens519cf222020-05-08 15:27:19 -04004304Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004305{
4306 return T(Ice::IceType_v4f32);
4307}
4308
4309RValue<Long> Ticks()
4310{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004311 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004312 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004313 return Long(Int(0));
4314}
4315
Ben Clayton713b8d32019-12-17 20:37:56 +00004316RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004318 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004319 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004320}
4321
Ben Clayton713b8d32019-12-17 20:37:56 +00004322RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004324 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004325 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004326}
4327
Ben Clayton713b8d32019-12-17 20:37:56 +00004328Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004331 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004332}
4333
4334void Breakpoint()
4335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004336 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004337 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004338 auto trap = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05004339 ::basicBlock->appendInst(trap);
4340}
4341
Ben Clayton713b8d32019-12-17 20:37:56 +00004342void Nucleus::createFence(std::memory_order memoryOrder)
4343{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004344 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004345 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004346 auto inst = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004347 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4348 inst->addArg(order);
4349 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004350}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004351
Ben Clayton713b8d32019-12-17 20:37:56 +00004352Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004354 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense4b77942021-08-03 17:09:41 -04004355 UNIMPLEMENTED("b/155867273 Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004356 return nullptr;
4357}
Nicolas Capense4b77942021-08-03 17:09:41 -04004358
Ben Clayton713b8d32019-12-17 20:37:56 +00004359void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4360{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004361 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense4b77942021-08-03 17:09:41 -04004362 UNIMPLEMENTED("b/155867273 Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004363}
Nicolas Capens157ba262019-12-10 17:49:14 -05004364
Nicolas Capens4ee53092022-02-05 01:53:12 -05004365template<typename T>
4366struct UnderlyingType
4367{
4368 using Type = typename decltype(rr::Extract(std::declval<RValue<T>>(), 0))::rvalue_underlying_type;
4369};
4370
4371template<typename T>
4372using UnderlyingTypeT = typename UnderlyingType<T>::Type;
4373
4374template<typename T, typename EL = UnderlyingTypeT<T>>
4375static void gather(T &out, RValue<Pointer<EL>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes)
4376{
4377 constexpr bool atomic = false;
4378 constexpr std::memory_order order = std::memory_order_relaxed;
4379
4380 Pointer<Byte> baseBytePtr = base;
4381
4382 out = T(0);
4383 for(int i = 0; i < 4; i++)
4384 {
4385 If(Extract(mask, i) != 0)
4386 {
4387 auto offset = Extract(offsets, i);
4388 auto el = Load(Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
4389 out = Insert(out, el, i);
4390 }
4391 Else If(zeroMaskedLanes)
4392 {
4393 out = Insert(out, EL(0), i);
4394 }
4395 }
4396}
4397
4398template<typename T, typename EL = UnderlyingTypeT<T>>
4399static void scatter(RValue<Pointer<EL>> base, RValue<T> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4400{
4401 constexpr bool atomic = false;
4402 constexpr std::memory_order order = std::memory_order_relaxed;
4403
4404 Pointer<Byte> baseBytePtr = base;
4405
4406 for(int i = 0; i < 4; i++)
4407 {
4408 If(Extract(mask, i) != 0)
4409 {
4410 auto offset = Extract(offsets, i);
4411 Store(Extract(val, i), Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
4412 }
4413 }
4414}
4415
Nicolas Capens157ba262019-12-10 17:49:14 -05004416RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004418 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004419 Float4 result{};
4420 gather(result, base, offsets, mask, alignment, zeroMaskedLanes);
4421 return result;
Nicolas Capens157ba262019-12-10 17:49:14 -05004422}
4423
4424RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4425{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004426 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004427 Int4 result{};
4428 gather(result, base, offsets, mask, alignment, zeroMaskedLanes);
4429 return result;
Nicolas Capens157ba262019-12-10 17:49:14 -05004430}
4431
4432void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4433{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004434 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004435 scatter(base, val, offsets, mask, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05004436}
4437
4438void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4439{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004440 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004441 scatter<Int4>(base, val, offsets, mask, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05004442}
4443
4444RValue<Float> Exp2(RValue<Float> x)
4445{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004446 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004447 return Call(exp2f, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004448}
4449
4450RValue<Float> Log2(RValue<Float> x)
4451{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004452 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004453 return Call(log2f, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004454}
4455
4456RValue<Float4> Sin(RValue<Float4> x)
4457{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004458 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004459 return call4(sinf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004460}
4461
4462RValue<Float4> Cos(RValue<Float4> x)
4463{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004464 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004465 return call4(cosf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004466}
4467
4468RValue<Float4> Tan(RValue<Float4> x)
4469{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004470 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004471 return call4(tanf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004472}
4473
Nicolas Capensd04f3f52022-02-05 01:19:14 -05004474RValue<Float4> Asin(RValue<Float4> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05004475{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004476 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004477 return call4(asinf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004478}
4479
Nicolas Capensd04f3f52022-02-05 01:19:14 -05004480RValue<Float4> Acos(RValue<Float4> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05004481{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004482 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004483 return call4(acosf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004484}
4485
4486RValue<Float4> Atan(RValue<Float4> x)
4487{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004488 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004489 return call4(atanf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004490}
4491
4492RValue<Float4> Sinh(RValue<Float4> x)
4493{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004494 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004495 return call4(sinhf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004496}
4497
4498RValue<Float4> Cosh(RValue<Float4> x)
4499{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004500 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004501 return call4(coshf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004502}
4503
4504RValue<Float4> Tanh(RValue<Float4> x)
4505{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004506 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004507 return call4(tanhf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004508}
4509
4510RValue<Float4> Asinh(RValue<Float4> x)
4511{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004512 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004513 return call4(asinhf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004514}
4515
4516RValue<Float4> Acosh(RValue<Float4> x)
4517{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004518 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004519 return call4(acoshf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004520}
4521
4522RValue<Float4> Atanh(RValue<Float4> x)
4523{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004524 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004525 return call4(atanhf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004526}
4527
4528RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4529{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004530 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004531 return call4(atan2f, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004532}
4533
4534RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4535{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004536 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004537 return call4(powf, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004538}
4539
4540RValue<Float4> Exp(RValue<Float4> x)
4541{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004542 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004543 return call4(expf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004544}
4545
4546RValue<Float4> Log(RValue<Float4> x)
4547{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004548 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004549 return call4(logf, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004550}
4551
4552RValue<Float4> Exp2(RValue<Float4> x)
4553{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004554 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004555 return call4(exp2f, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004556}
4557
4558RValue<Float4> Log2(RValue<Float4> x)
4559{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004560 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004561 return call4(log2f, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004562}
4563
4564RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4565{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004566 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004567 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004568 {
Ben Claytonce54c592020-02-07 11:30:51 +00004569 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004570 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004571 }
4572 else
4573 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004574 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004575 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004576 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004577 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004578 ::basicBlock->appendInst(ctlz);
4579
4580 return RValue<UInt>(V(result));
4581 }
4582}
4583
4584RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4585{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004586 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004587 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004588 {
Ben Claytonce54c592020-02-07 11:30:51 +00004589 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004590 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004591 }
4592 else
4593 {
4594 // TODO: implement vectorized version in Subzero
4595 UInt4 result;
4596 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4597 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4598 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4599 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4600 return result;
4601 }
4602}
4603
4604RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4605{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004606 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004607 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004608 {
Ben Claytonce54c592020-02-07 11:30:51 +00004609 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004610 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004611 }
4612 else
4613 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004614 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004615 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004616 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004617 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004618 ::basicBlock->appendInst(ctlz);
4619
4620 return RValue<UInt>(V(result));
4621 }
4622}
4623
4624RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4625{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004626 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004627 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004628 {
Ben Claytonce54c592020-02-07 11:30:51 +00004629 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004630 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004631 }
4632 else
4633 {
4634 // TODO: implement vectorized version in Subzero
4635 UInt4 result;
4636 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4637 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4638 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4639 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4640 return result;
4641 }
4642}
4643
// TODO(b/148276653): atomicMin and atomicMax are emulated with one process-wide mutex, which
// serializes every min/max operation rather than only the ones on the value pointed to by ptr.
// Use a CAS loop, as is done for LLVMReactor's min/max atomic for Android.
// TODO(b/148207274): Or, move this down into Subzero as a CAS-based operation.

// Returns the one mutex guarding all emulated atomic min/max operations.
//
// It is deliberately shared by atomicMin and atomicMax and by every element
// type T. A function-local static inside each template would give every
// instantiation its own mutex, so e.g. atomicMin<int32_t> and
// atomicMax<int32_t> on the same address would not exclude each other.
// NOTE: this only orders min/max operations against one another; accesses to
// the same memory that do not take this mutex are not synchronized with them.
static std::mutex &atomicMinMaxMutex()
{
	static std::mutex mutex;
	return mutex;
}

// Stores min(*ptr, value) to *ptr under the shared min/max mutex.
// Returns the value *ptr held before the update.
template<typename T>
static T atomicMin(T *ptr, T value)
{
	std::lock_guard<std::mutex> lock(atomicMinMaxMutex());
	T origValue = *ptr;
	*ptr = std::min(origValue, value);
	return origValue;
}

// Stores max(*ptr, value) to *ptr under the shared min/max mutex.
// Returns the value *ptr held before the update.
template<typename T>
static T atomicMax(T *ptr, T value)
{
	std::lock_guard<std::mutex> lock(atomicMinMaxMutex());
	T origValue = *ptr;
	*ptr = std::max(origValue, value);
	return origValue;
}
4669
Antonio Maiorano370cba52019-12-31 11:36:07 -05004670RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4671{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004672 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004673 return Call(atomicMin<int32_t>, x, y);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004674}
4675
4676RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4677{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004678 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004679 return Call(atomicMin<uint32_t>, x, y);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004680}
4681
4682RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4683{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004684 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004685 return Call(atomicMax<int32_t>, x, y);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004686}
4687
4688RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4689{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004690 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004691 return Call(atomicMax<uint32_t>, x, y);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004692}
4693
// Emits a print of the caller's backtrace location when the build defines
// ENABLE_RR_DEBUG_INFO; compiles to an empty function otherwise.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004700void EmitDebugVariable(Value *value) {}
// Intentionally empty in this backend.
void FlushDebug()
{
}
4702
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004703namespace {
4704namespace coro {
4705
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004706// Instance data per generated coroutine
4707// This is the "handle" type used for Coroutine functions
4708// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4709struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004710{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004711 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004712 bool done = false; // the coroutine should stop at the next yield()
4713 bool terminated = false; // the coroutine has finished.
4714 bool inRoutine = false; // is the coroutine currently executing?
4715 marl::Scheduler::Fiber *mainFiber = nullptr;
4716 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004717 void *promisePtr = nullptr;
4718};
4719
4720CoroutineData *createCoroutineData()
4721{
4722 return new CoroutineData{};
4723}
4724
4725void destroyCoroutineData(CoroutineData *coroData)
4726{
4727 delete coroData;
4728}
4729
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004730// suspend() pauses execution of the coroutine, and resumes execution from the
4731// caller's call to await().
4732// Returns true if await() is called again, or false if coroutine_destroy()
4733// is called.
4734bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004735{
Ben Claytonc3466532020-03-24 11:54:05 +00004736 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4737 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4738 ASSERT(coroData->inRoutine);
4739 coroData->inRoutine = false;
4740 coroData->mainFiber->notify();
4741 while(!coroData->inRoutine)
4742 {
4743 coroData->routineFiber->wait();
4744 }
4745 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004746}
4747
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004748// resume() is called by await(), blocking until the coroutine calls yield()
4749// or the coroutine terminates.
4750void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004751{
Ben Claytonc3466532020-03-24 11:54:05 +00004752 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4753 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4754 ASSERT(!coroData->inRoutine);
4755 coroData->inRoutine = true;
4756 coroData->routineFiber->notify();
4757 while(coroData->inRoutine)
4758 {
4759 coroData->mainFiber->wait();
4760 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004761}
4762
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004763// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4764// until the coroutine ends, and deletes the coroutine data.
4765void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004766{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004767 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004768 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4769 ASSERT(!coroData->inRoutine);
4770 if(!coroData->terminated)
4771 {
4772 coroData->done = true;
4773 coroData->inRoutine = true;
4774 coroData->routineFiber->notify();
4775 while(!coroData->terminated)
4776 {
4777 coroData->mainFiber->wait();
4778 }
4779 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004780 if(coroData->useInternalScheduler)
4781 {
4782 ::getOrCreateScheduler().unbind();
4783 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004784 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004785}
4786
4787namespace detail {
4788thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4789} // namespace detail
4790
4791void setHandleParam(Nucleus::CoroutineHandle handle)
4792{
4793 ASSERT(!detail::coroHandle);
4794 detail::coroHandle = handle;
4795}
4796
4797Nucleus::CoroutineHandle getHandleParam()
4798{
4799 ASSERT(detail::coroHandle);
4800 auto handle = detail::coroHandle;
4801 detail::coroHandle = {};
4802 return handle;
4803}
4804
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004805bool isDone(Nucleus::CoroutineHandle handle)
4806{
4807 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004808 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004809}
4810
4811void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4812{
4813 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4814 coroData->promisePtr = promisePtr;
4815}
4816
4817void *getPromisePtr(Nucleus::CoroutineHandle handle)
4818{
4819 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4820 return coroData->promisePtr;
4821}
4822
4823} // namespace coro
4824} // namespace
4825
4826// Used to generate coroutines.
4827// Lifetime: from yield to acquireCoroutine
4828class CoroutineGenerator
4829{
4830public:
4831 CoroutineGenerator()
4832 {
4833 }
4834
4835 // Inserts instructions at the top of the current function to make it a coroutine.
4836 void generateCoroutineBegin()
4837 {
4838 // Begin building the main coroutine_begin() function.
4839 // We insert these instructions at the top of the entry node,
4840 // before existing reactor-generated instructions.
4841
4842 // CoroutineHandle coroutine_begin(<Arguments>)
4843 // {
4844 // this->handle = coro::getHandleParam();
4845 //
4846 // YieldType promise;
4847 // coro::setPromisePtr(handle, &promise); // For await
4848 //
4849 // ... <REACTOR CODE> ...
4850 //
4851
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004852 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004853 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004854
4855 // YieldType promise;
4856 // coro::setPromisePtr(handle, &promise); // For await
4857 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004858 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004859 }
4860
4861 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4862 void generateYield(Value *val)
4863 {
4864 // ... <REACTOR CODE> ...
4865 //
4866 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004867 // if (!coro::suspend(handle)) {
4868 // return false; // coroutine has been stopped by the caller.
4869 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004870 //
4871 // ... <REACTOR CODE> ...
4872
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004873 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004874 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004875
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004876 // if (!coro::suspend(handle)) {
4877 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4878 auto doneBlock = Nucleus::createBasicBlock();
4879 auto resumeBlock = Nucleus::createBasicBlock();
4880 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4881
4882 // return false; // coroutine has been stopped by the caller.
4883 ::basicBlock = doneBlock;
4884 Nucleus::createRetVoid(); // coroutine return value is ignored.
4885
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004886 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004887 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004888 }
4889
4890 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4891
4892 // Generates the await function for the current coroutine.
4893 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4894 static FunctionUniquePtr generateAwaitFunction()
4895 {
4896 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4897 // {
4898 // if (coro::isDone())
4899 // {
4900 // return false;
4901 // }
4902 // else // resume
4903 // {
4904 // YieldType* promise = coro::getPromisePtr(handle);
4905 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004906 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004907 // return true;
4908 // }
4909 // }
4910
4911 // Subzero doesn't support bool types (IceType_i1) as return type
4912 const Ice::Type ReturnType = Ice::IceType_i32;
4913 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4914 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4915
4916 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4917 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4918
4919 Ice::Variable *handle = awaitFunc->getArgs()[0];
4920 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4921
4922 auto doneBlock = awaitFunc->makeNode();
4923 {
4924 // return false;
4925 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4926 doneBlock->appendInst(ret);
4927 }
4928
4929 auto resumeBlock = awaitFunc->makeNode();
4930 {
4931 // YieldType* promise = coro::getPromisePtr(handle);
4932 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4933
4934 // *out = *promise;
4935 // Load promise value
4936 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4937 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4938 resumeBlock->appendInst(load);
4939 // Then store it in output param
4940 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4941 resumeBlock->appendInst(store);
4942
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004943 // coro::resume(handle);
4944 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004945
4946 // return true;
4947 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4948 resumeBlock->appendInst(ret);
4949 }
4950
4951 // if (coro::isDone())
4952 // {
4953 // <doneBlock>
4954 // }
4955 // else // resume
4956 // {
4957 // <resumeBlock>
4958 // }
4959 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004960 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004961 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4962 bb->appendInst(br);
4963
4964 return FunctionUniquePtr{ awaitFunc };
4965 }
4966
4967 // Generates the destroy function for the current coroutine.
4968 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4969 static FunctionUniquePtr generateDestroyFunction()
4970 {
4971 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4972 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004973 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004974 // return;
4975 // }
4976
4977 const Ice::Type ReturnType = Ice::IceType_void;
4978 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4979
4980 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4981 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4982
4983 Ice::Variable *handle = destroyFunc->getArgs()[0];
4984
4985 auto *bb = destroyFunc->getEntryNode();
4986
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004987 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4988 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004989
4990 // return;
4991 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4992 bb->appendInst(ret);
4993
4994 return FunctionUniquePtr{ destroyFunc };
4995 }
4996
4997private:
4998 Ice::Variable *handle{};
4999 Ice::Variable *promise{};
5000};
5001
5002static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
5003{
5004 // This doubles up as our coroutine handle
5005 auto coroData = coro::createCoroutineData();
5006
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05005007 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
5008 if(coroData->useInternalScheduler)
5009 {
5010 ::getOrCreateScheduler().bind();
5011 }
5012
Ben Clayton76e9e532020-03-16 20:35:04 +00005013 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005014 // Store handle in TLS so that the coroutine can grab it right away, before
5015 // any fiber switch occurs.
5016 coro::setHandleParam(coroData);
5017
Ben Claytonc3466532020-03-24 11:54:05 +00005018 ASSERT(!coroData->routineFiber);
5019 coroData->routineFiber = marl::Scheduler::Fiber::current();
5020
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005021 beginFunc();
5022
Ben Claytonc3466532020-03-24 11:54:05 +00005023 ASSERT(coroData->inRoutine);
5024 coroData->done = true; // coroutine is done.
5025 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
5026 coroData->inRoutine = false;
5027 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00005028 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005029
Ben Claytonc3466532020-03-24 11:54:05 +00005030 ASSERT(!coroData->mainFiber);
5031 coroData->mainFiber = marl::Scheduler::Fiber::current();
5032
5033 // block until the first yield or coroutine end
5034 ASSERT(!coroData->inRoutine);
5035 coroData->inRoutine = true;
5036 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
5037 while(coroData->inRoutine)
5038 {
5039 coroData->mainFiber->wait();
5040 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005041
5042 return coroData;
5043}
5044
5045void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
5046{
5047 // Start by creating a regular function
5048 createFunction(yieldType, params);
5049
5050 // Save in case yield() is called
5051 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
5052 ::coroYieldType = yieldType;
5053}
5054
5055void Nucleus::yield(Value *val)
5056{
Antonio Maioranoaae33732020-02-14 14:52:34 -05005057 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005058 Variable::materializeAll();
5059
5060 // On first yield, we start generating coroutine functions
5061 if(!::coroGen)
5062 {
5063 ::coroGen = std::make_shared<CoroutineGenerator>();
5064 ::coroGen->generateCoroutineBegin();
5065 }
5066
5067 ASSERT(::coroGen);
5068 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05005069}
5070
Ben Clayton713b8d32019-12-17 20:37:56 +00005071static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
5072{
5073 return false;
5074}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005075
5076static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
5077{
5078}
Nicolas Capens157ba262019-12-10 17:49:14 -05005079
Sean Risser705231f2021-08-19 18:17:24 -04005080std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit *cfgEdit /* = nullptr */)
Nicolas Capens157ba262019-12-10 17:49:14 -05005081{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005082 if(::coroGen)
5083 {
5084 // Finish generating coroutine functions
5085 {
5086 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04005087 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005088 }
Nicolas Capens157ba262019-12-10 17:49:14 -05005089
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005090 auto awaitFunc = ::coroGen->generateAwaitFunction();
5091 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05005092
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005093 // At this point, we no longer need the CoroutineGenerator.
5094 ::coroGen.reset();
5095 ::coroYieldType = nullptr;
5096
5097 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
5098 { name, "await", "destroy" },
5099 cfgEdit);
5100
5101 return routine;
5102 }
5103 else
5104 {
5105 {
5106 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04005107 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005108 }
5109
5110 ::coroYieldType = nullptr;
5111
5112 // Not an actual coroutine (no yields), so return stubs for await and destroy
5113 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
5114
5115 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
5116 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
5117 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
5118 return routine;
5119 }
Nicolas Capens157ba262019-12-10 17:49:14 -05005120}
5121
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005122Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00005123{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005124 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
5125
5126 if(isCoroutine)
5127 {
5128 return rr::invokeCoroutineBegin(func);
5129 }
5130 else
5131 {
5132 // For regular routines, just invoke the begin func directly
5133 return func();
5134 }
Ben Clayton713b8d32019-12-17 20:37:56 +00005135}
Nicolas Capens157ba262019-12-10 17:49:14 -05005136
5137} // namespace rr