blob: 0a4e9d2cea87af6d209cdd8023d536f9e213865f [file] [log] [blame]
Nicolas Capens598f8d82016-09-26 15:09:10 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050016#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000017#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050018#include "ReactorDebugInfo.hpp"
Nicolas Capens3b0ad202022-06-02 15:02:31 -040019#include "SIMD.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040020
Nicolas Capens1a3ce872018-10-10 10:42:36 -040021#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000022#include "Optimizer.hpp"
Nicolas Capens79d4c6c2022-04-22 17:20:26 -040023#include "PragmaInternals.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040024
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040026#include "src/IceCfgNode.h"
27#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceELFStreamer.h"
29#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040030#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000031#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040032
Ben Clayton713b8d32019-12-17 20:37:56 +000033#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040034#include "llvm/Support/FileSystem.h"
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -050035#include "llvm/Support/ManagedStatic.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040036#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040037
Antonio Maiorano8bce0672020-02-28 13:13:45 -050038#include "marl/event.h"
39
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000041# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040042#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040043
Nicolas Capensbd65da92017-01-05 16:31:06 -050044#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000045# ifndef WIN32_LEAN_AND_MEAN
46# define WIN32_LEAN_AND_MEAN
47# endif // !WIN32_LEAN_AND_MEAN
48# ifndef NOMINMAX
49# define NOMINMAX
50# endif // !NOMINMAX
51# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050052#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040053
Ben Clayton683bad82020-02-10 23:57:09 +000054#include <array>
Nicolas Capens4ee53092022-02-05 01:53:12 -050055#include <cmath>
Nicolas Capens598f8d82016-09-26 15:09:10 -040056#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000057#include <limits>
58#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040059
Antonio Maiorano02a39532020-01-21 15:15:34 -050060// Subzero utility functions
61// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050062namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050063namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050064
65Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
66{
67 uint32_t sequenceNumber = 0;
Nicolas Capensff010f92021-02-01 12:22:53 -050068 auto *function = Ice::Cfg::create(context, sequenceNumber).release();
69
70 function->setStackSizeLimit(512 * 1024); // 512 KiB
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050071
72 Ice::CfgLocalAllocatorScope allocScope{ function };
73
74 for(auto type : paramTypes)
75 {
76 Ice::Variable *arg = function->makeVariable(type);
77 function->addArg(arg);
78 }
79
80 Ice::CfgNode *node = function->makeNode();
81 function->setEntryNode(node);
82
83 return function;
84}
85
86Ice::Type getPointerType(Ice::Type elementType)
87{
88 if(sizeof(void *) == 8)
89 {
90 return Ice::IceType_i64;
91 }
92 else
93 {
94 return Ice::IceType_i32;
95 }
96}
97
98Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
99{
100 int typeSize = Ice::typeWidthInBytes(type);
101 int totalSize = typeSize * (arraySize ? arraySize : 1);
102
103 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
104 auto address = function->makeVariable(getPointerType(type));
Nicolas Capens0cfc0432021-02-05 15:18:42 -0500105 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize); // SRoA depends on the alignment to match the type size.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500106 function->getEntryNode()->getInsts().push_front(alloca);
107
Nicolas Capensd0703092022-05-26 12:58:20 -0400108 ASSERT(!rr::getPragmaState(rr::InitializeLocalVariables) && "Subzero does not support initializing local variables");
109
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500110 return address;
111}
112
Nicolas Capens3d7faaa2022-10-04 14:48:57 -0400113Ice::Constant *getConstantPointer(Ice::GlobalContext *context, const void *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500114{
115 if(sizeof(void *) == 8)
116 {
117 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
118 }
119 else
120 {
121 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
122 }
123}
124
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400125// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
126Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500127
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400128// Wrapper for calls on C functions with Ice types
129Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
130{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500131 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400132
133 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
134 // then truncate result to bool.
135 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
136 const bool returningBool = (retTy == Ice::IceType_i1);
137 if(returningBool)
138 {
139 ret = function->makeVariable(Ice::IceType_i32);
140 }
141 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500142 {
143 ret = function->makeVariable(retTy);
144 }
145
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400146 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500147 for(auto arg : iceArgs)
148 {
149 call->addArg(arg);
150 }
151
152 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400153
154 if(returningBool)
155 {
156 // Truncate result to bool so that if any (lsb) bits were set, result will be true
157 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
158 }
159
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500160 return ret;
161}
162
Nicolas Capens3d7faaa2022-10-04 14:48:57 -0400163Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, const void *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400164{
165 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
166 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
167}
168
Antonio Maiorano62427e02020-02-13 09:18:05 -0500169// Wrapper for calls on C functions with Ice types
170template<typename Return, typename... CArgs, typename... RArgs>
Nicolas Capens629bf952022-01-18 15:08:14 -0500171Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&...args)
Antonio Maiorano62427e02020-02-13 09:18:05 -0500172{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400173 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
174
Nicolas Capens519cf222020-05-08 15:27:19 -0400175 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500176 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Nicolas Capens3d7faaa2022-10-04 14:48:57 -0400177 return Call(function, basicBlock, retTy, reinterpret_cast<const void *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500178}
179
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400180Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
181{
182 Ice::Variable *to = function->makeVariable(toType);
183 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
184 basicBlock->appendInst(cast);
185 return to;
186}
187
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500188Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500189{
Antonio Maiorano02a39532020-01-21 15:15:34 -0500190 Ice::Variable *result = function->makeVariable(type);
191 auto load = Ice::InstLoad::create(function, result, ptr, align);
192 basicBlock->appendInst(load);
193
194 return result;
195}
196
197} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500198} // namespace
199
Ben Clayton713b8d32019-12-17 20:37:56 +0000200namespace rr {
201class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500202class CoroutineGenerator;
203} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500204
205namespace {
206
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -0500207// Used to automatically invoke llvm_shutdown() when driver is unloaded
208llvm::llvm_shutdown_obj llvmShutdownObj;
209
Nicolas Capens157ba262019-12-10 17:49:14 -0500210Ice::GlobalContext *context = nullptr;
211Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400212Ice::CfgNode *entryBlock = nullptr;
213Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500214Ice::CfgNode *basicBlock = nullptr;
215Ice::CfgLocalAllocatorScope *allocator = nullptr;
216rr::ELFMemoryStreamer *routine = nullptr;
217
218std::mutex codegenMutex;
219
220Ice::ELFFileStreamer *elfFile = nullptr;
221Ice::Fdstream *out = nullptr;
222
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500223// Coroutine globals
224rr::Type *coroYieldType = nullptr;
225std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500226marl::Scheduler &getOrCreateScheduler()
227{
228 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100229 marl::Scheduler::Config cfg;
230 cfg.setWorkerThreadCount(8);
231 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500232 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500233
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500234 return *scheduler;
235}
Nicolas Capens54313fb2021-02-19 14:26:27 -0500236
237rr::Nucleus::OptimizerCallback *optimizerCallback = nullptr;
238
Nicolas Capens157ba262019-12-10 17:49:14 -0500239} // Anonymous namespace
240
241namespace {
242
243#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +0000244# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500245#endif
246
Ben Clayton713b8d32019-12-17 20:37:56 +0000247#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
248# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500249#endif
250
Nicolas Capens79d4c6c2022-04-22 17:20:26 -0400251Ice::OptLevel toIce(int level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400252{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500253 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100254 {
Nicolas Capens79d4c6c2022-04-22 17:20:26 -0400255 // Note that O0 and O1 are not implemented by Subzero
256 case 0: return Ice::Opt_m1;
257 case 1: return Ice::Opt_m1;
258 case 2: return Ice::Opt_2;
259 case 3: return Ice::Opt_2;
Nicolas Capens112faf42019-12-13 17:32:26 -0500260 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100261 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500262 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400263}
264
Antonio Maiorano370cba52019-12-31 11:36:07 -0500265Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
266{
267 switch(memoryOrder)
268 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500269 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
270 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
271 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
272 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
273 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
274 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
Antonio Maiorano370cba52019-12-31 11:36:07 -0500275 }
276 return Ice::Intrinsics::MemoryOrderInvalid;
277}
278
// Host CPU feature flags used by this backend.
//
// ARM is decided at compile time from the target architecture macros.
// SSE4_1 is queried from the CPU via the cpuid instruction on x86 targets and
// is false everywhere else. Both statics are defined after the class.
class CPUID
{
public:
    const static bool ARM;
    const static bool SSE4_1;

private:
    // Executes cpuid for leaf `info`, storing EAX, EBX, ECX and EDX into
    // registers[0..3]. On non-x86 targets all four entries are set to zero.
    static void cpuid(int registers[4], int info)
    {
#if defined(__i386__) || defined(__x86_64__)
#    if defined(_WIN32)
        __cpuid(registers, info);
#    else
        __asm volatile("cpuid"
                       : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
                       : "a"(info));
#    endif
#else
        for(int i = 0; i < 4; i++)
        {
            registers[i] = 0;
        }
#endif
    }

    // True when compiling for 32-bit or 64-bit ARM; false for x86 and MIPS.
    // Any other architecture is a compile error.
    constexpr static bool detectARM()
    {
#if defined(__arm__) || defined(__aarch64__)
        return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
        return false;
#else
#    error "Unknown architecture"
#endif
    }

    // Tests bit 19 of ECX from cpuid leaf 1.
    static bool detectSSE4_1()
    {
#if defined(__i386__) || defined(__x86_64__)
        const int sse41Mask = 0x00080000;  // ECX bit 19

        int registers[4];
        cpuid(registers, 1);

        return (registers[2] & sse41Mask) != 0;
#else
        return false;
#endif
    }
};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500328
Sean Risser46a649d2021-08-30 15:44:33 -0400329constexpr bool CPUID::ARM = CPUID::detectARM();
Nicolas Capens157ba262019-12-10 17:49:14 -0500330const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
Sean Risser46a649d2021-08-30 15:44:33 -0400331constexpr bool emulateIntrinsics = false;
332constexpr bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400333
Nicolas Capens157ba262019-12-10 17:49:14 -0500334constexpr bool subzeroDumpEnabled = false;
335constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500336
337#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500338static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
339static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500340#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500341
342} // anonymous namespace
343
344namespace rr {
345
Nicolas Capens3b0ad202022-06-02 15:02:31 -0400346const int SIMD::Width = 4;
347
Nicolas Capens70505b42022-01-31 22:29:48 -0500348std::string Caps::backendName()
Antonio Maioranoab210f92019-12-13 16:26:24 -0500349{
350 return "Subzero";
351}
352
Nicolas Capens70505b42022-01-31 22:29:48 -0500353bool Caps::coroutinesSupported()
354{
355 return true;
356}
357
358bool Caps::fmaIsFast()
359{
360 // TODO(b/214591655): Subzero currently never emits FMA instructions. std::fma() is called instead.
361 return false;
362}
Nicolas Capens157ba262019-12-10 17:49:14 -0500363
364enum EmulatedType
365{
366 EmulatedShift = 16,
367 EmulatedV2 = 2 << EmulatedShift,
368 EmulatedV4 = 4 << EmulatedShift,
369 EmulatedV8 = 8 << EmulatedShift,
370 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
371
372 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
373 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
374 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000375 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
376 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500377 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
378};
379
Ben Clayton713b8d32019-12-17 20:37:56 +0000380class Value : public Ice::Operand
381{};
382class SwitchCases : public Ice::InstSwitch
383{};
384class BasicBlock : public Ice::CfgNode
385{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500386
387Ice::Type T(Type *t)
388{
389 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
390 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500391}
392
Nicolas Capens157ba262019-12-10 17:49:14 -0500393Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400394{
Ben Clayton713b8d32019-12-17 20:37:56 +0000395 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500396}
397
398Type *T(EmulatedType t)
399{
Ben Clayton713b8d32019-12-17 20:37:56 +0000400 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500401}
402
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500403std::vector<Ice::Type> T(const std::vector<Type *> &types)
404{
405 std::vector<Ice::Type> result;
406 result.reserve(types.size());
407 for(auto &t : types)
408 {
409 result.push_back(T(t));
410 }
411 return result;
412}
413
Nicolas Capens157ba262019-12-10 17:49:14 -0500414Value *V(Ice::Operand *v)
415{
Ben Clayton713b8d32019-12-17 20:37:56 +0000416 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500417}
418
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500419Ice::Operand *V(Value *v)
420{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500421 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500422}
423
Antonio Maiorano62427e02020-02-13 09:18:05 -0500424std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
425{
426 std::vector<Ice::Operand *> result;
427 result.reserve(values.size());
428 for(auto &v : values)
429 {
430 result.push_back(V(v));
431 }
432 return result;
433}
434
Nicolas Capens157ba262019-12-10 17:49:14 -0500435BasicBlock *B(Ice::CfgNode *b)
436{
Ben Clayton713b8d32019-12-17 20:37:56 +0000437 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500438}
439
440static size_t typeSize(Type *type)
441{
442 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400443 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500444 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800445 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500446 case Type_v2i32: return 8;
447 case Type_v4i16: return 8;
448 case Type_v2i16: return 4;
449 case Type_v8i8: return 8;
450 case Type_v4i8: return 4;
451 case Type_v2f32: return 8;
452 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500453 }
454 }
455
456 return Ice::typeWidthInBytes(T(type));
457}
458
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400459static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500460{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400461 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500462 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
463 {
464 Nucleus::createRetVoid();
465 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400466
467 // Connect the entry block to the top of the initial basic block
468 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
469 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500470}
471
Ben Clayton713b8d32019-12-17 20:37:56 +0000472using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
473using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500474
475inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
476{
Ben Clayton713b8d32019-12-17 20:37:56 +0000477 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500478}
479
480inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
481{
482 return &sectionHeader(elfHeader)[index];
483}
484
485static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
486{
487 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
488
489 uint32_t index = relocation.getSymbol();
490 int table = relocationTable.sh_link;
491 void *symbolValue = nullptr;
492
493 if(index != SHN_UNDEF)
494 {
495 if(table == SHN_UNDEF) return nullptr;
496 const SectionHeader *symbolTable = elfSection(elfHeader, table);
497
498 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
499 if(index >= symtab_entries)
500 {
501 ASSERT(index < symtab_entries && "Symbol Index out of range");
502 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800503 }
504
Nicolas Capens157ba262019-12-10 17:49:14 -0500505 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000506 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500507 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800508
Nicolas Capens157ba262019-12-10 17:49:14 -0500509 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400510 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500511 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000512 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400513 }
514 else
515 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500516 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400517 }
Nicolas Capens66478362016-10-13 15:36:36 -0400518 }
519
Nicolas Capens157ba262019-12-10 17:49:14 -0500520 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000521 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500522
523 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400524 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400525 switch(relocation.getType())
526 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500527 case R_ARM_NONE:
528 // No relocation
529 break;
530 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500531 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000532 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500533 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
534 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
535 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400536 break;
Nicolas Capens112faf42019-12-13 17:32:26 -0500537 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500538 {
539 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
540 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
541 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400542 break;
Nicolas Capens112faf42019-12-13 17:32:26 -0500543 default:
544 ASSERT(false && "Unsupported relocation type");
545 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400546 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500547 }
548 else
549 {
550 switch(relocation.getType())
551 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500552 case R_386_NONE:
553 // No relocation
554 break;
555 case R_386_32:
556 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
557 break;
558 case R_386_PC32:
559 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
560 break;
561 default:
562 ASSERT(false && "Unsupported relocation type");
563 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500564 }
Nicolas Capens66478362016-10-13 15:36:36 -0400565 }
566
Nicolas Capens157ba262019-12-10 17:49:14 -0500567 return symbolValue;
568}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400569
Nicolas Capens157ba262019-12-10 17:49:14 -0500570static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
571{
572 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
573
574 uint32_t index = relocation.getSymbol();
575 int table = relocationTable.sh_link;
576 void *symbolValue = nullptr;
577
578 if(index != SHN_UNDEF)
579 {
580 if(table == SHN_UNDEF) return nullptr;
581 const SectionHeader *symbolTable = elfSection(elfHeader, table);
582
583 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
584 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400585 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500586 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400587 return nullptr;
588 }
589
Nicolas Capens157ba262019-12-10 17:49:14 -0500590 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000591 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500592 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400593
Nicolas Capens157ba262019-12-10 17:49:14 -0500594 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400595 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500596 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000597 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500598 }
599 else
600 {
601 return nullptr;
602 }
603 }
Nicolas Capens66478362016-10-13 15:36:36 -0400604
Nicolas Capens157ba262019-12-10 17:49:14 -0500605 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000606 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
607 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400608
Nicolas Capens157ba262019-12-10 17:49:14 -0500609 switch(relocation.getType())
610 {
Nicolas Capens112faf42019-12-13 17:32:26 -0500611 case R_X86_64_NONE:
612 // No relocation
613 break;
614 case R_X86_64_64:
615 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
616 break;
617 case R_X86_64_PC32:
618 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
619 break;
620 case R_X86_64_32S:
621 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
622 break;
623 default:
624 ASSERT(false && "Unsupported relocation type");
625 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500626 }
627
628 return symbolValue;
629}
630
// Location of one generated function inside a loaded ELF image.
struct EntryPoint
{
    // Address of the first byte of the function's code. Initialized to
    // nullptr so that a default-constructed EntryPoint never carries an
    // indeterminate pointer.
    const void *entry = nullptr;

    // Size in bytes of the function's code.
    size_t codeSize = 0;
};
636
637std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
638{
639 ASSERT(functionNames.size() > 0);
640 std::vector<EntryPoint> entryPoints(functionNames.size());
641
Ben Clayton713b8d32019-12-17 20:37:56 +0000642 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500643
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400644 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500645 if(!elfHeader->checkMagic())
646 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400647 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500648 }
649
650 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000651 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
652#if defined(__i386__)
653 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
654#elif defined(__x86_64__)
655 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
656#elif defined(__arm__)
657 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
658#elif defined(__aarch64__)
659 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
660#elif defined(__mips__)
661 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
662#else
663# error "Unsupported platform"
664#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500665
Ben Clayton713b8d32019-12-17 20:37:56 +0000666 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500667
668 for(int i = 0; i < elfHeader->e_shnum; i++)
669 {
670 if(sectionHeader[i].sh_type == SHT_PROGBITS)
671 {
672 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
673 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400674 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500675 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400676 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500677
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400678 for(size_t j = 0; j < functionNames.size(); ++j)
679 {
680 if(strstr(sectionName, functionNames[j]) != nullptr)
681 {
682 return j;
683 }
684 }
685
686 UNREACHABLE("Failed to find executable section that matches input function names");
687 return static_cast<size_t>(-1);
688 };
689
690 size_t index = findSectionNameEntryIndex();
691 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
692 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400693 }
694 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500695 else if(sectionHeader[i].sh_type == SHT_REL)
696 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000697 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400698
Nicolas Capens157ba262019-12-10 17:49:14 -0500699 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
700 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000701 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500702 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
703 }
704 }
705 else if(sectionHeader[i].sh_type == SHT_RELA)
706 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000707 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500708
709 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
710 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000711 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500712 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
713 }
714 }
715 }
716
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400717 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500718}
719
720template<typename T>
721struct ExecutableAllocator
722{
723 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000724 template<class U>
725 ExecutableAllocator(const ExecutableAllocator<U> &other)
726 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500727
728 using value_type = T;
729 using size_type = std::size_t;
730
731 T *allocate(size_type n)
732 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000733 return (T *)allocateMemoryPages(
734 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500735 }
736
737 void deallocate(T *p, size_type n)
738 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800739 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500740 }
741};
742
743class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
744{
745 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
746 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
747
748public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000749 ELFMemoryStreamer()
750 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500751 {
752 position = 0;
753 buffer.reserve(0x1000);
754 }
755
756 ~ELFMemoryStreamer() override
757 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500758 }
759
760 void write8(uint8_t Value) override
761 {
762 if(position == (uint64_t)buffer.size())
763 {
764 buffer.push_back(Value);
765 position++;
766 }
767 else if(position < (uint64_t)buffer.size())
768 {
769 buffer[position] = Value;
770 position++;
771 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000772 else
773 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500774 }
775
776 void writeBytes(llvm::StringRef Bytes) override
777 {
778 std::size_t oldSize = buffer.size();
779 buffer.resize(oldSize + Bytes.size());
780 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
781 position += Bytes.size();
782 }
783
Jason Macnak0587e072022-02-11 16:49:02 -0800784 uint64_t tell() const override
785 {
786 return position;
787 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500788
Jason Macnak0587e072022-02-11 16:49:02 -0800789 void seek(uint64_t Off) override
790 {
791 position = Off;
792 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500793
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400794 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500795 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400796 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500797
798#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500799 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
800#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400801 for(auto &entryPoint : entryPoints)
802 {
803 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
804 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500805#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500806
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400807 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400808 }
809
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500810 void finalize()
811 {
812 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
813
814 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
815 }
816
Ben Clayton713b8d32019-12-17 20:37:56 +0000817 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400818 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500819 ASSERT(func);
820 funcs[index] = func;
821 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400822
Nicolas Capens157ba262019-12-10 17:49:14 -0500823 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400824 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500825 ASSERT(funcs[index]);
826 return funcs[index];
827 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400828
Antonio Maiorano02a39532020-01-21 15:15:34 -0500829 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500830 {
Nicolas Capens4e75f452021-01-28 01:52:56 -0500831 // Check if we already have a suitable constant.
832 for(const auto &c : constantsPool)
833 {
834 void *ptr = c.data.get();
835 size_t space = c.space;
836
837 void *alignedPtr = std::align(alignment, size, ptr, space);
838
839 if(space < size)
840 {
841 continue;
842 }
843
844 if(memcmp(data, alignedPtr, size) == 0)
845 {
846 return alignedPtr;
847 }
848 }
849
Antonio Maiorano02a39532020-01-21 15:15:34 -0500850 // TODO(b/148086935): Replace with a buffer allocator.
851 size_t space = size + alignment;
852 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
853 void *ptr = buf.get();
854 void *alignedPtr = std::align(alignment, size, ptr, space);
855 ASSERT(alignedPtr);
856 memcpy(alignedPtr, data, size);
Nicolas Capens4e75f452021-01-28 01:52:56 -0500857 constantsPool.emplace_back(std::move(buf), space);
858
Antonio Maiorano02a39532020-01-21 15:15:34 -0500859 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500860 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400861
Nicolas Capens157ba262019-12-10 17:49:14 -0500862private:
Nicolas Capens4e75f452021-01-28 01:52:56 -0500863 struct Constant
864 {
865 Constant(std::unique_ptr<uint8_t[]> data, size_t space)
866 : data(std::move(data))
867 , space(space)
868 {}
869
870 std::unique_ptr<uint8_t[]> data;
871 size_t space;
872 };
873
Ben Clayton713b8d32019-12-17 20:37:56 +0000874 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500875 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
876 std::size_t position;
Nicolas Capens4e75f452021-01-28 01:52:56 -0500877 std::vector<Constant> constantsPool;
Nicolas Capens157ba262019-12-10 17:49:14 -0500878};
879
Antonio Maiorano62427e02020-02-13 09:18:05 -0500880#ifdef ENABLE_RR_PRINT
881void VPrintf(const std::vector<Value *> &vals)
882{
Antonio Maiorano8cbee412020-06-10 15:59:20 -0400883 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(rr::DebugPrintf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500884}
885#endif // ENABLE_RR_PRINT
886
Nicolas Capens157ba262019-12-10 17:49:14 -0500887Nucleus::Nucleus()
888{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400889 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500890
891 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
892 Ice::ClFlags::getParsedClFlags(Flags);
893
Ben Clayton713b8d32019-12-17 20:37:56 +0000894#if defined(__arm__)
895 Flags.setTargetArch(Ice::Target_ARM32);
896 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
897#elif defined(__mips__)
898 Flags.setTargetArch(Ice::Target_MIPS32);
899 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
900#else // x86
901 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
902 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
903#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500904 Flags.setOutFileType(Ice::FT_Elf);
Nicolas Capens79d4c6c2022-04-22 17:20:26 -0400905 Flags.setOptLevel(toIce(rr::getPragmaState(rr::OptimizationLevel)));
Nicolas Capens157ba262019-12-10 17:49:14 -0500906 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
907 Flags.setDisableHybridAssembly(true);
908
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500909 // Emit functions into separate sections in the ELF so we can find them by name
910 Flags.setFunctionSections(true);
911
Nicolas Capens157ba262019-12-10 17:49:14 -0500912 static llvm::raw_os_ostream cout(std::cout);
913 static llvm::raw_os_ostream cerr(std::cerr);
914
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500915 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500916 {
917 // Decorate text asm with liveness info
918 Flags.setDecorateAsm(true);
919 }
920
Ben Clayton713b8d32019-12-17 20:37:56 +0000921 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500922 {
923 std::error_code errorCode;
924 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
925 ::elfFile = new Ice::ELFFileStreamer(*out);
926 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
927 }
928 else
929 {
930 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
931 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
932 ::routine = elfMemory;
933 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400934
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400935#if !__has_feature(memory_sanitizer)
936 // thread_local variables in shared libraries are initialized at load-time,
937 // but this is not observed by MemorySanitizer if the loader itself was not
Nicolas Capensaf907702021-05-14 11:10:49 -0400938 // instrumented, leading to false-positive uninitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400939 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400940#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500941 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500942}
943
944Nucleus::~Nucleus()
945{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400946 delete Variable::unmaterializedVariables;
947 Variable::unmaterializedVariables = nullptr;
948
Nicolas Capens157ba262019-12-10 17:49:14 -0500949 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500950 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500951
952 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500953 ::allocator = nullptr;
954
Nicolas Capens157ba262019-12-10 17:49:14 -0500955 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500956 ::function = nullptr;
957
Nicolas Capens157ba262019-12-10 17:49:14 -0500958 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500959 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500960
961 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500962 ::elfFile = nullptr;
963
Nicolas Capens157ba262019-12-10 17:49:14 -0500964 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500965 ::out = nullptr;
966
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400967 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500968 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400969 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500970
971 ::codegenMutex.unlock();
972}
973
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500974// This function lowers and produces executable binary code in memory for the input functions,
975// and returns a Routine with the entry points to these functions.
976template<size_t Count>
Nicolas Capens79d4c6c2022-04-22 17:20:26 -0400977static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count])
Nicolas Capens157ba262019-12-10 17:49:14 -0500978{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500979 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
980 // and GlobalContext::emitItems.
981
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500982 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500983 {
984 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500985 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500986 }
987
988 ::context->emitFileHeader();
989
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500990 // Translate
991
992 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500993 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500994 Ice::Cfg *currFunc = functions[i];
995
996 // Install function allocator in TLS for Cfg-specific container allocators
997 Ice::CfgLocalAllocatorScope allocScope(currFunc);
998
999 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
1000
Nicolas Capens54313fb2021-02-19 14:26:27 -05001001 if(::optimizerCallback)
1002 {
1003 Nucleus::OptimizerReport report;
1004 rr::optimize(currFunc, &report);
1005 ::optimizerCallback(&report);
1006 ::optimizerCallback = nullptr;
1007 }
1008 else
1009 {
1010 rr::optimize(currFunc);
1011 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001012
1013 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001014 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001015
1016 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001017 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001018
1019 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1020
1021 if(subzeroEmitTextAsm)
1022 {
1023 currFunc->emit();
1024 }
1025
1026 currFunc->emitIAS();
Nicolas Capensff010f92021-02-01 12:22:53 -05001027
1028 if(currFunc->hasError())
1029 {
1030 return nullptr;
1031 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001032 }
1033
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001034 // Emit items
1035
1036 ::context->lowerGlobals("");
1037
Nicolas Capens157ba262019-12-10 17:49:14 -05001038 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001039
1040 for(size_t i = 0; i < Count; ++i)
1041 {
1042 Ice::Cfg *currFunc = functions[i];
1043
1044 // Accumulate globals from functions to emit into the "last" section at the end
1045 auto globals = currFunc->getGlobalInits();
1046 if(globals && !globals->empty())
1047 {
1048 ::context->getGlobals()->merge(globals.get());
1049 }
1050
1051 auto assembler = currFunc->releaseAssembler();
1052 assembler->alignFunction();
1053 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1054 }
1055
Nicolas Capens157ba262019-12-10 17:49:14 -05001056 ::context->lowerGlobals("last");
1057 ::context->lowerConstants();
1058 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001059
Nicolas Capens157ba262019-12-10 17:49:14 -05001060 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001061 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001062 objectWriter->writeNonUserSections();
1063
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001064 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001065 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1066 ASSERT(entryPoints.size() == Count);
1067 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001068 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001069 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001070 }
1071
1072 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001073
1074 Routine *handoffRoutine = ::routine;
1075 ::routine = nullptr;
1076
1077 return std::shared_ptr<Routine>(handoffRoutine);
1078}
1079
Nicolas Capens79d4c6c2022-04-22 17:20:26 -04001080std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001081{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001082 finalizeFunction();
Nicolas Capens79d4c6c2022-04-22 17:20:26 -04001083 return rr::acquireRoutine({ ::function }, { name });
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001084}
1085
Nicolas Capens157ba262019-12-10 17:49:14 -05001086Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1087{
1088 Ice::Type type = T(t);
1089 int typeSize = Ice::typeWidthInBytes(type);
1090 int totalSize = typeSize * (arraySize ? arraySize : 1);
1091
1092 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1093 auto address = ::function->makeVariable(T(getPointerType(t)));
Nicolas Capens0cfc0432021-02-05 15:18:42 -05001094 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize); // SRoA depends on the alignment to match the type size.
Nicolas Capens157ba262019-12-10 17:49:14 -05001095 ::function->getEntryNode()->getInsts().push_front(alloca);
1096
1097 return V(address);
1098}
1099
1100BasicBlock *Nucleus::createBasicBlock()
1101{
1102 return B(::function->makeNode());
1103}
1104
1105BasicBlock *Nucleus::getInsertBlock()
1106{
1107 return B(::basicBlock);
1108}
1109
1110void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1111{
Nicolas Capens7c296ec2021-02-18 14:10:26 -05001112 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001113
1114 ::basicBlock = basicBlock;
1115}
1116
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001117void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001118{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001119 ASSERT(::function == nullptr);
1120 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001121 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001122 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001123 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001124
1125 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1126
1127 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1128 // becomes invalid if another one is created; for example, when creating await and destroy functions
1129 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1130 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001131 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1132
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001133 ::entryBlock = ::function->getEntryNode();
1134 ::basicBlock = ::function->makeNode();
1135 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001136}
1137
1138Value *Nucleus::getArgument(unsigned int index)
1139{
1140 return V(::function->getArgs()[index]);
1141}
1142
1143void Nucleus::createRetVoid()
1144{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001145 RR_DEBUG_INFO_UPDATE_LOC();
1146
Nicolas Capens157ba262019-12-10 17:49:14 -05001147 // Code generated after this point is unreachable, so any variables
1148 // being read can safely return an undefined value. We have to avoid
1149 // materializing variables after the terminator ret instruction.
1150 Variable::killUnmaterialized();
1151
1152 Ice::InstRet *ret = Ice::InstRet::create(::function);
1153 ::basicBlock->appendInst(ret);
1154}
1155
1156void Nucleus::createRet(Value *v)
1157{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001158 RR_DEBUG_INFO_UPDATE_LOC();
1159
Nicolas Capens157ba262019-12-10 17:49:14 -05001160 // Code generated after this point is unreachable, so any variables
1161 // being read can safely return an undefined value. We have to avoid
1162 // materializing variables after the terminator ret instruction.
1163 Variable::killUnmaterialized();
1164
1165 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1166 ::basicBlock->appendInst(ret);
1167}
1168
1169void Nucleus::createBr(BasicBlock *dest)
1170{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001171 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001172 Variable::materializeAll();
1173
1174 auto br = Ice::InstBr::create(::function, dest);
1175 ::basicBlock->appendInst(br);
1176}
1177
1178void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1179{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001180 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001181 Variable::materializeAll();
1182
1183 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1184 ::basicBlock->appendInst(br);
1185}
1186
1187static bool isCommutative(Ice::InstArithmetic::OpKind op)
1188{
1189 switch(op)
1190 {
Nicolas Capens112faf42019-12-13 17:32:26 -05001191 case Ice::InstArithmetic::Add:
1192 case Ice::InstArithmetic::Fadd:
1193 case Ice::InstArithmetic::Mul:
1194 case Ice::InstArithmetic::Fmul:
1195 case Ice::InstArithmetic::And:
1196 case Ice::InstArithmetic::Or:
1197 case Ice::InstArithmetic::Xor:
1198 return true;
1199 default:
1200 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001201 }
1202}
1203
1204static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1205{
1206 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1207
1208 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1209
1210 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1211 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1212 ::basicBlock->appendInst(arithmetic);
1213
1214 return V(result);
1215}
1216
1217Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1218{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001219 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001220 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1221}
1222
1223Value *Nucleus::createSub(Value *lhs, Value *rhs)
1224{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001225 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001226 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1227}
1228
1229Value *Nucleus::createMul(Value *lhs, Value *rhs)
1230{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001231 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001232 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1233}
1234
1235Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1236{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001237 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001238 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1239}
1240
1241Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1242{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001243 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001244 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1245}
1246
1247Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1248{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001249 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001250 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1251}
1252
1253Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1254{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001255 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001256 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1257}
1258
1259Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1260{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001261 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001262 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1263}
1264
1265Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1266{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001267 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001268 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1269}
1270
1271Value *Nucleus::createURem(Value *lhs, Value *rhs)
1272{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001273 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001274 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1275}
1276
1277Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1278{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001279 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001280 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1281}
1282
1283Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001285 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001286 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1287 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001288 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001289 return nullptr;
1290}
1291
Nicolas Capens157ba262019-12-10 17:49:14 -05001292Value *Nucleus::createShl(Value *lhs, Value *rhs)
1293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001295 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1296}
1297
1298Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001301 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1302}
1303
1304Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001307 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1308}
1309
1310Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001313 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1314}
1315
1316Value *Nucleus::createOr(Value *lhs, Value *rhs)
1317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001319 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1320}
1321
1322Value *Nucleus::createXor(Value *lhs, Value *rhs)
1323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001325 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1326}
1327
1328Value *Nucleus::createNeg(Value *v)
1329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001331 return createSub(createNullValue(T(v->getType())), v);
1332}
1333
1334Value *Nucleus::createFNeg(Value *v)
1335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4e7d3102022-06-21 01:42:18 -04001337 std::vector<double> c = { -0.0 };
Ben Clayton713b8d32019-12-17 20:37:56 +00001338 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001339
1340 return createFSub(negativeZero, v);
1341}
1342
1343Value *Nucleus::createNot(Value *v)
1344{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001345 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001346 if(Ice::isScalarIntegerType(v->getType()))
1347 {
1348 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1349 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001350 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001351 {
Nicolas Capens4e7d3102022-06-21 01:42:18 -04001352 std::vector<int64_t> c = { -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001353 return createXor(v, createConstantVector(c, T(v->getType())));
1354 }
1355}
1356
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001357static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1358{
1359#if defined(__i386__) || defined(__x86_64__)
1360 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1361 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1362 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1363#else
1364 if(atomic)
1365 {
1366 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1367 }
1368 if(memoryOrder != std::memory_order_relaxed)
1369 {
1370 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1371 }
1372#endif
1373
1374 // Vulkan doesn't allow sequential memory order
1375 ASSERT(memoryOrder != std::memory_order_seq_cst);
1376}
1377
Nicolas Capens157ba262019-12-10 17:49:14 -05001378Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1379{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001380 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001381 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001382
1383 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001384 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001385
Ben Clayton713b8d32019-12-17 20:37:56 +00001386 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001387 {
1388 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001389 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001390 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001391 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001392 auto pointer = RValue<Pointer<Byte>>(ptr);
1393 Int x = *Pointer<Int>(pointer);
1394
1395 Int4 vector;
1396 vector = Insert(vector, x, 0);
1397
Antonio Maiorano02a39532020-01-21 15:15:34 -05001398 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001399 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1400 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001401 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001402 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001403 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001404 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001405 auto pointer = RValue<Pointer<Byte>>(ptr);
1406 Int x = *Pointer<Int>(pointer);
1407 Int y = *Pointer<Int>(pointer + 4);
1408
1409 Int4 vector;
1410 vector = Insert(vector, x, 0);
1411 vector = Insert(vector, y, 1);
1412
Antonio Maiorano02a39532020-01-21 15:15:34 -05001413 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001414 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1415 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001416 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001417 else
1418 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001419 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001420 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001421 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001422 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Antonio Maiorano02a39532020-01-21 15:15:34 -05001423 result = ::function->makeVariable(T(type));
Nicolas Capens33a77f72021-02-08 15:04:38 -05001424 auto load = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001425 load->addArg(ptr);
1426 load->addArg(::context->getConstantInt32(typeSize(type)));
1427 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001428 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001429 }
1430 else
1431 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001432 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001433 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001434
Antonio Maiorano02a39532020-01-21 15:15:34 -05001435 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001436 return V(result);
1437}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001438
Nicolas Capens157ba262019-12-10 17:49:14 -05001439Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1440{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001441 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001442 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001443
Ben Clayton713b8d32019-12-17 20:37:56 +00001444#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001445 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001446 if(align != 0)
1447 {
1448 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1449 call->addArg(ptr);
1450 call->addArg(::context->getConstantInt64(typeSize(type)));
1451 ::basicBlock->appendInst(call);
1452 }
1453#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001454
Nicolas Capens157ba262019-12-10 17:49:14 -05001455 int valueType = (int)reinterpret_cast<intptr_t>(type);
1456
Ben Clayton713b8d32019-12-17 20:37:56 +00001457 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001458 {
1459 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001460 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001461 if(typeSize(type) == 4)
1462 {
1463 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1464 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1465 ::basicBlock->appendInst(bitcast);
1466
1467 RValue<Int4> v(V(vector));
1468
1469 auto pointer = RValue<Pointer<Byte>>(ptr);
1470 Int x = Extract(v, 0);
1471 *Pointer<Int>(pointer) = x;
1472 }
1473 else if(typeSize(type) == 8)
1474 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001475 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001476 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1477 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1478 ::basicBlock->appendInst(bitcast);
1479
1480 RValue<Int4> v(V(vector));
1481
1482 auto pointer = RValue<Pointer<Byte>>(ptr);
1483 Int x = Extract(v, 0);
1484 *Pointer<Int>(pointer) = x;
1485 Int y = Extract(v, 1);
1486 *Pointer<Int>(pointer + 4) = y;
1487 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001488 else
1489 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001490 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001491 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001492 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001493 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001494 auto store = Ice::InstIntrinsic::create(::function, 3, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001495 store->addArg(value);
1496 store->addArg(ptr);
1497 store->addArg(::context->getConstantInt32(typeSize(type)));
1498 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001499 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001500 }
1501 else
1502 {
1503 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001504
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001505 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001506 ::basicBlock->appendInst(store);
1507 }
1508
1509 return value;
1510}
1511
1512Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1513{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001514 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001515 ASSERT(index->getType() == Ice::IceType_i32);
1516
1517 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1518 {
1519 int32_t offset = constant->getValue() * (int)typeSize(type);
1520
1521 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001522 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001523 return ptr;
1524 }
1525
Nicolas Capens157ba262019-12-10 17:49:14 -05001526 return createAdd(ptr, createConstantInt(offset));
1527 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001528
Nicolas Capens157ba262019-12-10 17:49:14 -05001529 if(!Ice::isByteSizedType(T(type)))
1530 {
1531 index = createMul(index, createConstantInt((int)typeSize(type)));
1532 }
1533
Ben Clayton713b8d32019-12-17 20:37:56 +00001534 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001535 {
1536 if(unsignedIndex)
1537 {
1538 index = createZExt(index, T(Ice::IceType_i64));
1539 }
1540 else
1541 {
1542 index = createSExt(index, T(Ice::IceType_i64));
1543 }
1544 }
1545
1546 return createAdd(ptr, index);
1547}
1548
Antonio Maiorano370cba52019-12-31 11:36:07 -05001549static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1550{
1551 Ice::Variable *result = ::function->makeVariable(value->getType());
1552
1553 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001554 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001555 auto op = ::context->getConstantInt32(rmwOp);
1556 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1557 inst->addArg(op);
1558 inst->addArg(ptr);
1559 inst->addArg(value);
1560 inst->addArg(order);
1561 ::basicBlock->appendInst(inst);
1562
1563 return V(result);
1564}
1565
Nicolas Capens157ba262019-12-10 17:49:14 -05001566Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1567{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001568 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001569 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001570}
1571
1572Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1573{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001574 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001575 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001576}
1577
1578Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1579{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001580 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001581 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001582}
1583
1584Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1585{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001586 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001587 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001588}
1589
1590Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1591{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001592 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001593 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001594}
1595
1596Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1597{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001598 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001599 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001600}
1601
1602Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1603{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001604 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001605 Ice::Variable *result = ::function->makeVariable(value->getType());
1606
1607 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001608 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001609 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1610 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1611 inst->addArg(ptr);
1612 inst->addArg(compare);
1613 inst->addArg(value);
1614 inst->addArg(orderEq);
1615 inst->addArg(orderNeq);
1616 ::basicBlock->appendInst(inst);
1617
1618 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001619}
1620
1621static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1622{
1623 if(v->getType() == T(destType))
1624 {
1625 return v;
1626 }
1627
1628 Ice::Variable *result = ::function->makeVariable(T(destType));
1629 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1630 ::basicBlock->appendInst(cast);
1631
1632 return V(result);
1633}
1634
1635Value *Nucleus::createTrunc(Value *v, Type *destType)
1636{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001637 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001638 return createCast(Ice::InstCast::Trunc, v, destType);
1639}
1640
1641Value *Nucleus::createZExt(Value *v, Type *destType)
1642{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001643 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001644 return createCast(Ice::InstCast::Zext, v, destType);
1645}
1646
1647Value *Nucleus::createSExt(Value *v, Type *destType)
1648{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001649 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001650 return createCast(Ice::InstCast::Sext, v, destType);
1651}
1652
1653Value *Nucleus::createFPToUI(Value *v, Type *destType)
1654{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001655 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001656 return createCast(Ice::InstCast::Fptoui, v, destType);
1657}
1658
1659Value *Nucleus::createFPToSI(Value *v, Type *destType)
1660{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001661 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001662 return createCast(Ice::InstCast::Fptosi, v, destType);
1663}
1664
1665Value *Nucleus::createSIToFP(Value *v, Type *destType)
1666{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001667 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001668 return createCast(Ice::InstCast::Sitofp, v, destType);
1669}
1670
1671Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1672{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001673 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001674 return createCast(Ice::InstCast::Fptrunc, v, destType);
1675}
1676
1677Value *Nucleus::createFPExt(Value *v, Type *destType)
1678{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001679 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001680 return createCast(Ice::InstCast::Fpext, v, destType);
1681}
1682
1683Value *Nucleus::createBitCast(Value *v, Type *destType)
1684{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001685 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001686 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1687 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1688 // emulate them by writing to the stack and reading back as the destination type.
Alexis Hetud6dd61c2022-06-16 16:51:23 -04001689 if(emulateMismatchedBitCast || (v->getType() == Ice::Type::IceType_i64))
Nicolas Capens157ba262019-12-10 17:49:14 -05001690 {
1691 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1692 {
1693 Value *address = allocateStackVariable(destType);
1694 createStore(v, address, T(v->getType()));
1695 return createLoad(address, destType);
1696 }
1697 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1698 {
1699 Value *address = allocateStackVariable(T(v->getType()));
1700 createStore(v, address, T(v->getType()));
1701 return createLoad(address, destType);
1702 }
1703 }
1704
1705 return createCast(Ice::InstCast::Bitcast, v, destType);
1706}
1707
1708static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1709{
1710 ASSERT(lhs->getType() == rhs->getType());
1711
1712 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1713 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1714 ::basicBlock->appendInst(cmp);
1715
1716 return V(result);
1717}
1718
Nicolas Capens157ba262019-12-10 17:49:14 -05001719Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1720{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001721 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001722 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1723}
1724
1725Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1726{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001727 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001728 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1729}
1730
1731Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1732{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001733 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001734 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1735}
1736
1737Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1738{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001739 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001740 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1741}
1742
1743Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1744{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001745 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001746 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1747}
1748
1749Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1750{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001751 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001752 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1753}
1754
1755Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1756{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001757 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001758 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1759}
1760
1761Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1762{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001763 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001764 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1765}
1766
1767Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1768{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001769 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001770 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1771}
1772
1773Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1774{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001775 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001776 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1777}
1778
1779static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1780{
1781 ASSERT(lhs->getType() == rhs->getType());
1782 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1783
1784 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1785 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1786 ::basicBlock->appendInst(cmp);
1787
1788 return V(result);
1789}
1790
1791Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1792{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001793 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001794 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1795}
1796
1797Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1798{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001799 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001800 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1801}
1802
1803Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1804{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001805 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001806 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1807}
1808
1809Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1810{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001811 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001812 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1813}
1814
1815Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1816{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001817 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001818 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1819}
1820
1821Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1822{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001823 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001824 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1825}
1826
1827Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1828{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001829 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001830 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1831}
1832
1833Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1834{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001835 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001836 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1837}
1838
1839Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1840{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001841 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001842 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1843}
1844
1845Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1846{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001847 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001848 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1849}
1850
1851Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001854 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1855}
1856
1857Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1858{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001859 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001860 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1861}
1862
1863Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001866 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1867}
1868
1869Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1870{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001871 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001872 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1873}
1874
1875Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1876{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001877 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001878 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001879 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001880 ::basicBlock->appendInst(extract);
1881
1882 return V(result);
1883}
1884
1885Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1886{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001887 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001888 auto result = ::function->makeVariable(vector->getType());
1889 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1890 ::basicBlock->appendInst(insert);
1891
1892 return V(result);
1893}
1894
Nicolas Capens4e7d3102022-06-21 01:42:18 -04001895Value *Nucleus::createShuffleVector(Value *V1, Value *V2, std::vector<int> select)
Nicolas Capens157ba262019-12-10 17:49:14 -05001896{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001897 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001898 ASSERT(V1->getType() == V2->getType());
1899
Nicolas Capens4e7d3102022-06-21 01:42:18 -04001900 size_t size = Ice::typeNumElements(V1->getType());
Nicolas Capens157ba262019-12-10 17:49:14 -05001901 auto result = ::function->makeVariable(V1->getType());
1902 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1903
Nicolas Capens4e7d3102022-06-21 01:42:18 -04001904 const size_t selectSize = select.size();
1905 for(size_t i = 0; i < size; i++)
Nicolas Capens157ba262019-12-10 17:49:14 -05001906 {
Nicolas Capens4e7d3102022-06-21 01:42:18 -04001907 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i % selectSize])));
Nicolas Capens157ba262019-12-10 17:49:14 -05001908 }
1909
1910 ::basicBlock->appendInst(shuffle);
1911
1912 return V(result);
1913}
1914
1915Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1916{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001917 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001918 ASSERT(ifTrue->getType() == ifFalse->getType());
1919
1920 auto result = ::function->makeVariable(ifTrue->getType());
1921 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1922 ::basicBlock->appendInst(select);
1923
1924 return V(result);
1925}
1926
1927SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1928{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001929 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001930 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1931 ::basicBlock->appendInst(switchInst);
1932
Ben Clayton713b8d32019-12-17 20:37:56 +00001933 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001934}
1935
1936void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1937{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001938 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001939 switchCases->addBranch(label, label, branch);
1940}
1941
1942void Nucleus::createUnreachable()
1943{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001944 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001945 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1946 ::basicBlock->appendInst(unreachable);
1947}
1948
Antonio Maiorano62427e02020-02-13 09:18:05 -05001949Type *Nucleus::getType(Value *value)
1950{
1951 return T(V(value)->getType());
1952}
1953
1954Type *Nucleus::getContainedType(Type *vectorType)
1955{
1956 Ice::Type vecTy = T(vectorType);
1957 switch(vecTy)
1958 {
Nicolas Capens112faf42019-12-13 17:32:26 -05001959 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1960 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1961 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1962 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1963 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1964 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1965 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1966 default:
1967 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1968 return {};
Antonio Maiorano62427e02020-02-13 09:18:05 -05001969 }
1970}
1971
Nicolas Capens157ba262019-12-10 17:49:14 -05001972Type *Nucleus::getPointerType(Type *ElementType)
1973{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001974 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001975}
1976
Antonio Maiorano62427e02020-02-13 09:18:05 -05001977static constexpr Ice::Type getNaturalIntType()
1978{
1979 constexpr size_t intSize = sizeof(int);
1980 static_assert(intSize == 4 || intSize == 8, "");
1981 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1982}
1983
1984Type *Nucleus::getPrintfStorageType(Type *valueType)
1985{
1986 Ice::Type valueTy = T(valueType);
1987 switch(valueTy)
1988 {
Nicolas Capens112faf42019-12-13 17:32:26 -05001989 case Ice::IceType_i32:
1990 return T(getNaturalIntType());
Antonio Maiorano62427e02020-02-13 09:18:05 -05001991
Nicolas Capens112faf42019-12-13 17:32:26 -05001992 case Ice::IceType_f32:
1993 return T(Ice::IceType_f64);
Antonio Maiorano62427e02020-02-13 09:18:05 -05001994
Nicolas Capens112faf42019-12-13 17:32:26 -05001995 default:
1996 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1997 return {};
Antonio Maiorano62427e02020-02-13 09:18:05 -05001998 }
1999}
2000
Nicolas Capens157ba262019-12-10 17:49:14 -05002001Value *Nucleus::createNullValue(Type *Ty)
2002{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002003 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002004 if(Ice::isVectorType(T(Ty)))
2005 {
2006 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002007 std::vector<int64_t> c = { 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002008 return createConstantVector(c, Ty);
2009 }
2010 else
2011 {
2012 return V(::context->getConstantZero(T(Ty)));
2013 }
2014}
2015
2016Value *Nucleus::createConstantLong(int64_t i)
2017{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002018 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002019 return V(::context->getConstantInt64(i));
2020}
2021
2022Value *Nucleus::createConstantInt(int i)
2023{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002024 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002025 return V(::context->getConstantInt32(i));
2026}
2027
2028Value *Nucleus::createConstantInt(unsigned int i)
2029{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002030 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002031 return V(::context->getConstantInt32(i));
2032}
2033
2034Value *Nucleus::createConstantBool(bool b)
2035{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002036 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002037 return V(::context->getConstantInt1(b));
2038}
2039
2040Value *Nucleus::createConstantByte(signed char i)
2041{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002042 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002043 return V(::context->getConstantInt8(i));
2044}
2045
2046Value *Nucleus::createConstantByte(unsigned char i)
2047{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002048 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002049 return V(::context->getConstantInt8(i));
2050}
2051
2052Value *Nucleus::createConstantShort(short i)
2053{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002054 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002055 return V(::context->getConstantInt16(i));
2056}
2057
2058Value *Nucleus::createConstantShort(unsigned short i)
2059{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002060 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002061 return V(::context->getConstantInt16(i));
2062}
2063
2064Value *Nucleus::createConstantFloat(float x)
2065{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002066 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002067 return V(::context->getConstantFloat(x));
2068}
2069
2070Value *Nucleus::createNullPointer(Type *Ty)
2071{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002072 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002073 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002074}
2075
Nicolas Capens3d7faaa2022-10-04 14:48:57 -04002076static Ice::Constant *IceConstantData(const void *data, size_t size, size_t alignment = 1)
Antonio Maiorano02a39532020-01-21 15:15:34 -05002077{
2078 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2079}
2080
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002081Value *Nucleus::createConstantVector(std::vector<int64_t> constants, Type *type)
Nicolas Capens157ba262019-12-10 17:49:14 -05002082{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002083 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002084 const int vectorSize = 16;
2085 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2086 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002087
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002088 const auto &i = constants;
2089 const size_t s = constants.size();
Antonio Maiorano02a39532020-01-21 15:15:34 -05002090
Antonio Maioranoa0957112020-03-04 15:06:19 -05002091 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002092 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002093
2094 switch((int)reinterpret_cast<intptr_t>(type))
2095 {
Nicolas Capens112faf42019-12-13 17:32:26 -05002096 case Ice::IceType_v4i32:
2097 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002098 {
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002099 const int initializer[4] = { (int)i[0 % s], (int)i[1 % s], (int)i[2 % s], (int)i[3 % s] };
2100 static_assert(sizeof(initializer) == vectorSize);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002101 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002102 }
2103 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002104 case Ice::IceType_v8i16:
2105 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002106 {
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002107 const short initializer[8] = { (short)i[0 % s], (short)i[1 % s], (short)i[2 % s], (short)i[3 % s], (short)i[4 % s], (short)i[5 % s], (short)i[6 % s], (short)i[7 % s] };
2108 static_assert(sizeof(initializer) == vectorSize);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002109 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002110 }
2111 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002112 case Ice::IceType_v16i8:
2113 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002114 {
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002115 const char initializer[16] = { (char)i[0 % s], (char)i[1 % s], (char)i[2 % s], (char)i[3 % s], (char)i[4 % s], (char)i[5 % s], (char)i[6 % s], (char)i[7 % s],
2116 (char)i[8 % s], (char)i[9 % s], (char)i[10 % s], (char)i[11 % s], (char)i[12 % s], (char)i[13 % s], (char)i[14 % s], (char)i[15 % s] };
2117 static_assert(sizeof(initializer) == vectorSize);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002118 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002119 }
2120 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002121 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002122 {
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002123 const int initializer[4] = { (int)i[0 % s], (int)i[1 % s], (int)i[0 % s], (int)i[1 % s] };
2124 static_assert(sizeof(initializer) == vectorSize);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002125 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002126 }
2127 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002128 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002129 {
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002130 const short initializer[8] = { (short)i[0 % s], (short)i[1 % s], (short)i[2 % s], (short)i[3 % s], (short)i[0 % s], (short)i[1 % s], (short)i[2 % s], (short)i[3 % s] };
2131 static_assert(sizeof(initializer) == vectorSize);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002132 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002133 }
2134 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002135 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002136 {
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002137 const char initializer[16] = { (char)i[0 % s], (char)i[1 % s], (char)i[2 % s], (char)i[3 % s], (char)i[4 % s], (char)i[5 % s], (char)i[6 % s], (char)i[7 % s], (char)i[0 % s], (char)i[1 % s], (char)i[2 % s], (char)i[3 % s], (char)i[4 % s], (char)i[5 % s], (char)i[6 % s], (char)i[7 % s] };
2138 static_assert(sizeof(initializer) == vectorSize);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002139 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002140 }
2141 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002142 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002143 {
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002144 const char initializer[16] = { (char)i[0 % s], (char)i[1 % s], (char)i[2 % s], (char)i[3 % s], (char)i[0 % s], (char)i[1 % s], (char)i[2 % s], (char)i[3 % s], (char)i[0 % s], (char)i[1 % s], (char)i[2 % s], (char)i[3 % s], (char)i[0 % s], (char)i[1 % s], (char)i[2 % s], (char)i[3 % s] };
2145 static_assert(sizeof(initializer) == vectorSize);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002146 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002147 }
2148 break;
Nicolas Capens112faf42019-12-13 17:32:26 -05002149 default:
2150 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002151 }
2152
Antonio Maiorano02a39532020-01-21 15:15:34 -05002153 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002154
Antonio Maiorano02a39532020-01-21 15:15:34 -05002155 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002156 return V(result);
2157}
2158
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002159Value *Nucleus::createConstantVector(std::vector<double> constants, Type *type)
Nicolas Capens157ba262019-12-10 17:49:14 -05002160{
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002161 RR_DEBUG_INFO_UPDATE_LOC();
2162 const int vectorSize = 16;
2163 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2164 const int alignment = vectorSize;
2165
2166 const auto &f = constants;
2167 const size_t s = constants.size();
2168
2169 // TODO(b/148082873): Fix global variable constants when generating multiple functions
2170 Ice::Constant *ptr = nullptr;
2171
2172 switch((int)reinterpret_cast<intptr_t>(type))
2173 {
2174 case Ice::IceType_v4f32:
2175 {
2176 const float initializer[4] = { (float)f[0 % s], (float)f[1 % s], (float)f[2 % s], (float)f[3 % s] };
2177 static_assert(sizeof(initializer) == vectorSize);
2178 ptr = IceConstantData(initializer, vectorSize, alignment);
2179 }
2180 break;
2181 case Type_v2f32:
2182 {
2183 const float initializer[4] = { (float)f[0 % s], (float)f[1 % s], (float)f[0 % s], (float)f[1 % s] };
2184 static_assert(sizeof(initializer) == vectorSize);
2185 ptr = IceConstantData(initializer, vectorSize, alignment);
2186 }
2187 break;
2188 default:
2189 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
2190 }
2191
2192 ASSERT(ptr);
2193
2194 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
2195 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05002196}
2197
Antonio Maiorano62427e02020-02-13 09:18:05 -05002198Value *Nucleus::createConstantString(const char *v)
2199{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002200 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002201 return V(IceConstantData(v, strlen(v) + 1));
2202}
2203
Nicolas Capens54313fb2021-02-19 14:26:27 -05002204void Nucleus::setOptimizerCallback(OptimizerCallback *callback)
2205{
2206 ::optimizerCallback = callback;
2207}
2208
Nicolas Capens519cf222020-05-08 15:27:19 -04002209Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002210{
2211 return T(Ice::IceType_void);
2212}
2213
Nicolas Capens519cf222020-05-08 15:27:19 -04002214Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002215{
2216 return T(Ice::IceType_i1);
2217}
2218
Nicolas Capens519cf222020-05-08 15:27:19 -04002219Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002220{
2221 return T(Ice::IceType_i8);
2222}
2223
Nicolas Capens519cf222020-05-08 15:27:19 -04002224Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002225{
2226 return T(Ice::IceType_i8);
2227}
2228
Nicolas Capens519cf222020-05-08 15:27:19 -04002229Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002230{
2231 return T(Ice::IceType_i16);
2232}
2233
Nicolas Capens519cf222020-05-08 15:27:19 -04002234Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002235{
2236 return T(Ice::IceType_i16);
2237}
2238
Nicolas Capens519cf222020-05-08 15:27:19 -04002239Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002240{
2241 return T(Type_v4i8);
2242}
2243
Nicolas Capens519cf222020-05-08 15:27:19 -04002244Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002245{
2246 return T(Type_v4i8);
2247}
2248
Nicolas Capens442e25b2022-06-22 12:02:52 -04002249static RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002250{
Ben Clayton713b8d32019-12-17 20:37:56 +00002251 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002252}
2253
Nicolas Capens442e25b2022-06-22 12:02:52 -04002254static RValue<Byte> Extract(RValue<Byte8> val, int i)
Ben Clayton713b8d32019-12-17 20:37:56 +00002255{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002256 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002257}
2258
Nicolas Capens442e25b2022-06-22 12:02:52 -04002259static RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
Ben Clayton713b8d32019-12-17 20:37:56 +00002260{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002261 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002262}
Ben Clayton713b8d32019-12-17 20:37:56 +00002263
Nicolas Capens157ba262019-12-10 17:49:14 -05002264RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2265{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002266 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002267 if(emulateIntrinsics)
2268 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002269 return Scalarize([](auto a, auto b) { return SaturateUnsigned(Short(Int(a) + Int(b))); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002270 }
2271 else
2272 {
2273 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002274 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002275 auto paddusb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002276 paddusb->addArg(x.value());
2277 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002278 ::basicBlock->appendInst(paddusb);
2279
2280 return RValue<Byte8>(V(result));
2281 }
2282}
2283
2284RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2285{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002286 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002287 if(emulateIntrinsics)
2288 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002289 return Scalarize([](auto a, auto b) { return SaturateUnsigned(Short(Int(a) - Int(b))); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002290 }
2291 else
2292 {
2293 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002294 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002295 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002296 psubusw->addArg(x.value());
2297 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002298 ::basicBlock->appendInst(psubusw);
2299
2300 return RValue<Byte8>(V(result));
2301 }
2302}
2303
2304RValue<SByte> Extract(RValue<SByte8> val, int i)
2305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002307 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002308}
2309
2310RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002313 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002314}
2315
2316RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002319 if(emulateIntrinsics)
2320 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002321 return Scalarize([rhs](auto a) { return a >> SByte(rhs); }, lhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05002322 }
2323 else
2324 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002325#if defined(__i386__) || defined(__x86_64__)
2326 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2327 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2328 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002329
Ben Clayton713b8d32019-12-17 20:37:56 +00002330 return As<SByte8>(hi | lo);
2331#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002332 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002333#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002334 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002335}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002336
Nicolas Capens157ba262019-12-10 17:49:14 -05002337RValue<Int> SignMask(RValue<Byte8> x)
2338{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002339 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002340 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002341 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002342 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2343 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002344 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002345 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002346 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002347 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002348 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002349 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002350 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002351 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002352
Nicolas Capens157ba262019-12-10 17:49:14 -05002353 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002354 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002355}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002356
2357// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2358// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002359// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002360// }
2361
Nicolas Capens157ba262019-12-10 17:49:14 -05002362RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2363{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002364 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002365 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002366}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002367
Nicolas Capens519cf222020-05-08 15:27:19 -04002368Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002369{
2370 return T(Type_v8i8);
2371}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002372
Nicolas Capens598f8d82016-09-26 15:09:10 -04002373// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2374// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002375// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002376// }
2377
2378// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2379// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002380// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002381// }
2382
Nicolas Capens157ba262019-12-10 17:49:14 -05002383RValue<SByte> SaturateSigned(RValue<Short> x)
2384{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002385 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002386 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2387}
2388
2389RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2390{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002391 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002392 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002393 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002394 return Scalarize([](auto a, auto b) { return SaturateSigned(Short(Int(a) + Int(b))); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002395 }
2396 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002397 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002398 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002399 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002400 auto paddsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002401 paddsb->addArg(x.value());
2402 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002403 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002404
Nicolas Capens157ba262019-12-10 17:49:14 -05002405 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002406 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002407}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002408
Nicolas Capens157ba262019-12-10 17:49:14 -05002409RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2410{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002411 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002412 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002413 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002414 return Scalarize([](auto a, auto b) { return SaturateSigned(Short(Int(a) - Int(b))); }, x, y);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002415 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002416 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002417 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002418 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002419 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002420 auto psubsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002421 psubsb->addArg(x.value());
2422 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002423 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002424
Nicolas Capens157ba262019-12-10 17:49:14 -05002425 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002426 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002427}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002428
Nicolas Capens157ba262019-12-10 17:49:14 -05002429RValue<Int> SignMask(RValue<SByte8> x)
2430{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002431 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002432 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002433 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002434 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2435 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002436 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002437 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002438 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002439 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002440 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002441 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002442 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002443 ::basicBlock->appendInst(movmsk);
2444
2445 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002446 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002447}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002448
Nicolas Capens157ba262019-12-10 17:49:14 -05002449RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2450{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002451 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002452 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002453}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002454
Nicolas Capens157ba262019-12-10 17:49:14 -05002455RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2456{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002457 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002458 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002459}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002460
Nicolas Capens519cf222020-05-08 15:27:19 -04002461Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002462{
2463 return T(Type_v8i8);
2464}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002465
Nicolas Capens519cf222020-05-08 15:27:19 -04002466Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002467{
2468 return T(Ice::IceType_v16i8);
2469}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002470
Nicolas Capens519cf222020-05-08 15:27:19 -04002471Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002472{
2473 return T(Ice::IceType_v16i8);
2474}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002475
Nicolas Capens519cf222020-05-08 15:27:19 -04002476Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002477{
2478 return T(Type_v2i16);
2479}
Nicolas Capensd4227962016-11-09 14:24:25 -05002480
Nicolas Capens519cf222020-05-08 15:27:19 -04002481Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002482{
2483 return T(Type_v2i16);
2484}
Nicolas Capensd4227962016-11-09 14:24:25 -05002485
Nicolas Capens157ba262019-12-10 17:49:14 -05002486Short4::Short4(RValue<Int4> cast)
2487{
Nicolas Capens4e7d3102022-06-21 01:42:18 -04002488 std::vector<int> select = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002489 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002490 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2491
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002492 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002493 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002494
2495 storeValue(short4);
2496}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002497
2498// Short4::Short4(RValue<Float> cast)
2499// {
2500// }
2501
Nicolas Capens157ba262019-12-10 17:49:14 -05002502Short4::Short4(RValue<Float4> cast)
2503{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002504 // TODO(b/150791192): Generalize and optimize
2505 auto smin = std::numeric_limits<short>::min();
2506 auto smax = std::numeric_limits<short>::max();
2507 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002508}
2509
2510RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2511{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002512 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002513 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002514 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002515 return Scalarize([rhs](auto x) { return x << Short(rhs); }, lhs);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002516 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002517 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002518 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002519 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002520 }
2521}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002522
Nicolas Capens157ba262019-12-10 17:49:14 -05002523RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2524{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002525 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002526 if(emulateIntrinsics)
2527 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002528 return Scalarize([rhs](auto x) { return x >> Short(rhs); }, lhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05002529 }
2530 else
2531 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002532 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002533 }
2534}
2535
2536RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2537{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002538 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002539 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002540 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002541 ::basicBlock->appendInst(cmp);
2542
2543 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002544 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002545 ::basicBlock->appendInst(select);
2546
2547 return RValue<Short4>(V(result));
2548}
2549
2550RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2551{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002552 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002553 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002554 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002555 ::basicBlock->appendInst(cmp);
2556
2557 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002558 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002559 ::basicBlock->appendInst(select);
2560
2561 return RValue<Short4>(V(result));
2562}
2563
2564RValue<Short> SaturateSigned(RValue<Int> x)
2565{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002566 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002567 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2568}
2569
2570RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2571{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002572 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002573 if(emulateIntrinsics)
2574 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002575 return Scalarize([](auto a, auto b) { return SaturateSigned(Int(a) + Int(b)); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002576 }
2577 else
2578 {
2579 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002580 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002581 auto paddsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002582 paddsw->addArg(x.value());
2583 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002584 ::basicBlock->appendInst(paddsw);
2585
2586 return RValue<Short4>(V(result));
2587 }
2588}
2589
2590RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2591{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002592 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002593 if(emulateIntrinsics)
2594 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002595 return Scalarize([](auto a, auto b) { return SaturateSigned(Int(a) - Int(b)); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002596 }
2597 else
2598 {
2599 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002600 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002601 auto psubsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002602 psubsw->addArg(x.value());
2603 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002604 ::basicBlock->appendInst(psubsw);
2605
2606 return RValue<Short4>(V(result));
2607 }
2608}
2609
2610RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2611{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002612 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002613 if(emulateIntrinsics)
2614 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002615 return Scalarize([](auto a, auto b) { return Short((Int(a) * Int(b)) >> 16); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002616 }
2617 else
2618 {
2619 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002620 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002621 auto pmulhw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002622 pmulhw->addArg(x.value());
2623 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002624 ::basicBlock->appendInst(pmulhw);
2625
2626 return RValue<Short4>(V(result));
2627 }
2628}
2629
2630RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2631{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002632 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002633 if(emulateIntrinsics)
2634 {
2635 Int2 result;
2636 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2637 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2638
2639 return result;
2640 }
2641 else
2642 {
2643 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002644 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002645 auto pmaddwd = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002646 pmaddwd->addArg(x.value());
2647 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002648 ::basicBlock->appendInst(pmaddwd);
2649
2650 return As<Int2>(V(result));
2651 }
2652}
2653
2654RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2655{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002656 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002657 if(emulateIntrinsics)
2658 {
2659 SByte8 result;
2660 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2661 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2662 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2663 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2664 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2665 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2666 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2667 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2668
2669 return result;
2670 }
2671 else
2672 {
2673 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002674 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002675 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002676 pack->addArg(x.value());
2677 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002678 ::basicBlock->appendInst(pack);
2679
2680 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2681 }
2682}
2683
2684RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2685{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002686 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002687 if(emulateIntrinsics)
2688 {
2689 Byte8 result;
2690 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2691 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2692 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2693 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2694 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2695 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2696 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2697 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2698
2699 return result;
2700 }
2701 else
2702 {
2703 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002704 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002705 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002706 pack->addArg(x.value());
2707 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002708 ::basicBlock->appendInst(pack);
2709
2710 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2711 }
2712}
2713
2714RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2715{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002716 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002717 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002718}
2719
2720RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2721{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002722 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002723 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002724}
2725
Nicolas Capens519cf222020-05-08 15:27:19 -04002726Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002727{
2728 return T(Type_v4i16);
2729}
2730
2731UShort4::UShort4(RValue<Float4> cast, bool saturate)
2732{
2733 if(saturate)
2734 {
2735 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002736 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002737 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2738 // PackUnsigned takes care of 0x0000 saturation.
2739 Int4 int4(Min(cast, Float4(0xFFFF)));
2740 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002741 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002742 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002743 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002744 // ARM saturates the 32-bit integer result on overflow/undeflow.
2745 Int4 int4(cast);
2746 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002747 }
2748 else
2749 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002750 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002751 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002752 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002753 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002754 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002755 *this = Short4(Int4(cast));
2756 }
2757}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002758
Nicolas Capens157ba262019-12-10 17:49:14 -05002759RValue<UShort> Extract(RValue<UShort4> val, int i)
2760{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002761 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002762}
2763
Nicolas Capens157ba262019-12-10 17:49:14 -05002764RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2765{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002766 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002767 if(emulateIntrinsics)
2768 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002769 return Scalarize([rhs](auto x) { return x << UShort(rhs); }, lhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05002770 }
2771 else
2772 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002773 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002774 }
2775}
2776
2777RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002780 if(emulateIntrinsics)
2781 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002782 return Scalarize([rhs](auto x) { return x >> UShort(rhs); }, lhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05002783 }
2784 else
2785 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002786 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002787 }
2788}
2789
2790RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2791{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002792 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002793 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002794 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002795 ::basicBlock->appendInst(cmp);
2796
2797 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002798 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002799 ::basicBlock->appendInst(select);
2800
2801 return RValue<UShort4>(V(result));
2802}
2803
2804RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2805{
2806 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002807 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002808 ::basicBlock->appendInst(cmp);
2809
2810 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002811 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002812 ::basicBlock->appendInst(select);
2813
2814 return RValue<UShort4>(V(result));
2815}
2816
2817RValue<UShort> SaturateUnsigned(RValue<Int> x)
2818{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002819 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002820 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2821}
2822
2823RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2824{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002825 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002826 if(emulateIntrinsics)
2827 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002828 return Scalarize([](auto a, auto b) { return SaturateUnsigned(Int(a) + Int(b)); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002829 }
2830 else
2831 {
2832 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002833 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002834 auto paddusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002835 paddusw->addArg(x.value());
2836 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002837 ::basicBlock->appendInst(paddusw);
2838
2839 return RValue<UShort4>(V(result));
2840 }
2841}
2842
2843RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002846 if(emulateIntrinsics)
2847 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002848 return Scalarize([](auto a, auto b) { return SaturateUnsigned(Int(a) - Int(b)); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002849 }
2850 else
2851 {
2852 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002853 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002854 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002855 psubusw->addArg(x.value());
2856 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002857 ::basicBlock->appendInst(psubusw);
2858
2859 return RValue<UShort4>(V(result));
2860 }
2861}
2862
2863RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002866 if(emulateIntrinsics)
2867 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002868 return Scalarize([](auto a, auto b) { return UShort((UInt(a) * UInt(b)) >> 16); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002869 }
2870 else
2871 {
2872 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002873 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002874 auto pmulhuw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002875 pmulhuw->addArg(x.value());
2876 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002877 ::basicBlock->appendInst(pmulhuw);
2878
2879 return RValue<UShort4>(V(result));
2880 }
2881}
2882
2883RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2884{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002885 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002886 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2887
Nicolas Capens442e25b2022-06-22 12:02:52 -04002888 return Scalarize([](auto a, auto b) { return Int((Long(a) * Long(b)) >> Long(Int(32))); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002889}
2890
2891RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2892{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002893 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002894 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2895
2896 if(false) // Partial product based implementation.
2897 {
2898 auto xh = x >> 16;
2899 auto yh = y >> 16;
2900 auto xl = x & UInt4(0x0000FFFF);
2901 auto yl = y & UInt4(0x0000FFFF);
2902 auto xlyh = xl * yh;
2903 auto xhyl = xh * yl;
2904 auto xlyhh = xlyh >> 16;
2905 auto xhylh = xhyl >> 16;
2906 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2907 auto xhyll = xhyl & UInt4(0x0000FFFF);
2908 auto xlylh = (xl * yl) >> 16;
2909 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2910
2911 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002912 }
2913
Nicolas Capens442e25b2022-06-22 12:02:52 -04002914 return Scalarize([](auto a, auto b) { return UInt((Long(a) * Long(b)) >> Long(Int(32))); }, x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05002915}
2916
2917RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2918{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002919 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00002920 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002921 return UShort4(0);
2922}
2923
Nicolas Capens519cf222020-05-08 15:27:19 -04002924Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002925{
2926 return T(Type_v4i16);
2927}
2928
2929RValue<Short> Extract(RValue<Short8> val, int i)
2930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002932 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002933}
2934
2935RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2936{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002937 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002938 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002939}
2940
2941RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2942{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002943 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002944 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002945 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002946 return Scalarize([rhs](auto x) { return x << Short(rhs); }, lhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05002947 }
2948 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002949 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002950 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002951 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002952}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002953
Nicolas Capens157ba262019-12-10 17:49:14 -05002954RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
2955{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002956 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002957 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002958 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04002959 return Scalarize([rhs](auto x) { return x >> Short(rhs); }, lhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05002960 }
2961 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002962 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002963 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002964 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002965}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002966
Nicolas Capens157ba262019-12-10 17:49:14 -05002967RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
2968{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002969 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00002970 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002971 return Int4(0);
2972}
2973
2974RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
2975{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002976 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00002977 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002978 return Short8(0);
2979}
2980
Nicolas Capens519cf222020-05-08 15:27:19 -04002981Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002982{
2983 return T(Ice::IceType_v8i16);
2984}
2985
2986RValue<UShort> Extract(RValue<UShort8> val, int i)
2987{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002988 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002989 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002990}
2991
2992RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
2993{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002994 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002995 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002996}
2997
2998RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
2999{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003000 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003001 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003002 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003003 return Scalarize([rhs](auto x) { return x << UShort(rhs); }, lhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05003004 }
3005 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003006 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003007 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003008 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003009}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003010
Nicolas Capens157ba262019-12-10 17:49:14 -05003011RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3012{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003013 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003014 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003015 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003016 return Scalarize([rhs](auto x) { return x >> UShort(rhs); }, lhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003017 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003018 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003019 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003020 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003021 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003022}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003023
Nicolas Capens157ba262019-12-10 17:49:14 -05003024RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3025{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003026 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003027 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003028 return UShort8(0);
3029}
3030
Nicolas Capens519cf222020-05-08 15:27:19 -04003031Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003032{
3033 return T(Ice::IceType_v8i16);
3034}
3035
Ben Clayton713b8d32019-12-17 20:37:56 +00003036RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003037{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003038 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003039 RValue<Int> res = val;
3040 val += 1;
3041 return res;
3042}
3043
Ben Clayton713b8d32019-12-17 20:37:56 +00003044const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003045{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003046 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003047 val += 1;
3048 return val;
3049}
3050
Ben Clayton713b8d32019-12-17 20:37:56 +00003051RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003052{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003053 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003054 RValue<Int> res = val;
3055 val -= 1;
3056 return res;
3057}
3058
Ben Clayton713b8d32019-12-17 20:37:56 +00003059const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003060{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003061 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003062 val -= 1;
3063 return val;
3064}
3065
3066RValue<Int> RoundInt(RValue<Float> cast)
3067{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003068 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003069 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003070 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003071 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3072 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003073 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003074 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003075 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003076 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003077 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003078 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003079 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003080 ::basicBlock->appendInst(nearbyint);
3081
3082 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003083 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003084}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003085
Nicolas Capens519cf222020-05-08 15:27:19 -04003086Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003087{
3088 return T(Ice::IceType_i32);
3089}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003090
Nicolas Capens519cf222020-05-08 15:27:19 -04003091Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003092{
3093 return T(Ice::IceType_i64);
3094}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003095
Nicolas Capens157ba262019-12-10 17:49:14 -05003096UInt::UInt(RValue<Float> cast)
3097{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003098 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003099 // Smallest positive value representable in UInt, but not in Int
3100 const unsigned int ustart = 0x80000000u;
3101 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003102
Nicolas Capens157ba262019-12-10 17:49:14 -05003103 // If the value is negative, store 0, otherwise store the result of the conversion
3104 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003105 // Check if the value can be represented as an Int
3106 IfThenElse(cast >= ustartf,
3107 // If the value is too large, subtract ustart and re-add it after conversion.
3108 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3109 // Otherwise, just convert normally
3110 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003111 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003112}
Nicolas Capensa8086512016-11-07 17:32:17 -05003113
Ben Clayton713b8d32019-12-17 20:37:56 +00003114RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003115{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003116 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003117 RValue<UInt> res = val;
3118 val += 1;
3119 return res;
3120}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003121
Ben Clayton713b8d32019-12-17 20:37:56 +00003122const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003123{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003124 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003125 val += 1;
3126 return val;
3127}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003128
Ben Clayton713b8d32019-12-17 20:37:56 +00003129RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003130{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003131 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003132 RValue<UInt> res = val;
3133 val -= 1;
3134 return res;
3135}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003136
Ben Clayton713b8d32019-12-17 20:37:56 +00003137const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003138{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003139 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003140 val -= 1;
3141 return val;
3142}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003143
Nicolas Capens598f8d82016-09-26 15:09:10 -04003144// RValue<UInt> RoundUInt(RValue<Float> cast)
3145// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003146// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003147// }
3148
Nicolas Capens519cf222020-05-08 15:27:19 -04003149Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003150{
3151 return T(Ice::IceType_i32);
3152}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003153
3154// Int2::Int2(RValue<Int> cast)
3155// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003156// Value *extend = Nucleus::createZExt(cast.value(), Long::type());
Nicolas Capens519cf222020-05-08 15:27:19 -04003157// Value *vector = Nucleus::createBitCast(extend, Int2::type());
Nicolas Capens598f8d82016-09-26 15:09:10 -04003158//
3159// Constant *shuffle[2];
3160// shuffle[0] = Nucleus::createConstantInt(0);
3161// shuffle[1] = Nucleus::createConstantInt(0);
3162//
Nicolas Capens519cf222020-05-08 15:27:19 -04003163// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003164//
3165// storeValue(replicate);
3166// }
3167
Nicolas Capens157ba262019-12-10 17:49:14 -05003168RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3169{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003170 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003171 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003172 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003173 return Scalarize([rhs](auto x) { return x << rhs; }, lhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003174 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003175 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003176 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003177 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003178 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003179}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003180
Nicolas Capens157ba262019-12-10 17:49:14 -05003181RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3182{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003183 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003184 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003185 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003186 return Scalarize([rhs](auto x) { return x >> rhs; }, lhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003187 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003188 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003189 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003190 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003191 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003192}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003193
Nicolas Capens519cf222020-05-08 15:27:19 -04003194Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003195{
3196 return T(Type_v2i32);
3197}
3198
3199RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3200{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003201 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003202 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003203 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003204 return Scalarize([rhs](auto x) { return x << rhs; }, lhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003205 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003206 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003207 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003208 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003209 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003210}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003211
Nicolas Capens157ba262019-12-10 17:49:14 -05003212RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3213{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003214 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003215 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003216 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003217 return Scalarize([rhs](auto x) { return x >> rhs; }, lhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003218 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003219 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003220 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003221 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003222 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003223}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003224
Nicolas Capens519cf222020-05-08 15:27:19 -04003225Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003226{
3227 return T(Type_v2i32);
3228}
3229
Ben Clayton713b8d32019-12-17 20:37:56 +00003230Int4::Int4(RValue<Byte4> cast)
3231 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003232{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003233 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003234 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003235 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3236
3237 Value *e;
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003238 std::vector<int> swizzle = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003239 Value *b = Nucleus::createBitCast(a, Byte16::type());
3240 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003241
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003242 std::vector<int> swizzle2 = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003243 Value *d = Nucleus::createBitCast(c, Short8::type());
3244 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003245
Nicolas Capens519cf222020-05-08 15:27:19 -04003246 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003247 storeValue(f);
3248}
3249
Ben Clayton713b8d32019-12-17 20:37:56 +00003250Int4::Int4(RValue<SByte4> cast)
3251 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003252{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003253 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003254 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003255 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3256
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003257 std::vector<int> swizzle = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003258 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003259 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3260
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003261 std::vector<int> swizzle2 = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003262 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003263 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3264
3265 *this = As<Int4>(e) >> 24;
3266}
3267
Ben Clayton713b8d32019-12-17 20:37:56 +00003268Int4::Int4(RValue<Short4> cast)
3269 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003270{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003271 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003272 std::vector<int> swizzle = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003273 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003274
3275 *this = As<Int4>(c) >> 16;
3276}
3277
Ben Clayton713b8d32019-12-17 20:37:56 +00003278Int4::Int4(RValue<UShort4> cast)
3279 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003280{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003281 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003282 std::vector<int> swizzle = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003283 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003284 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003285 storeValue(d);
3286}
3287
Ben Clayton713b8d32019-12-17 20:37:56 +00003288Int4::Int4(RValue<Int> rhs)
3289 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003292 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003293
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003294 std::vector<int> swizzle = { 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003295 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3296
3297 storeValue(replicate);
3298}
3299
3300RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3301{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003302 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003303 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003304 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003305 return Scalarize([rhs](auto x) { return x << rhs; }, lhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003306 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003307 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003308 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003309 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003310 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003311}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003312
Nicolas Capens157ba262019-12-10 17:49:14 -05003313RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3314{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003315 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003316 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003317 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003318 return Scalarize([rhs](auto x) { return x >> rhs; }, lhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003319 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003320 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003321 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003322 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003323 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003324}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003325
Nicolas Capens157ba262019-12-10 17:49:14 -05003326RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3327{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003328 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003329 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003330}
3331
3332RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3333{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003334 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003335 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003336}
3337
3338RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3339{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003340 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003341 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003342}
3343
3344RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3345{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003346 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003347 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003348}
3349
3350RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3351{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003352 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003353 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003354}
3355
3356RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3357{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003358 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003359 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003360}
3361
Nicolas Capens629bf952022-01-18 15:08:14 -05003362RValue<Int4> Abs(RValue<Int4> x)
3363{
3364 // TODO: Optimize.
3365 auto negative = x >> 31;
3366 return (x ^ negative) - negative;
3367}
3368
Nicolas Capens157ba262019-12-10 17:49:14 -05003369RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3370{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003371 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003372 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003373 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003374 ::basicBlock->appendInst(cmp);
3375
3376 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003377 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003378 ::basicBlock->appendInst(select);
3379
3380 return RValue<Int4>(V(result));
3381}
3382
3383RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3384{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003385 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003386 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003387 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003388 ::basicBlock->appendInst(cmp);
3389
3390 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003391 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003392 ::basicBlock->appendInst(select);
3393
3394 return RValue<Int4>(V(result));
3395}
3396
3397RValue<Int4> RoundInt(RValue<Float4> cast)
3398{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003399 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003400 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003401 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003402 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3403 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003404 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003405 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003406 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003407 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003408 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003409 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003410 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003411 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003412
3413 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003414 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003415}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003416
Nicolas Capenseeb81842021-01-12 17:44:40 -05003417RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3418{
3419 RR_DEBUG_INFO_UPDATE_LOC();
3420
3421 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3422 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3423 // saturate to 0x80000000.
3424 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3425
3426 if(emulateIntrinsics || CPUID::ARM)
3427 {
3428 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3429 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3430 }
3431 else
3432 {
3433 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3434 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003435 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capenseeb81842021-01-12 17:44:40 -05003436 nearbyint->addArg(clamped.value());
3437 ::basicBlock->appendInst(nearbyint);
3438
3439 return RValue<Int4>(V(result));
3440 }
3441}
3442
Nicolas Capens157ba262019-12-10 17:49:14 -05003443RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3444{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003445 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003446 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003447 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003448 Short8 result;
3449 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3450 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3451 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3452 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3453 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3454 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3455 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3456 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003457
Nicolas Capens157ba262019-12-10 17:49:14 -05003458 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003459 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003460 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003461 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003462 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003463 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003464 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003465 pack->addArg(x.value());
3466 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003467 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003468
Nicolas Capens157ba262019-12-10 17:49:14 -05003469 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003470 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003471}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003472
Nicolas Capens157ba262019-12-10 17:49:14 -05003473RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3474{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003475 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003476 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003477 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003478 RValue<Int4> sx = As<Int4>(x);
3479 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003480
Nicolas Capens157ba262019-12-10 17:49:14 -05003481 RValue<Int4> sy = As<Int4>(y);
3482 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003483
Nicolas Capens157ba262019-12-10 17:49:14 -05003484 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003485 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003486 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003487 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003488 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003489 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003490 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003491 pack->addArg(x.value());
3492 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003493 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003494
Nicolas Capens157ba262019-12-10 17:49:14 -05003495 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003496 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003497}
Nicolas Capens33438a62017-09-27 11:47:35 -04003498
Nicolas Capens157ba262019-12-10 17:49:14 -05003499RValue<Int> SignMask(RValue<Int4> x)
3500{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003501 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003502 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003503 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003504 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3505 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003506 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003507 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003508 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003509 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003510 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003511 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003512 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003513 ::basicBlock->appendInst(movmsk);
3514
3515 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003516 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003517}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003518
Nicolas Capens519cf222020-05-08 15:27:19 -04003519Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003520{
3521 return T(Ice::IceType_v4i32);
3522}
3523
Ben Clayton713b8d32019-12-17 20:37:56 +00003524UInt4::UInt4(RValue<Float4> cast)
3525 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003526{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003527 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003528 // Smallest positive value representable in UInt, but not in Int
3529 const unsigned int ustart = 0x80000000u;
3530 const float ustartf = float(ustart);
3531
3532 // Check if the value can be represented as an Int
3533 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3534 // If the value is too large, subtract ustart and re-add it after conversion.
3535 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003536 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003537 (~uiValue & Int4(cast));
3538 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003539 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003540}
3541
Ben Clayton713b8d32019-12-17 20:37:56 +00003542UInt4::UInt4(RValue<UInt> rhs)
3543 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003544{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003545 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003546 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003547
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003548 std::vector<int> swizzle = { 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003549 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3550
3551 storeValue(replicate);
3552}
3553
3554RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3555{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003556 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003557 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003558 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003559 return Scalarize([rhs](auto x) { return x << rhs; }, lhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003560 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003561 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003562 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003563 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003564 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003565}
Ben Clayton88816fa2019-05-15 17:08:14 +01003566
Nicolas Capens157ba262019-12-10 17:49:14 -05003567RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3568{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003569 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003570 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003571 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04003572 return Scalarize([rhs](auto x) { return x >> rhs; }, lhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003573 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003574 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003575 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003576 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003577 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003578}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003579
Nicolas Capens157ba262019-12-10 17:49:14 -05003580RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3581{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003582 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003583 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003584}
3585
3586RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3587{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003588 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003589 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003590}
3591
3592RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3593{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003594 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003595 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003596}
3597
3598RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3599{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003600 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003601 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003602}
3603
3604RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3605{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003606 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003607 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003608}
3609
3610RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3611{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003612 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003613 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003614}
3615
3616RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3617{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003618 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003619 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003620 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003621 ::basicBlock->appendInst(cmp);
3622
3623 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003624 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003625 ::basicBlock->appendInst(select);
3626
3627 return RValue<UInt4>(V(result));
3628}
3629
3630RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3631{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003632 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003633 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003634 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003635 ::basicBlock->appendInst(cmp);
3636
3637 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003638 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003639 ::basicBlock->appendInst(select);
3640
3641 return RValue<UInt4>(V(result));
3642}
3643
Nicolas Capens519cf222020-05-08 15:27:19 -04003644Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003645{
3646 return T(Ice::IceType_v4i32);
3647}
3648
Nicolas Capens519cf222020-05-08 15:27:19 -04003649Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003650{
3651 return T(Ice::IceType_i16);
3652}
3653
Nicolas Capens157ba262019-12-10 17:49:14 -05003654RValue<Float> Sqrt(RValue<Float> x)
3655{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003656 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003657 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003658 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003659 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003660 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003661 ::basicBlock->appendInst(sqrt);
3662
3663 return RValue<Float>(V(result));
3664}
3665
3666RValue<Float> Round(RValue<Float> x)
3667{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003668 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003669 return Float4(Round(Float4(x))).x;
3670}
3671
3672RValue<Float> Trunc(RValue<Float> x)
3673{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003674 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003675 return Float4(Trunc(Float4(x))).x;
3676}
3677
3678RValue<Float> Frac(RValue<Float> x)
3679{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003680 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003681 return Float4(Frac(Float4(x))).x;
3682}
3683
3684RValue<Float> Floor(RValue<Float> x)
3685{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003686 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003687 return Float4(Floor(Float4(x))).x;
3688}
3689
3690RValue<Float> Ceil(RValue<Float> x)
3691{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003692 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003693 return Float4(Ceil(Float4(x))).x;
3694}
3695
Nicolas Capens519cf222020-05-08 15:27:19 -04003696Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003697{
3698 return T(Ice::IceType_f32);
3699}
3700
Nicolas Capens519cf222020-05-08 15:27:19 -04003701Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003702{
3703 return T(Type_v2f32);
3704}
3705
Ben Clayton713b8d32019-12-17 20:37:56 +00003706Float4::Float4(RValue<Float> rhs)
3707 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003708{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003709 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003710 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003711
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003712 std::vector<int> swizzle = { 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003713 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3714
3715 storeValue(replicate);
3716}
3717
Nicolas Capens4ee53092022-02-05 01:53:12 -05003718RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
3719{
Nicolas Capens442e25b2022-06-22 12:02:52 -04003720 return ScalarizeCall(fmodf, lhs, rhs);
Nicolas Capens4ee53092022-02-05 01:53:12 -05003721}
3722
Nicolas Capensbc74bc22022-01-26 10:47:00 -05003723RValue<Float4> MulAdd(RValue<Float4> x, RValue<Float4> y, RValue<Float4> z)
3724{
3725 // TODO(b/214591655): Use FMA when available.
3726 return x * y + z;
3727}
3728
Nicolas Capens75d79f22022-01-31 17:46:26 -05003729RValue<Float4> FMA(RValue<Float4> x, RValue<Float4> y, RValue<Float4> z)
3730{
3731 // TODO(b/214591655): Use FMA instructions when available.
Nicolas Capens442e25b2022-06-22 12:02:52 -04003732 return ScalarizeCall(fmaf, x, y, z);
Nicolas Capens75d79f22022-01-31 17:46:26 -05003733}
3734
Nicolas Capens629bf952022-01-18 15:08:14 -05003735RValue<Float4> Abs(RValue<Float4> x)
3736{
3737 // TODO: Optimize.
3738 Value *vector = Nucleus::createBitCast(x.value(), Int4::type());
Nicolas Capens4e7d3102022-06-21 01:42:18 -04003739 std::vector<int64_t> constantVector = { 0x7FFFFFFF };
Nicolas Capens629bf952022-01-18 15:08:14 -05003740 Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, Int4::type()));
3741
3742 return As<Float4>(result);
3743}
3744
Nicolas Capens157ba262019-12-10 17:49:14 -05003745RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3746{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003747 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003748 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003749 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003750 ::basicBlock->appendInst(cmp);
3751
3752 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003753 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003754 ::basicBlock->appendInst(select);
3755
3756 return RValue<Float4>(V(result));
3757}
3758
3759RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3760{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003761 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003762 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003763 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003764 ::basicBlock->appendInst(cmp);
3765
3766 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003767 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003768 ::basicBlock->appendInst(select);
3769
3770 return RValue<Float4>(V(result));
3771}
3772
// Reports whether RcpApprox() is available in this backend. Currently always false.
bool HasRcpApprox()
{
	// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
	constexpr bool supported = false;
	return supported;
}
3778
3779RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
3780{
3781 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3782 UNREACHABLE("RValue<Float4> RcpApprox()");
3783 return { 0.0f };
3784}
3785
3786RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
3787{
3788 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3789 UNREACHABLE("RValue<Float> RcpApprox()");
3790 return { 0.0f };
3791}
3792
// Reports whether RcpSqrtApprox() is available in this backend. Currently always false.
bool HasRcpSqrtApprox()
{
	constexpr bool supported = false;
	return supported;
}
3797
3798RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
3799{
3800 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3801 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
3802 return { 0.0f };
3803}
3804
3805RValue<Float> RcpSqrtApprox(RValue<Float> x)
3806{
3807 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3808 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
3809 return { 0.0f };
3810}
3811
Nicolas Capens157ba262019-12-10 17:49:14 -05003812RValue<Float4> Sqrt(RValue<Float4> x)
3813{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003814 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003815 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003816 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003817 Float4 result;
3818 result.x = Sqrt(Float(Float4(x).x));
3819 result.y = Sqrt(Float(Float4(x).y));
3820 result.z = Sqrt(Float(Float4(x).z));
3821 result.w = Sqrt(Float(Float4(x).w));
3822
3823 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003824 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003825 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003826 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003827 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003828 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003829 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003830 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04003831 ::basicBlock->appendInst(sqrt);
3832
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003833 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003834 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003835}
Nicolas Capens157ba262019-12-10 17:49:14 -05003836
3837RValue<Int> SignMask(RValue<Float4> x)
3838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003840 if(emulateIntrinsics || CPUID::ARM)
3841 {
3842 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3843 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3844 }
3845 else
3846 {
3847 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003848 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003849 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003850 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003851 ::basicBlock->appendInst(movmsk);
3852
3853 return RValue<Int>(V(result));
3854 }
3855}
3856
3857RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3858{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003859 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003860 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003861}
3862
3863RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003866 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003867}
3868
3869RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3870{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003871 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003872 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003873}
3874
3875RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3876{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003877 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003878 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003879}
3880
3881RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3882{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003883 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003884 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003885}
3886
3887RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3888{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003889 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003890 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003891}
3892
3893RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3894{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003895 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003896 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003897}
3898
3899RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3900{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003901 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003902 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003903}
3904
3905RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3906{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003907 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003908 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003909}
3910
3911RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3912{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003913 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003914 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003915}
3916
3917RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3918{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003919 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003920 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003921}
3922
3923RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3924{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003925 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003926 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003927}
3928
3929RValue<Float4> Round(RValue<Float4> x)
3930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003932 if(emulateIntrinsics || CPUID::ARM)
3933 {
3934 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3935 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
3936 }
3937 else if(CPUID::SSE4_1)
3938 {
3939 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003940 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003941 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003942 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003943 round->addArg(::context->getConstantInt32(0));
3944 ::basicBlock->appendInst(round);
3945
3946 return RValue<Float4>(V(result));
3947 }
3948 else
3949 {
3950 return Float4(RoundInt(x));
3951 }
3952}
3953
3954RValue<Float4> Trunc(RValue<Float4> x)
3955{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003956 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003957 if(CPUID::SSE4_1)
3958 {
3959 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003960 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003961 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003962 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003963 round->addArg(::context->getConstantInt32(3));
3964 ::basicBlock->appendInst(round);
3965
3966 return RValue<Float4>(V(result));
3967 }
3968 else
3969 {
3970 return Float4(Int4(x));
3971 }
3972}
3973
3974RValue<Float4> Frac(RValue<Float4> x)
3975{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003976 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003977 Float4 frc;
3978
3979 if(CPUID::SSE4_1)
3980 {
3981 frc = x - Floor(x);
3982 }
3983 else
3984 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003985 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05003986
Nicolas Capens442e25b2022-06-22 12:02:52 -04003987 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05003988 }
3989
3990 // x - floor(x) can be 1.0 for very small negative x.
3991 // Clamp against the value just below 1.0.
3992 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
3993}
3994
3995RValue<Float4> Floor(RValue<Float4> x)
3996{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003997 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003998 if(CPUID::SSE4_1)
3999 {
4000 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004001 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004002 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004003 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004004 round->addArg(::context->getConstantInt32(1));
4005 ::basicBlock->appendInst(round);
4006
4007 return RValue<Float4>(V(result));
4008 }
4009 else
4010 {
4011 return x - Frac(x);
4012 }
4013}
4014
4015RValue<Float4> Ceil(RValue<Float4> x)
4016{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004017 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004018 if(CPUID::SSE4_1)
4019 {
4020 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004021 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004022 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004023 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004024 round->addArg(::context->getConstantInt32(2));
4025 ::basicBlock->appendInst(round);
4026
4027 return RValue<Float4>(V(result));
4028 }
4029 else
4030 {
4031 return -Floor(-x);
4032 }
4033}
4034
Nicolas Capens519cf222020-05-08 15:27:19 -04004035Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004036{
4037 return T(Ice::IceType_v4f32);
4038}
4039
4040RValue<Long> Ticks()
4041{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004042 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004043 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004044 return Long(Int(0));
4045}
4046
Nicolas Capens3d7faaa2022-10-04 14:48:57 -04004047RValue<Pointer<Byte>> ConstantPointer(const void *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004048{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004049 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004050 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004051}
4052
Nicolas Capens3d7faaa2022-10-04 14:48:57 -04004053RValue<Pointer<Byte>> ConstantData(const void *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004054{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004055 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004056 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004057}
4058
Ben Clayton713b8d32019-12-17 20:37:56 +00004059Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004060{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004061 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004062 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004063}
4064
4065void Breakpoint()
4066{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004067 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004068 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004069 auto trap = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05004070 ::basicBlock->appendInst(trap);
4071}
4072
Ben Clayton713b8d32019-12-17 20:37:56 +00004073void Nucleus::createFence(std::memory_order memoryOrder)
4074{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004075 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004076 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004077 auto inst = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004078 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4079 inst->addArg(order);
4080 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004081}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004082
Ben Clayton713b8d32019-12-17 20:37:56 +00004083Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4084{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004085 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense4b77942021-08-03 17:09:41 -04004086 UNIMPLEMENTED("b/155867273 Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004087 return nullptr;
4088}
Nicolas Capense4b77942021-08-03 17:09:41 -04004089
Ben Clayton713b8d32019-12-17 20:37:56 +00004090void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4091{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004092 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capense4b77942021-08-03 17:09:41 -04004093 UNIMPLEMENTED("b/155867273 Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004094}
Nicolas Capens157ba262019-12-10 17:49:14 -05004095
Nicolas Capens4ee53092022-02-05 01:53:12 -05004096template<typename T>
4097struct UnderlyingType
4098{
4099 using Type = typename decltype(rr::Extract(std::declval<RValue<T>>(), 0))::rvalue_underlying_type;
4100};
4101
4102template<typename T>
4103using UnderlyingTypeT = typename UnderlyingType<T>::Type;
4104
4105template<typename T, typename EL = UnderlyingTypeT<T>>
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004106static void gather(T &out, RValue<Pointer<EL>> base, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment, bool zeroMaskedLanes)
Nicolas Capens4ee53092022-02-05 01:53:12 -05004107{
4108 constexpr bool atomic = false;
4109 constexpr std::memory_order order = std::memory_order_relaxed;
4110
4111 Pointer<Byte> baseBytePtr = base;
4112
4113 out = T(0);
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004114 for(int i = 0; i < SIMD::Width; i++)
Nicolas Capens4ee53092022-02-05 01:53:12 -05004115 {
4116 If(Extract(mask, i) != 0)
4117 {
4118 auto offset = Extract(offsets, i);
4119 auto el = Load(Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
4120 out = Insert(out, el, i);
4121 }
4122 Else If(zeroMaskedLanes)
4123 {
4124 out = Insert(out, EL(0), i);
4125 }
4126 }
4127}
4128
4129template<typename T, typename EL = UnderlyingTypeT<T>>
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004130static void scatter(RValue<Pointer<EL>> base, RValue<T> val, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment)
Nicolas Capens4ee53092022-02-05 01:53:12 -05004131{
4132 constexpr bool atomic = false;
4133 constexpr std::memory_order order = std::memory_order_relaxed;
4134
4135 Pointer<Byte> baseBytePtr = base;
4136
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004137 for(int i = 0; i < SIMD::Width; i++)
Nicolas Capens4ee53092022-02-05 01:53:12 -05004138 {
4139 If(Extract(mask, i) != 0)
4140 {
4141 auto offset = Extract(offsets, i);
4142 Store(Extract(val, i), Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
4143 }
4144 }
4145}
4146
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004147RValue<SIMD::Float> Gather(RValue<Pointer<Float>> base, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
Nicolas Capens157ba262019-12-10 17:49:14 -05004148{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004149 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004150 SIMD::Float result{};
Nicolas Capens4ee53092022-02-05 01:53:12 -05004151 gather(result, base, offsets, mask, alignment, zeroMaskedLanes);
4152 return result;
Nicolas Capens157ba262019-12-10 17:49:14 -05004153}
4154
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004155RValue<SIMD::Int> Gather(RValue<Pointer<Int>> base, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
Nicolas Capens157ba262019-12-10 17:49:14 -05004156{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004157 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004158 SIMD::Int result{};
Nicolas Capens4ee53092022-02-05 01:53:12 -05004159 gather(result, base, offsets, mask, alignment, zeroMaskedLanes);
4160 return result;
Nicolas Capens157ba262019-12-10 17:49:14 -05004161}
4162
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004163void Scatter(RValue<Pointer<Float>> base, RValue<SIMD::Float> val, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment)
Nicolas Capens157ba262019-12-10 17:49:14 -05004164{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004165 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004166 scatter(base, val, offsets, mask, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05004167}
4168
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004169void Scatter(RValue<Pointer<Int>> base, RValue<SIMD::Int> val, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment)
Nicolas Capens157ba262019-12-10 17:49:14 -05004170{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004171 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensd1116fa2022-06-29 10:39:18 -04004172 scatter<SIMD::Int>(base, val, offsets, mask, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05004173}
4174
Nicolas Capens157ba262019-12-10 17:49:14 -05004175RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4176{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004177 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004178 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004179 {
Ben Claytonce54c592020-02-07 11:30:51 +00004180 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004181 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004182 }
4183 else
4184 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004185 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004186 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004187 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004188 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004189 ::basicBlock->appendInst(ctlz);
4190
4191 return RValue<UInt>(V(result));
4192 }
4193}
4194
4195RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4196{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004197 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004198 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004199 {
Ben Claytonce54c592020-02-07 11:30:51 +00004200 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004201 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004202 }
4203 else
4204 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04004205 return Scalarize([isZeroUndef](auto a) { return Ctlz(a, isZeroUndef); }, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004206 }
4207}
4208
4209RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4210{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004211 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004212 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004213 {
Ben Claytonce54c592020-02-07 11:30:51 +00004214 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004215 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004216 }
4217 else
4218 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004219 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004220 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens442e25b2022-06-22 12:02:52 -04004221 auto cttz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
4222 cttz->addArg(x.value());
4223 ::basicBlock->appendInst(cttz);
Nicolas Capens157ba262019-12-10 17:49:14 -05004224
4225 return RValue<UInt>(V(result));
4226 }
4227}
4228
4229RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4230{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004231 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004232 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004233 {
Ben Claytonce54c592020-02-07 11:30:51 +00004234 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004235 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004236 }
4237 else
4238 {
Nicolas Capens442e25b2022-06-22 12:02:52 -04004239 return Scalarize([isZeroUndef](auto a) { return Cttz(a, isZeroUndef); }, x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004240 }
4241}
4242
// TODO(b/148276653): atomicMin and atomicMax serialize through a single static (global) mutex,
// which makes all min/max operations mutually exclusive, rather than only the ones on the value
// pointed to by ptr. Use a CAS loop, as is done for LLVMReactor's min/max atomic for Android.
// TODO(b/148207274): Or, move this down into Subzero as a CAS-based operation.

// Returns the one mutex shared by every atomicMin/atomicMax instantiation.
// A function-local static inside each template would give every instantiation
// its own lock, so a min and a max (or an int32_t and a uint32_t operation) on
// the same address would not exclude each other. Sharing one lock closes that gap.
static std::mutex &atomicMinMaxMutex()
{
	static std::mutex m;
	return m;
}

// Stores min(*ptr, value) into *ptr under the shared lock.
// Returns the value *ptr held before the update.
template<typename T>
static T atomicMin(T *ptr, T value)
{
	std::lock_guard<std::mutex> lock(atomicMinMaxMutex());
	const T origValue = *ptr;
	*ptr = std::min(origValue, value);
	return origValue;
}

// Stores max(*ptr, value) into *ptr under the shared lock.
// Returns the value *ptr held before the update.
template<typename T>
static T atomicMax(T *ptr, T value)
{
	std::lock_guard<std::mutex> lock(atomicMinMaxMutex());
	const T origValue = *ptr;
	*ptr = std::max(origValue, value);
	return origValue;
}
4268
Antonio Maiorano370cba52019-12-31 11:36:07 -05004269RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4270{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004271 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004272 return Call(atomicMin<int32_t>, x, y);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004273}
4274
4275RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4276{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004277 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004278 return Call(atomicMin<uint32_t>, x, y);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004279}
4280
4281RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4282{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004283 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004284 return Call(atomicMax<int32_t>, x, y);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004285}
4286
4287RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4288{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004289 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens4ee53092022-02-05 01:53:12 -05004290 return Call(atomicMax<uint32_t>, x, y);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004291}
4292
// When ENABLE_RR_DEBUG_INFO is defined, passes the caller backtrace to
// emitPrintLocation(). Otherwise this is a no-op.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	emitPrintLocation(
	    getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004299void EmitDebugVariable(Value *value) {}
// No-op in this backend.
void FlushDebug()
{
}
4301
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004302namespace {
4303namespace coro {
4304
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004305// Instance data per generated coroutine
4306// This is the "handle" type used for Coroutine functions
4307// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4308struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004309{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004310 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004311 bool done = false; // the coroutine should stop at the next yield()
4312 bool terminated = false; // the coroutine has finished.
4313 bool inRoutine = false; // is the coroutine currently executing?
4314 marl::Scheduler::Fiber *mainFiber = nullptr;
4315 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004316 void *promisePtr = nullptr;
4317};
4318
4319CoroutineData *createCoroutineData()
4320{
4321 return new CoroutineData{};
4322}
4323
4324void destroyCoroutineData(CoroutineData *coroData)
4325{
4326 delete coroData;
4327}
4328
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004329// suspend() pauses execution of the coroutine, and resumes execution from the
4330// caller's call to await().
4331// Returns true if await() is called again, or false if coroutine_destroy()
4332// is called.
4333bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004334{
Ben Claytonc3466532020-03-24 11:54:05 +00004335 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4336 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4337 ASSERT(coroData->inRoutine);
4338 coroData->inRoutine = false;
4339 coroData->mainFiber->notify();
4340 while(!coroData->inRoutine)
4341 {
4342 coroData->routineFiber->wait();
4343 }
4344 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004345}
4346
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004347// resume() is called by await(), blocking until the coroutine calls yield()
4348// or the coroutine terminates.
4349void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004350{
Ben Claytonc3466532020-03-24 11:54:05 +00004351 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4352 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4353 ASSERT(!coroData->inRoutine);
4354 coroData->inRoutine = true;
4355 coroData->routineFiber->notify();
4356 while(coroData->inRoutine)
4357 {
4358 coroData->mainFiber->wait();
4359 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004360}
4361
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004362// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4363// until the coroutine ends, and deletes the coroutine data.
4364void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004365{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004366 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004367 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4368 ASSERT(!coroData->inRoutine);
4369 if(!coroData->terminated)
4370 {
4371 coroData->done = true;
4372 coroData->inRoutine = true;
4373 coroData->routineFiber->notify();
4374 while(!coroData->terminated)
4375 {
4376 coroData->mainFiber->wait();
4377 }
4378 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004379 if(coroData->useInternalScheduler)
4380 {
4381 ::getOrCreateScheduler().unbind();
4382 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004383 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004384}
4385
4386namespace detail {
4387thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4388} // namespace detail
4389
4390void setHandleParam(Nucleus::CoroutineHandle handle)
4391{
4392 ASSERT(!detail::coroHandle);
4393 detail::coroHandle = handle;
4394}
4395
4396Nucleus::CoroutineHandle getHandleParam()
4397{
4398 ASSERT(detail::coroHandle);
4399 auto handle = detail::coroHandle;
4400 detail::coroHandle = {};
4401 return handle;
4402}
4403
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004404bool isDone(Nucleus::CoroutineHandle handle)
4405{
4406 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004407 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004408}
4409
4410void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4411{
4412 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4413 coroData->promisePtr = promisePtr;
4414}
4415
4416void *getPromisePtr(Nucleus::CoroutineHandle handle)
4417{
4418 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4419 return coroData->promisePtr;
4420}
4421
4422} // namespace coro
4423} // namespace
4424
4425// Used to generate coroutines.
4426// Lifetime: from yield to acquireCoroutine
4427class CoroutineGenerator
4428{
4429public:
4430 CoroutineGenerator()
4431 {
4432 }
4433
4434 // Inserts instructions at the top of the current function to make it a coroutine.
4435 void generateCoroutineBegin()
4436 {
4437 // Begin building the main coroutine_begin() function.
4438 // We insert these instructions at the top of the entry node,
4439 // before existing reactor-generated instructions.
4440
4441 // CoroutineHandle coroutine_begin(<Arguments>)
4442 // {
4443 // this->handle = coro::getHandleParam();
4444 //
4445 // YieldType promise;
4446 // coro::setPromisePtr(handle, &promise); // For await
4447 //
4448 // ... <REACTOR CODE> ...
4449 //
4450
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004451 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004452 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004453
4454 // YieldType promise;
4455 // coro::setPromisePtr(handle, &promise); // For await
4456 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004457 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004458 }
4459
4460 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4461 void generateYield(Value *val)
4462 {
4463 // ... <REACTOR CODE> ...
4464 //
4465 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004466 // if (!coro::suspend(handle)) {
4467 // return false; // coroutine has been stopped by the caller.
4468 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004469 //
4470 // ... <REACTOR CODE> ...
4471
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004472 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004473 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004474
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004475 // if (!coro::suspend(handle)) {
4476 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4477 auto doneBlock = Nucleus::createBasicBlock();
4478 auto resumeBlock = Nucleus::createBasicBlock();
4479 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4480
4481 // return false; // coroutine has been stopped by the caller.
4482 ::basicBlock = doneBlock;
4483 Nucleus::createRetVoid(); // coroutine return value is ignored.
4484
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004485 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004486 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004487 }
4488
4489 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4490
4491 // Generates the await function for the current coroutine.
4492 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4493 static FunctionUniquePtr generateAwaitFunction()
4494 {
4495 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4496 // {
4497 // if (coro::isDone())
4498 // {
4499 // return false;
4500 // }
4501 // else // resume
4502 // {
4503 // YieldType* promise = coro::getPromisePtr(handle);
4504 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004505 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004506 // return true;
4507 // }
4508 // }
4509
4510 // Subzero doesn't support bool types (IceType_i1) as return type
4511 const Ice::Type ReturnType = Ice::IceType_i32;
4512 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4513 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4514
4515 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4516 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4517
4518 Ice::Variable *handle = awaitFunc->getArgs()[0];
4519 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4520
4521 auto doneBlock = awaitFunc->makeNode();
4522 {
4523 // return false;
4524 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4525 doneBlock->appendInst(ret);
4526 }
4527
4528 auto resumeBlock = awaitFunc->makeNode();
4529 {
4530 // YieldType* promise = coro::getPromisePtr(handle);
4531 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4532
4533 // *out = *promise;
4534 // Load promise value
4535 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4536 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4537 resumeBlock->appendInst(load);
4538 // Then store it in output param
4539 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4540 resumeBlock->appendInst(store);
4541
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004542 // coro::resume(handle);
4543 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004544
4545 // return true;
4546 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4547 resumeBlock->appendInst(ret);
4548 }
4549
4550 // if (coro::isDone())
4551 // {
4552 // <doneBlock>
4553 // }
4554 // else // resume
4555 // {
4556 // <resumeBlock>
4557 // }
4558 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004559 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004560 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4561 bb->appendInst(br);
4562
4563 return FunctionUniquePtr{ awaitFunc };
4564 }
4565
4566 // Generates the destroy function for the current coroutine.
4567 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4568 static FunctionUniquePtr generateDestroyFunction()
4569 {
4570 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4571 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004572 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004573 // return;
4574 // }
4575
4576 const Ice::Type ReturnType = Ice::IceType_void;
4577 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4578
4579 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4580 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4581
4582 Ice::Variable *handle = destroyFunc->getArgs()[0];
4583
4584 auto *bb = destroyFunc->getEntryNode();
4585
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004586 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4587 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004588
4589 // return;
4590 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4591 bb->appendInst(ret);
4592
4593 return FunctionUniquePtr{ destroyFunc };
4594 }
4595
4596private:
4597 Ice::Variable *handle{};
4598 Ice::Variable *promise{};
4599};
4600
4601static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4602{
4603 // This doubles up as our coroutine handle
4604 auto coroData = coro::createCoroutineData();
4605
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004606 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4607 if(coroData->useInternalScheduler)
4608 {
4609 ::getOrCreateScheduler().bind();
4610 }
4611
Ben Clayton76e9e532020-03-16 20:35:04 +00004612 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004613 // Store handle in TLS so that the coroutine can grab it right away, before
4614 // any fiber switch occurs.
4615 coro::setHandleParam(coroData);
4616
Ben Claytonc3466532020-03-24 11:54:05 +00004617 ASSERT(!coroData->routineFiber);
4618 coroData->routineFiber = marl::Scheduler::Fiber::current();
4619
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004620 beginFunc();
4621
Ben Claytonc3466532020-03-24 11:54:05 +00004622 ASSERT(coroData->inRoutine);
4623 coroData->done = true; // coroutine is done.
4624 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4625 coroData->inRoutine = false;
4626 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004627 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004628
Ben Claytonc3466532020-03-24 11:54:05 +00004629 ASSERT(!coroData->mainFiber);
4630 coroData->mainFiber = marl::Scheduler::Fiber::current();
4631
4632 // block until the first yield or coroutine end
4633 ASSERT(!coroData->inRoutine);
4634 coroData->inRoutine = true;
4635 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4636 while(coroData->inRoutine)
4637 {
4638 coroData->mainFiber->wait();
4639 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004640
4641 return coroData;
4642}
4643
4644void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4645{
4646 // Start by creating a regular function
4647 createFunction(yieldType, params);
4648
4649 // Save in case yield() is called
4650 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4651 ::coroYieldType = yieldType;
4652}
4653
4654void Nucleus::yield(Value *val)
4655{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004656 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004657 Variable::materializeAll();
4658
4659 // On first yield, we start generating coroutine functions
4660 if(!::coroGen)
4661 {
4662 ::coroGen = std::make_shared<CoroutineGenerator>();
4663 ::coroGen->generateCoroutineBegin();
4664 }
4665
4666 ASSERT(::coroGen);
4667 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004668}
4669
Ben Clayton713b8d32019-12-17 20:37:56 +00004670static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4671{
4672 return false;
4673}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004674
4675static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4676{
4677}
Nicolas Capens157ba262019-12-10 17:49:14 -05004678
Nicolas Capens79d4c6c2022-04-22 17:20:26 -04004679std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name)
Nicolas Capens157ba262019-12-10 17:49:14 -05004680{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004681 if(::coroGen)
4682 {
4683 // Finish generating coroutine functions
4684 {
4685 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004686 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004687 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004688
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004689 auto awaitFunc = ::coroGen->generateAwaitFunction();
4690 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004691
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004692 // At this point, we no longer need the CoroutineGenerator.
4693 ::coroGen.reset();
4694 ::coroYieldType = nullptr;
4695
4696 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
Nicolas Capens79d4c6c2022-04-22 17:20:26 -04004697 { name, "await", "destroy" });
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004698
4699 return routine;
4700 }
4701 else
4702 {
4703 {
4704 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004705 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004706 }
4707
4708 ::coroYieldType = nullptr;
4709
4710 // Not an actual coroutine (no yields), so return stubs for await and destroy
Nicolas Capens79d4c6c2022-04-22 17:20:26 -04004711 auto routine = rr::acquireRoutine({ ::function }, { name });
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004712
4713 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4714 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4715 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4716 return routine;
4717 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004718}
4719
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004720Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004721{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004722 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4723
4724 if(isCoroutine)
4725 {
4726 return rr::invokeCoroutineBegin(func);
4727 }
4728 else
4729 {
4730 // For regular routines, just invoke the begin func directly
4731 return func();
4732 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004733}
Nicolas Capens157ba262019-12-10 17:49:14 -05004734
Nicolas Capens44f94692022-06-20 23:15:46 -04004735SIMD::Int::Int(RValue<scalar::Int> rhs)
Nicolas Capens7e960682022-06-30 15:53:27 -04004736 : XYZW(this)
Nicolas Capens44f94692022-06-20 23:15:46 -04004737{
4738 RR_DEBUG_INFO_UPDATE_LOC();
4739 Value *vector = Nucleus::createBitCast(rhs.value(), SIMD::Int::type());
4740
4741 std::vector<int> swizzle = { 0 };
4742 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
4743
4744 storeValue(replicate);
4745}
4746
4747RValue<SIMD::Int> operator<<(RValue<SIMD::Int> lhs, unsigned char rhs)
4748{
4749 RR_DEBUG_INFO_UPDATE_LOC();
4750 if(emulateIntrinsics)
4751 {
4752 return Scalarize([rhs](auto x) { return x << rhs; }, lhs);
4753 }
4754 else
4755 {
4756 return RValue<SIMD::Int>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
4757 }
4758}
4759
4760RValue<SIMD::Int> operator>>(RValue<SIMD::Int> lhs, unsigned char rhs)
4761{
4762 RR_DEBUG_INFO_UPDATE_LOC();
4763 if(emulateIntrinsics)
4764 {
4765 return Scalarize([rhs](auto x) { return x >> rhs; }, lhs);
4766 }
4767 else
4768 {
4769 return RValue<SIMD::Int>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
4770 }
4771}
4772
4773RValue<SIMD::Int> CmpEQ(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
4774{
4775 RR_DEBUG_INFO_UPDATE_LOC();
4776 return RValue<SIMD::Int>(Nucleus::createICmpEQ(x.value(), y.value()));
4777}
4778
4779RValue<SIMD::Int> CmpLT(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
4780{
4781 RR_DEBUG_INFO_UPDATE_LOC();
4782 return RValue<SIMD::Int>(Nucleus::createICmpSLT(x.value(), y.value()));
4783}
4784
4785RValue<SIMD::Int> CmpLE(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
4786{
4787 RR_DEBUG_INFO_UPDATE_LOC();
4788 return RValue<SIMD::Int>(Nucleus::createICmpSLE(x.value(), y.value()));
4789}
4790
4791RValue<SIMD::Int> CmpNEQ(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
4792{
4793 RR_DEBUG_INFO_UPDATE_LOC();
4794 return RValue<SIMD::Int>(Nucleus::createICmpNE(x.value(), y.value()));
4795}
4796
4797RValue<SIMD::Int> CmpNLT(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
4798{
4799 RR_DEBUG_INFO_UPDATE_LOC();
4800 return RValue<SIMD::Int>(Nucleus::createICmpSGE(x.value(), y.value()));
4801}
4802
4803RValue<SIMD::Int> CmpNLE(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
4804{
4805 RR_DEBUG_INFO_UPDATE_LOC();
4806 return RValue<SIMD::Int>(Nucleus::createICmpSGT(x.value(), y.value()));
4807}
4808
4809RValue<SIMD::Int> Abs(RValue<SIMD::Int> x)
4810{
4811 // TODO: Optimize.
4812 auto negative = x >> 31;
4813 return (x ^ negative) - negative;
4814}
4815
4816RValue<SIMD::Int> Max(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
4817{
4818 RR_DEBUG_INFO_UPDATE_LOC();
4819 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
4820 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
4821 ::basicBlock->appendInst(cmp);
4822
4823 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
4824 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
4825 ::basicBlock->appendInst(select);
4826
4827 return RValue<SIMD::Int>(V(result));
4828}
4829
4830RValue<SIMD::Int> Min(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
4831{
4832 RR_DEBUG_INFO_UPDATE_LOC();
4833 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
4834 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
4835 ::basicBlock->appendInst(cmp);
4836
4837 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
4838 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
4839 ::basicBlock->appendInst(select);
4840
4841 return RValue<SIMD::Int>(V(result));
4842}
4843
4844RValue<SIMD::Int> RoundInt(RValue<SIMD::Float> cast)
4845{
4846 RR_DEBUG_INFO_UPDATE_LOC();
4847 if(emulateIntrinsics || CPUID::ARM)
4848 {
4849 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4850 return SIMD::Int((cast + SIMD::Float(0x00C00000)) - SIMD::Float(0x00C00000));
4851 }
4852 else
4853 {
4854 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
4855 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4856 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
4857 nearbyint->addArg(cast.value());
4858 ::basicBlock->appendInst(nearbyint);
4859
4860 return RValue<SIMD::Int>(V(result));
4861 }
4862}
4863
4864RValue<SIMD::Int> RoundIntClamped(RValue<SIMD::Float> cast)
4865{
4866 RR_DEBUG_INFO_UPDATE_LOC();
4867
4868 // cvtps2dq produces 0x80000000, a negative value, for input larger than
4869 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
4870 // saturate to 0x80000000.
4871 RValue<SIMD::Float> clamped = Min(cast, SIMD::Float(0x7FFFFF80));
4872
4873 if(emulateIntrinsics || CPUID::ARM)
4874 {
4875 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4876 return SIMD::Int((clamped + SIMD::Float(0x00C00000)) - SIMD::Float(0x00C00000));
4877 }
4878 else
4879 {
4880 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
4881 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4882 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
4883 nearbyint->addArg(clamped.value());
4884 ::basicBlock->appendInst(nearbyint);
4885
4886 return RValue<SIMD::Int>(V(result));
4887 }
4888}
4889
Nicolas Capens0ed3fa62022-06-22 16:48:07 -04004890RValue<Int4> Extract128(RValue<SIMD::Int> val, int i)
4891{
4892 ASSERT(SIMD::Width == 4);
4893 ASSERT(i == 0);
4894
4895 return As<Int4>(val);
4896}
4897
4898RValue<SIMD::Int> Insert128(RValue<SIMD::Int> val, RValue<Int4> element, int i)
4899{
4900 ASSERT(SIMD::Width == 4);
4901 ASSERT(i == 0);
4902
4903 return As<SIMD::Int>(element);
4904}
4905
Nicolas Capens3b0ad202022-06-02 15:02:31 -04004906Type *SIMD::Int::type()
4907{
4908 return T(Ice::IceType_v4i32);
4909}
4910
Nicolas Capens44f94692022-06-20 23:15:46 -04004911SIMD::UInt::UInt(RValue<SIMD::Float> cast)
Nicolas Capens7e960682022-06-30 15:53:27 -04004912 : XYZW(this)
Nicolas Capens44f94692022-06-20 23:15:46 -04004913{
4914 RR_DEBUG_INFO_UPDATE_LOC();
4915 // Smallest positive value representable in UInt, but not in Int
4916 const unsigned int ustart = 0x80000000u;
4917 const float ustartf = float(ustart);
4918
4919 // Check if the value can be represented as an Int
4920 SIMD::Int uiValue = CmpNLT(cast, SIMD::Float(ustartf));
4921 // If the value is too large, subtract ustart and re-add it after conversion.
4922 uiValue = (uiValue & As<SIMD::Int>(As<SIMD::UInt>(SIMD::Int(cast - SIMD::Float(ustartf))) + SIMD::UInt(ustart))) |
4923 // Otherwise, just convert normally
4924 (~uiValue & SIMD::Int(cast));
4925 // If the value is negative, store 0, otherwise store the result of the conversion
4926 storeValue((~(As<SIMD::Int>(cast) >> 31) & uiValue).value());
4927}
4928
4929SIMD::UInt::UInt(RValue<scalar::UInt> rhs)
Nicolas Capens7e960682022-06-30 15:53:27 -04004930 : XYZW(this)
Nicolas Capens44f94692022-06-20 23:15:46 -04004931{
4932 RR_DEBUG_INFO_UPDATE_LOC();
4933 Value *vector = Nucleus::createBitCast(rhs.value(), SIMD::UInt::type());
4934
4935 std::vector<int> swizzle = { 0 };
4936 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
4937
4938 storeValue(replicate);
4939}
4940
4941RValue<SIMD::UInt> operator<<(RValue<SIMD::UInt> lhs, unsigned char rhs)
4942{
4943 RR_DEBUG_INFO_UPDATE_LOC();
4944 if(emulateIntrinsics)
4945 {
4946 return Scalarize([rhs](auto x) { return x << rhs; }, lhs);
4947 }
4948 else
4949 {
4950 return RValue<SIMD::UInt>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
4951 }
4952}
4953
4954RValue<SIMD::UInt> operator>>(RValue<SIMD::UInt> lhs, unsigned char rhs)
4955{
4956 RR_DEBUG_INFO_UPDATE_LOC();
4957 if(emulateIntrinsics)
4958 {
4959 return Scalarize([rhs](auto x) { return x >> rhs; }, lhs);
4960 }
4961 else
4962 {
4963 return RValue<SIMD::UInt>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
4964 }
4965}
4966
4967RValue<SIMD::UInt> CmpEQ(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
4968{
4969 RR_DEBUG_INFO_UPDATE_LOC();
4970 return RValue<SIMD::UInt>(Nucleus::createICmpEQ(x.value(), y.value()));
4971}
4972
4973RValue<SIMD::UInt> CmpLT(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
4974{
4975 RR_DEBUG_INFO_UPDATE_LOC();
4976 return RValue<SIMD::UInt>(Nucleus::createICmpULT(x.value(), y.value()));
4977}
4978
4979RValue<SIMD::UInt> CmpLE(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
4980{
4981 RR_DEBUG_INFO_UPDATE_LOC();
4982 return RValue<SIMD::UInt>(Nucleus::createICmpULE(x.value(), y.value()));
4983}
4984
4985RValue<SIMD::UInt> CmpNEQ(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
4986{
4987 RR_DEBUG_INFO_UPDATE_LOC();
4988 return RValue<SIMD::UInt>(Nucleus::createICmpNE(x.value(), y.value()));
4989}
4990
4991RValue<SIMD::UInt> CmpNLT(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
4992{
4993 RR_DEBUG_INFO_UPDATE_LOC();
4994 return RValue<SIMD::UInt>(Nucleus::createICmpUGE(x.value(), y.value()));
4995}
4996
4997RValue<SIMD::UInt> CmpNLE(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
4998{
4999 RR_DEBUG_INFO_UPDATE_LOC();
5000 return RValue<SIMD::UInt>(Nucleus::createICmpUGT(x.value(), y.value()));
5001}
5002
5003RValue<SIMD::UInt> Max(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
5004{
5005 RR_DEBUG_INFO_UPDATE_LOC();
5006 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5007 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
5008 ::basicBlock->appendInst(cmp);
5009
5010 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5011 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
5012 ::basicBlock->appendInst(select);
5013
5014 return RValue<SIMD::UInt>(V(result));
5015}
5016
5017RValue<SIMD::UInt> Min(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
5018{
5019 RR_DEBUG_INFO_UPDATE_LOC();
5020 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5021 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
5022 ::basicBlock->appendInst(cmp);
5023
5024 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5025 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
5026 ::basicBlock->appendInst(select);
5027
5028 return RValue<SIMD::UInt>(V(result));
5029}
5030
Nicolas Capens0ed3fa62022-06-22 16:48:07 -04005031RValue<UInt4> Extract128(RValue<SIMD::UInt> val, int i)
5032{
5033 ASSERT(SIMD::Width == 4);
5034 ASSERT(i == 0);
5035
5036 return As<UInt4>(val);
5037}
5038
5039RValue<SIMD::UInt> Insert128(RValue<SIMD::UInt> val, RValue<UInt4> element, int i)
5040{
5041 ASSERT(SIMD::Width == 4);
5042 ASSERT(i == 0);
5043
5044 return As<SIMD::UInt>(element);
5045}
5046
Nicolas Capens44f94692022-06-20 23:15:46 -04005047Type *SIMD::UInt::type()
5048{
5049 return T(Ice::IceType_v4i32);
5050}
5051
5052SIMD::Float::Float(RValue<scalar::Float> rhs)
Nicolas Capens7e960682022-06-30 15:53:27 -04005053 : XYZW(this)
Nicolas Capens44f94692022-06-20 23:15:46 -04005054{
5055 RR_DEBUG_INFO_UPDATE_LOC();
5056 Value *vector = Nucleus::createBitCast(rhs.value(), SIMD::Float::type());
5057
5058 std::vector<int> swizzle = { 0 };
5059 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
5060
5061 storeValue(replicate);
5062}
5063
5064RValue<SIMD::Float> operator%(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
5065{
5066 return ScalarizeCall(fmodf, lhs, rhs);
5067}
5068
5069RValue<SIMD::Float> MulAdd(RValue<SIMD::Float> x, RValue<SIMD::Float> y, RValue<SIMD::Float> z)
5070{
5071 // TODO(b/214591655): Use FMA when available.
5072 return x * y + z;
5073}
5074
5075RValue<SIMD::Float> FMA(RValue<SIMD::Float> x, RValue<SIMD::Float> y, RValue<SIMD::Float> z)
5076{
5077 // TODO(b/214591655): Use FMA instructions when available.
5078 return ScalarizeCall(fmaf, x, y, z);
5079}
5080
5081RValue<SIMD::Float> Abs(RValue<SIMD::Float> x)
5082{
5083 // TODO: Optimize.
5084 Value *vector = Nucleus::createBitCast(x.value(), SIMD::Int::type());
5085 std::vector<int64_t> constantVector = { 0x7FFFFFFF };
5086 Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, SIMD::Int::type()));
5087
5088 return As<SIMD::Float>(result);
5089}
5090
5091RValue<SIMD::Float> Max(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5092{
5093 RR_DEBUG_INFO_UPDATE_LOC();
5094 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5095 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
5096 ::basicBlock->appendInst(cmp);
5097
5098 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
5099 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
5100 ::basicBlock->appendInst(select);
5101
5102 return RValue<SIMD::Float>(V(result));
5103}
5104
5105RValue<SIMD::Float> Min(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5106{
5107 RR_DEBUG_INFO_UPDATE_LOC();
5108 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5109 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
5110 ::basicBlock->appendInst(cmp);
5111
5112 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
5113 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
5114 ::basicBlock->appendInst(select);
5115
5116 return RValue<SIMD::Float>(V(result));
5117}
5118
5119RValue<SIMD::Float> Sqrt(RValue<SIMD::Float> x)
5120{
5121 RR_DEBUG_INFO_UPDATE_LOC();
5122 if(emulateIntrinsics || CPUID::ARM)
5123 {
5124 return Scalarize([](auto a) { return Sqrt(a); }, x);
5125 }
5126 else
5127 {
5128 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
5129 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
5130 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
5131 sqrt->addArg(x.value());
5132 ::basicBlock->appendInst(sqrt);
5133
5134 return RValue<SIMD::Float>(V(result));
5135 }
5136}
5137
5138RValue<SIMD::Int> CmpEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5139{
5140 RR_DEBUG_INFO_UPDATE_LOC();
5141 return RValue<SIMD::Int>(Nucleus::createFCmpOEQ(x.value(), y.value()));
5142}
5143
5144RValue<SIMD::Int> CmpLT(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5145{
5146 RR_DEBUG_INFO_UPDATE_LOC();
5147 return RValue<SIMD::Int>(Nucleus::createFCmpOLT(x.value(), y.value()));
5148}
5149
5150RValue<SIMD::Int> CmpLE(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5151{
5152 RR_DEBUG_INFO_UPDATE_LOC();
5153 return RValue<SIMD::Int>(Nucleus::createFCmpOLE(x.value(), y.value()));
5154}
5155
5156RValue<SIMD::Int> CmpNEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5157{
5158 RR_DEBUG_INFO_UPDATE_LOC();
5159 return RValue<SIMD::Int>(Nucleus::createFCmpONE(x.value(), y.value()));
5160}
5161
5162RValue<SIMD::Int> CmpNLT(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5163{
5164 RR_DEBUG_INFO_UPDATE_LOC();
5165 return RValue<SIMD::Int>(Nucleus::createFCmpOGE(x.value(), y.value()));
5166}
5167
5168RValue<SIMD::Int> CmpNLE(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5169{
5170 RR_DEBUG_INFO_UPDATE_LOC();
5171 return RValue<SIMD::Int>(Nucleus::createFCmpOGT(x.value(), y.value()));
5172}
5173
5174RValue<SIMD::Int> CmpUEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5175{
5176 RR_DEBUG_INFO_UPDATE_LOC();
5177 return RValue<SIMD::Int>(Nucleus::createFCmpUEQ(x.value(), y.value()));
5178}
5179
5180RValue<SIMD::Int> CmpULT(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5181{
5182 RR_DEBUG_INFO_UPDATE_LOC();
5183 return RValue<SIMD::Int>(Nucleus::createFCmpULT(x.value(), y.value()));
5184}
5185
5186RValue<SIMD::Int> CmpULE(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5187{
5188 RR_DEBUG_INFO_UPDATE_LOC();
5189 return RValue<SIMD::Int>(Nucleus::createFCmpULE(x.value(), y.value()));
5190}
5191
5192RValue<SIMD::Int> CmpUNEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5193{
5194 RR_DEBUG_INFO_UPDATE_LOC();
5195 return RValue<SIMD::Int>(Nucleus::createFCmpUNE(x.value(), y.value()));
5196}
5197
5198RValue<SIMD::Int> CmpUNLT(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5199{
5200 RR_DEBUG_INFO_UPDATE_LOC();
5201 return RValue<SIMD::Int>(Nucleus::createFCmpUGE(x.value(), y.value()));
5202}
5203
5204RValue<SIMD::Int> CmpUNLE(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
5205{
5206 RR_DEBUG_INFO_UPDATE_LOC();
5207 return RValue<SIMD::Int>(Nucleus::createFCmpUGT(x.value(), y.value()));
5208}
5209
5210RValue<SIMD::Float> Round(RValue<SIMD::Float> x)
5211{
5212 RR_DEBUG_INFO_UPDATE_LOC();
5213 if(emulateIntrinsics || CPUID::ARM)
5214 {
5215 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
5216 return (x + SIMD::Float(0x00C00000)) - SIMD::Float(0x00C00000);
5217 }
5218 else if(CPUID::SSE4_1)
5219 {
5220 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
5221 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
5222 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
5223 round->addArg(x.value());
5224 round->addArg(::context->getConstantInt32(0));
5225 ::basicBlock->appendInst(round);
5226
5227 return RValue<SIMD::Float>(V(result));
5228 }
5229 else
5230 {
5231 return SIMD::Float(RoundInt(x));
5232 }
5233}
5234
5235RValue<SIMD::Float> Trunc(RValue<SIMD::Float> x)
5236{
5237 RR_DEBUG_INFO_UPDATE_LOC();
5238 if(CPUID::SSE4_1)
5239 {
5240 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
5241 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
5242 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
5243 round->addArg(x.value());
5244 round->addArg(::context->getConstantInt32(3));
5245 ::basicBlock->appendInst(round);
5246
5247 return RValue<SIMD::Float>(V(result));
5248 }
5249 else
5250 {
5251 return SIMD::Float(SIMD::Int(x));
5252 }
5253}
5254
5255RValue<SIMD::Float> Frac(RValue<SIMD::Float> x)
5256{
5257 RR_DEBUG_INFO_UPDATE_LOC();
5258 SIMD::Float frc;
5259
5260 if(CPUID::SSE4_1)
5261 {
5262 frc = x - Floor(x);
5263 }
5264 else
5265 {
5266 frc = x - SIMD::Float(SIMD::Int(x)); // Signed fractional part.
5267
5268 frc += As<SIMD::Float>(As<SIMD::Int>(CmpNLE(SIMD::Float(0.0f), frc)) & As<SIMD::Int>(SIMD::Float(1.0f))); // Add 1.0 if negative.
5269 }
5270
5271 // x - floor(x) can be 1.0 for very small negative x.
5272 // Clamp against the value just below 1.0.
5273 return Min(frc, As<SIMD::Float>(SIMD::Int(0x3F7FFFFF)));
5274}
5275
5276RValue<SIMD::Float> Floor(RValue<SIMD::Float> x)
5277{
5278 RR_DEBUG_INFO_UPDATE_LOC();
5279 if(CPUID::SSE4_1)
5280 {
5281 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
5282 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
5283 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
5284 round->addArg(x.value());
5285 round->addArg(::context->getConstantInt32(1));
5286 ::basicBlock->appendInst(round);
5287
5288 return RValue<SIMD::Float>(V(result));
5289 }
5290 else
5291 {
5292 return x - Frac(x);
5293 }
5294}
5295
5296RValue<SIMD::Float> Ceil(RValue<SIMD::Float> x)
5297{
5298 RR_DEBUG_INFO_UPDATE_LOC();
5299 if(CPUID::SSE4_1)
5300 {
5301 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
5302 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
5303 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
5304 round->addArg(x.value());
5305 round->addArg(::context->getConstantInt32(2));
5306 ::basicBlock->appendInst(round);
5307
5308 return RValue<SIMD::Float>(V(result));
5309 }
5310 else
5311 {
5312 return -Floor(-x);
5313 }
5314}
5315
Nicolas Capens0ed3fa62022-06-22 16:48:07 -04005316RValue<Float4> Extract128(RValue<SIMD::Float> val, int i)
5317{
5318 ASSERT(SIMD::Width == 4);
5319 ASSERT(i == 0);
5320
5321 return As<Float4>(val);
5322}
5323
5324RValue<SIMD::Float> Insert128(RValue<SIMD::Float> val, RValue<Float4> element, int i)
5325{
5326 ASSERT(SIMD::Width == 4);
5327 ASSERT(i == 0);
5328
5329 return As<SIMD::Float>(element);
5330}
5331
Nicolas Capens44f94692022-06-20 23:15:46 -04005332Type *SIMD::Float::type()
5333{
5334 return T(Ice::IceType_v4f32);
5335}
5336
Nicolas Capens157ba262019-12-10 17:49:14 -05005337} // namespace rr