blob: ce4475ee664a5648cae7bf1e9f1232e3352902be [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maiorano9c14bda2020-09-18 16:33:36 -040016#include "EmulatedIntrinsics.hpp"
17#include "OptimalIntrinsics.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050018#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000019#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050020#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040021
Nicolas Capens1a3ce872018-10-10 10:42:36 -040022#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000023#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040024
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040026#include "src/IceCfgNode.h"
27#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceELFStreamer.h"
29#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040030#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000031#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040032
Ben Clayton713b8d32019-12-17 20:37:56 +000033#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040034#include "llvm/Support/FileSystem.h"
35#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040036
Antonio Maiorano8bce0672020-02-28 13:13:45 -050037#include "marl/event.h"
38
Nicolas Capens6a990f82018-07-06 15:54:07 -040039#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000040# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040041#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040042
Nicolas Capensbd65da92017-01-05 16:31:06 -050043#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000044# ifndef WIN32_LEAN_AND_MEAN
45# define WIN32_LEAN_AND_MEAN
46# endif // !WIN32_LEAN_AND_MEAN
47# ifndef NOMINMAX
48# define NOMINMAX
49# endif // !NOMINMAX
50# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050051#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040052
Ben Clayton683bad82020-02-10 23:57:09 +000053#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040054#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000055#include <limits>
56#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040057
// Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050060namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050061namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050062
63Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
64{
65 uint32_t sequenceNumber = 0;
66 auto function = Ice::Cfg::create(context, sequenceNumber).release();
67
68 Ice::CfgLocalAllocatorScope allocScope{ function };
69
70 for(auto type : paramTypes)
71 {
72 Ice::Variable *arg = function->makeVariable(type);
73 function->addArg(arg);
74 }
75
76 Ice::CfgNode *node = function->makeNode();
77 function->setEntryNode(node);
78
79 return function;
80}
81
82Ice::Type getPointerType(Ice::Type elementType)
83{
84 if(sizeof(void *) == 8)
85 {
86 return Ice::IceType_i64;
87 }
88 else
89 {
90 return Ice::IceType_i32;
91 }
92}
93
94Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
95{
96 int typeSize = Ice::typeWidthInBytes(type);
97 int totalSize = typeSize * (arraySize ? arraySize : 1);
98
99 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
100 auto address = function->makeVariable(getPointerType(type));
101 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
102 function->getEntryNode()->getInsts().push_front(alloca);
103
104 return address;
105}
106
107Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500108{
109 if(sizeof(void *) == 8)
110 {
111 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
112 }
113 else
114 {
115 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
116 }
117}
118
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400119// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
120Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500121
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400122// Wrapper for calls on C functions with Ice types
123Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
124{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500125 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400126
127 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
128 // then truncate result to bool.
129 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
130 const bool returningBool = (retTy == Ice::IceType_i1);
131 if(returningBool)
132 {
133 ret = function->makeVariable(Ice::IceType_i32);
134 }
135 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500136 {
137 ret = function->makeVariable(retTy);
138 }
139
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400140 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500141 for(auto arg : iceArgs)
142 {
143 call->addArg(arg);
144 }
145
146 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400147
148 if(returningBool)
149 {
150 // Truncate result to bool so that if any (lsb) bits were set, result will be true
151 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
152 }
153
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500154 return ret;
155}
156
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400157Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
158{
159 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
160 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
161}
162
Antonio Maiorano62427e02020-02-13 09:18:05 -0500163// Wrapper for calls on C functions with Ice types
164template<typename Return, typename... CArgs, typename... RArgs>
165Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
166{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400167 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
168
Nicolas Capens519cf222020-05-08 15:27:19 -0400169 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500170 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500171 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500172}
173
Antonio Maiorano02a39532020-01-21 15:15:34 -0500174// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500175Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500176{
177 Ice::Variable *result = function->makeVariable(v->getType());
178 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
179 basicBlock->appendInst(cast);
180 return result;
181}
182
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400183Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
184{
185 Ice::Variable *to = function->makeVariable(toType);
186 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
187 basicBlock->appendInst(cast);
188 return to;
189}
190
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500191Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500192{
193 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
194 // absolute address. We circumvent this by casting to a non-const variable, and loading
195 // from that.
196 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
197 {
198 ptr = sz::createUnconstCast(function, basicBlock, cptr);
199 }
200
201 Ice::Variable *result = function->makeVariable(type);
202 auto load = Ice::InstLoad::create(function, result, ptr, align);
203 basicBlock->appendInst(load);
204
205 return result;
206}
207
208} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500209} // namespace
210
// Forward declarations of rr types referenced by the file-local globals below.
namespace rr {
class ELFMemoryStreamer;
class CoroutineGenerator;
}  // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500215
216namespace {
217
218// Default configuration settings. Must be accessed under mutex lock.
219std::mutex defaultConfigLock;
220rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000221{
Nicolas Capens157ba262019-12-10 17:49:14 -0500222 // This uses a static in a function to avoid the cost of a global static
223 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
224 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000225 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500226 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000227}
228
Nicolas Capens157ba262019-12-10 17:49:14 -0500229Ice::GlobalContext *context = nullptr;
230Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400231Ice::CfgNode *entryBlock = nullptr;
232Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500233Ice::CfgNode *basicBlock = nullptr;
234Ice::CfgLocalAllocatorScope *allocator = nullptr;
235rr::ELFMemoryStreamer *routine = nullptr;
236
237std::mutex codegenMutex;
238
239Ice::ELFFileStreamer *elfFile = nullptr;
240Ice::Fdstream *out = nullptr;
241
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500242// Coroutine globals
243rr::Type *coroYieldType = nullptr;
244std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500245marl::Scheduler &getOrCreateScheduler()
246{
247 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100248 marl::Scheduler::Config cfg;
249 cfg.setWorkerThreadCount(8);
250 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500251 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500252
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500253 return *scheduler;
254}
Nicolas Capens157ba262019-12-10 17:49:14 -0500255} // Anonymous namespace
256
257namespace {
258
// Normalize architecture macros: when only the _M_IX86 / _M_AMD64 / _M_X64
// spellings are defined, also define the __i386__ / __x86_64__ spellings that
// the rest of this file tests.
#if !defined(__i386__) && defined(_M_IX86)
#	define __i386__ 1
#endif

#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
#	define __x86_64__ 1
#endif
266
Antonio Maiorano370cba52019-12-31 11:36:07 -0500267Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400268{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500269 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100270 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500271 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000272 case rr::Optimization::Level::None: return Ice::Opt_m1;
273 case rr::Optimization::Level::Less: return Ice::Opt_m1;
274 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500275 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
276 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100277 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500278 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400279}
280
Antonio Maiorano370cba52019-12-31 11:36:07 -0500281Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
282{
283 switch(memoryOrder)
284 {
285 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
286 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
287 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
288 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
289 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
290 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
291 }
292 return Ice::Intrinsics::MemoryOrderInvalid;
293}
294
// Host CPU feature flags. The two public constants are declared here and
// defined outside the class from the private detect*() helpers.
class CPUID
{
public:
	const static bool ARM;     // Initialized from detectARM().
	const static bool SSE4_1;  // Initialized from detectSSE4_1().

private:
	// Executes the CPUID instruction for leaf `info` and stores EAX, EBX, ECX
	// and EDX in registers[0..3]. On non-x86 builds all four are set to 0.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		registers[0] = 0;
		registers[1] = 0;
		registers[2] = 0;
		registers[3] = 0;
#endif
	}

	// Compile-time architecture check: true for ARM/AArch64 builds, false for
	// x86 and MIPS builds. Any other architecture is a build error.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__)
		return false;
#elif defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// On x86 builds, queries CPUID leaf 1 and tests bit 19 (0x00080000) of ECX.
	// Always false on other architectures.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		int registers[4];
		cpuid(registers, 1);
		return (registers[2] & 0x00080000) != 0;
#else
		return false;
#endif
	}
};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500344
Nicolas Capens157ba262019-12-10 17:49:14 -0500345const bool CPUID::ARM = CPUID::detectARM();
346const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
347const bool emulateIntrinsics = false;
348const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400349
Nicolas Capens157ba262019-12-10 17:49:14 -0500350constexpr bool subzeroDumpEnabled = false;
351constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500352
353#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500354static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
355static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500356#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500357
358} // anonymous namespace
359
360namespace rr {
361
// Returns the name of this Reactor backend, "Subzero".
std::string BackendName()
{
	return "Subzero";
}
366
Ben Clayton713b8d32019-12-17 20:37:56 +0000367const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500368 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500369};
370
371enum EmulatedType
372{
373 EmulatedShift = 16,
374 EmulatedV2 = 2 << EmulatedShift,
375 EmulatedV4 = 4 << EmulatedShift,
376 EmulatedV8 = 8 << EmulatedShift,
377 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
378
379 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
380 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
381 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000382 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
383 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500384 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
385};
386
Ben Clayton713b8d32019-12-17 20:37:56 +0000387class Value : public Ice::Operand
388{};
389class SwitchCases : public Ice::InstSwitch
390{};
391class BasicBlock : public Ice::CfgNode
392{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500393
394Ice::Type T(Type *t)
395{
396 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
397 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500398}
399
Nicolas Capens157ba262019-12-10 17:49:14 -0500400Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400401{
Ben Clayton713b8d32019-12-17 20:37:56 +0000402 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500403}
404
405Type *T(EmulatedType t)
406{
Ben Clayton713b8d32019-12-17 20:37:56 +0000407 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500408}
409
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500410std::vector<Ice::Type> T(const std::vector<Type *> &types)
411{
412 std::vector<Ice::Type> result;
413 result.reserve(types.size());
414 for(auto &t : types)
415 {
416 result.push_back(T(t));
417 }
418 return result;
419}
420
Nicolas Capens157ba262019-12-10 17:49:14 -0500421Value *V(Ice::Operand *v)
422{
Ben Clayton713b8d32019-12-17 20:37:56 +0000423 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500424}
425
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500426Ice::Operand *V(Value *v)
427{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500428 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500429}
430
Antonio Maiorano62427e02020-02-13 09:18:05 -0500431std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
432{
433 std::vector<Ice::Operand *> result;
434 result.reserve(values.size());
435 for(auto &v : values)
436 {
437 result.push_back(V(v));
438 }
439 return result;
440}
441
Nicolas Capens157ba262019-12-10 17:49:14 -0500442BasicBlock *B(Ice::CfgNode *b)
443{
Ben Clayton713b8d32019-12-17 20:37:56 +0000444 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500445}
446
447static size_t typeSize(Type *type)
448{
449 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400450 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500451 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800452 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000453 case Type_v2i32: return 8;
454 case Type_v4i16: return 8;
455 case Type_v2i16: return 4;
456 case Type_v8i8: return 8;
457 case Type_v4i8: return 4;
458 case Type_v2f32: return 8;
459 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500460 }
461 }
462
463 return Ice::typeWidthInBytes(T(type));
464}
465
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400466static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500467{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400468 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500469 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
470 {
471 Nucleus::createRetVoid();
472 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400473
474 // Connect the entry block to the top of the initial basic block
475 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
476 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500477}
478
Ben Clayton713b8d32019-12-17 20:37:56 +0000479using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
480using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500481
482inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
483{
Ben Clayton713b8d32019-12-17 20:37:56 +0000484 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500485}
486
487inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
488{
489 return &sectionHeader(elfHeader)[index];
490}
491
492static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
493{
494 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
495
496 uint32_t index = relocation.getSymbol();
497 int table = relocationTable.sh_link;
498 void *symbolValue = nullptr;
499
500 if(index != SHN_UNDEF)
501 {
502 if(table == SHN_UNDEF) return nullptr;
503 const SectionHeader *symbolTable = elfSection(elfHeader, table);
504
505 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
506 if(index >= symtab_entries)
507 {
508 ASSERT(index < symtab_entries && "Symbol Index out of range");
509 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800510 }
511
Nicolas Capens157ba262019-12-10 17:49:14 -0500512 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000513 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500514 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800515
Nicolas Capens157ba262019-12-10 17:49:14 -0500516 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400517 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500518 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000519 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400520 }
521 else
522 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500523 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400524 }
Nicolas Capens66478362016-10-13 15:36:36 -0400525 }
526
Nicolas Capens157ba262019-12-10 17:49:14 -0500527 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000528 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500529
530 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400531 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400532 switch(relocation.getType())
533 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000534 case R_ARM_NONE:
535 // No relocation
536 break;
537 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500538 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000539 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500540 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
541 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
542 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400543 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000544 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500545 {
546 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
547 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
548 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400549 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000550 default:
551 ASSERT(false && "Unsupported relocation type");
552 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400553 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500554 }
555 else
556 {
557 switch(relocation.getType())
558 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000559 case R_386_NONE:
560 // No relocation
561 break;
562 case R_386_32:
563 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
564 break;
565 case R_386_PC32:
566 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
567 break;
568 default:
569 ASSERT(false && "Unsupported relocation type");
570 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500571 }
Nicolas Capens66478362016-10-13 15:36:36 -0400572 }
573
Nicolas Capens157ba262019-12-10 17:49:14 -0500574 return symbolValue;
575}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400576
Nicolas Capens157ba262019-12-10 17:49:14 -0500577static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
578{
579 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
580
581 uint32_t index = relocation.getSymbol();
582 int table = relocationTable.sh_link;
583 void *symbolValue = nullptr;
584
585 if(index != SHN_UNDEF)
586 {
587 if(table == SHN_UNDEF) return nullptr;
588 const SectionHeader *symbolTable = elfSection(elfHeader, table);
589
590 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
591 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400592 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500593 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400594 return nullptr;
595 }
596
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000598 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500599 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400600
Nicolas Capens157ba262019-12-10 17:49:14 -0500601 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400602 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000604 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500605 }
606 else
607 {
608 return nullptr;
609 }
610 }
Nicolas Capens66478362016-10-13 15:36:36 -0400611
Nicolas Capens157ba262019-12-10 17:49:14 -0500612 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000613 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
614 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400615
Nicolas Capens157ba262019-12-10 17:49:14 -0500616 switch(relocation.getType())
617 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000618 case R_X86_64_NONE:
619 // No relocation
620 break;
621 case R_X86_64_64:
622 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
623 break;
624 case R_X86_64_PC32:
625 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
626 break;
627 case R_X86_64_32S:
628 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
629 break;
630 default:
631 ASSERT(false && "Unsupported relocation type");
632 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500633 }
634
635 return symbolValue;
636}
637
// Location of one generated function inside a loaded ELF image.
struct EntryPoint
{
	const void *entry = nullptr;  // Start of the function's code; null until set by loadImage().
	size_t codeSize = 0;          // Size in bytes of the section holding the code.
};
643
644std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
645{
646 ASSERT(functionNames.size() > 0);
647 std::vector<EntryPoint> entryPoints(functionNames.size());
648
Ben Clayton713b8d32019-12-17 20:37:56 +0000649 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500650
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400651 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500652 if(!elfHeader->checkMagic())
653 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400654 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500655 }
656
657 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000658 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
659#if defined(__i386__)
660 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
661#elif defined(__x86_64__)
662 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
663#elif defined(__arm__)
664 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
665#elif defined(__aarch64__)
666 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
667#elif defined(__mips__)
668 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
669#else
670# error "Unsupported platform"
671#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500672
Ben Clayton713b8d32019-12-17 20:37:56 +0000673 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500674
675 for(int i = 0; i < elfHeader->e_shnum; i++)
676 {
677 if(sectionHeader[i].sh_type == SHT_PROGBITS)
678 {
679 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
680 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400681 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500682 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400683 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500684
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400685 for(size_t j = 0; j < functionNames.size(); ++j)
686 {
687 if(strstr(sectionName, functionNames[j]) != nullptr)
688 {
689 return j;
690 }
691 }
692
693 UNREACHABLE("Failed to find executable section that matches input function names");
694 return static_cast<size_t>(-1);
695 };
696
697 size_t index = findSectionNameEntryIndex();
698 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
699 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400700 }
701 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500702 else if(sectionHeader[i].sh_type == SHT_REL)
703 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000704 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400705
Nicolas Capens157ba262019-12-10 17:49:14 -0500706 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
707 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000708 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500709 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
710 }
711 }
712 else if(sectionHeader[i].sh_type == SHT_RELA)
713 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000714 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500715
716 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
717 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000718 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500719 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
720 }
721 }
722 }
723
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400724 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500725}
726
727template<typename T>
728struct ExecutableAllocator
729{
730 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000731 template<class U>
732 ExecutableAllocator(const ExecutableAllocator<U> &other)
733 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500734
735 using value_type = T;
736 using size_type = std::size_t;
737
738 T *allocate(size_type n)
739 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000740 return (T *)allocateMemoryPages(
741 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500742 }
743
744 void deallocate(T *p, size_type n)
745 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800746 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500747 }
748};
749
750class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
751{
752 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
753 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
754
755public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000756 ELFMemoryStreamer()
757 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500758 {
759 position = 0;
760 buffer.reserve(0x1000);
761 }
762
763 ~ELFMemoryStreamer() override
764 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500765 }
766
767 void write8(uint8_t Value) override
768 {
769 if(position == (uint64_t)buffer.size())
770 {
771 buffer.push_back(Value);
772 position++;
773 }
774 else if(position < (uint64_t)buffer.size())
775 {
776 buffer[position] = Value;
777 position++;
778 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000779 else
780 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500781 }
782
783 void writeBytes(llvm::StringRef Bytes) override
784 {
785 std::size_t oldSize = buffer.size();
786 buffer.resize(oldSize + Bytes.size());
787 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
788 position += Bytes.size();
789 }
790
791 uint64_t tell() const override { return position; }
792
793 void seek(uint64_t Off) override { position = Off; }
794
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400795 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500796 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400797 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500798
799#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500800 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
801#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400802 for(auto &entryPoint : entryPoints)
803 {
804 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
805 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500806#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500807
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400808 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400809 }
810
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500811 void finalize()
812 {
813 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
814
815 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
816 }
817
Ben Clayton713b8d32019-12-17 20:37:56 +0000818 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400819 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500820 ASSERT(func);
821 funcs[index] = func;
822 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400823
Nicolas Capens157ba262019-12-10 17:49:14 -0500824 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400825 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500826 ASSERT(funcs[index]);
827 return funcs[index];
828 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400829
Antonio Maiorano02a39532020-01-21 15:15:34 -0500830 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500831 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500832 // TODO(b/148086935): Replace with a buffer allocator.
833 size_t space = size + alignment;
834 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
835 void *ptr = buf.get();
836 void *alignedPtr = std::align(alignment, size, ptr, space);
837 ASSERT(alignedPtr);
838 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500839 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500840 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500841 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400842
Nicolas Capens157ba262019-12-10 17:49:14 -0500843private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000844 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500845 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
846 std::size_t position;
847 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500848};
849
Antonio Maiorano62427e02020-02-13 09:18:05 -0500850#ifdef ENABLE_RR_PRINT
851void VPrintf(const std::vector<Value *> &vals)
852{
Antonio Maiorano8cbee412020-06-10 15:59:20 -0400853 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(rr::DebugPrintf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500854}
855#endif // ENABLE_RR_PRINT
856
Nicolas Capens157ba262019-12-10 17:49:14 -0500857Nucleus::Nucleus()
858{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400859 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500860
861 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
862 Ice::ClFlags::getParsedClFlags(Flags);
863
Ben Clayton713b8d32019-12-17 20:37:56 +0000864#if defined(__arm__)
865 Flags.setTargetArch(Ice::Target_ARM32);
866 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
867#elif defined(__mips__)
868 Flags.setTargetArch(Ice::Target_MIPS32);
869 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
870#else // x86
871 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
872 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
873#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500874 Flags.setOutFileType(Ice::FT_Elf);
875 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
876 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
877 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
878 Flags.setDisableHybridAssembly(true);
879
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500880 // Emit functions into separate sections in the ELF so we can find them by name
881 Flags.setFunctionSections(true);
882
Nicolas Capens157ba262019-12-10 17:49:14 -0500883 static llvm::raw_os_ostream cout(std::cout);
884 static llvm::raw_os_ostream cerr(std::cerr);
885
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500886 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500887 {
888 // Decorate text asm with liveness info
889 Flags.setDecorateAsm(true);
890 }
891
Ben Clayton713b8d32019-12-17 20:37:56 +0000892 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500893 {
894 std::error_code errorCode;
895 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
896 ::elfFile = new Ice::ELFFileStreamer(*out);
897 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
898 }
899 else
900 {
901 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
902 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
903 ::routine = elfMemory;
904 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400905
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400906#if !__has_feature(memory_sanitizer)
907 // thread_local variables in shared libraries are initialized at load-time,
908 // but this is not observed by MemorySanitizer if the loader itself was not
909 // instrumented, leading to false-positive unitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400910 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400911#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500912 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500913}
914
915Nucleus::~Nucleus()
916{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400917 delete Variable::unmaterializedVariables;
918 Variable::unmaterializedVariables = nullptr;
919
Nicolas Capens157ba262019-12-10 17:49:14 -0500920 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500921 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500922
923 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500924 ::allocator = nullptr;
925
Nicolas Capens157ba262019-12-10 17:49:14 -0500926 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500927 ::function = nullptr;
928
Nicolas Capens157ba262019-12-10 17:49:14 -0500929 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500930 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500931
932 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500933 ::elfFile = nullptr;
934
Nicolas Capens157ba262019-12-10 17:49:14 -0500935 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500936 ::out = nullptr;
937
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400938 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500939 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400940 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500941
942 ::codegenMutex.unlock();
943}
944
945void Nucleus::setDefaultConfig(const Config &cfg)
946{
947 std::unique_lock<std::mutex> lock(::defaultConfigLock);
948 ::defaultConfig() = cfg;
949}
950
951void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
952{
953 std::unique_lock<std::mutex> lock(::defaultConfigLock);
954 auto &config = ::defaultConfig();
955 config = cfgEdit.apply(config);
956}
957
958Config Nucleus::getDefaultConfig()
959{
960 std::unique_lock<std::mutex> lock(::defaultConfigLock);
961 return ::defaultConfig();
962}
963
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500964// This function lowers and produces executable binary code in memory for the input functions,
965// and returns a Routine with the entry points to these functions.
966template<size_t Count>
967static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500968{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500969 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
970 // and GlobalContext::emitItems.
971
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500972 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500973 {
974 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500975 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500976 }
977
978 ::context->emitFileHeader();
979
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500980 // Translate
981
982 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500983 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500984 Ice::Cfg *currFunc = functions[i];
985
986 // Install function allocator in TLS for Cfg-specific container allocators
987 Ice::CfgLocalAllocatorScope allocScope(currFunc);
988
989 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
990
991 rr::optimize(currFunc);
992
993 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500994 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500995
996 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500997 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500998
999 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1000
1001 if(subzeroEmitTextAsm)
1002 {
1003 currFunc->emit();
1004 }
1005
1006 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -05001007 }
1008
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001009 // Emit items
1010
1011 ::context->lowerGlobals("");
1012
Nicolas Capens157ba262019-12-10 17:49:14 -05001013 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001014
1015 for(size_t i = 0; i < Count; ++i)
1016 {
1017 Ice::Cfg *currFunc = functions[i];
1018
1019 // Accumulate globals from functions to emit into the "last" section at the end
1020 auto globals = currFunc->getGlobalInits();
1021 if(globals && !globals->empty())
1022 {
1023 ::context->getGlobals()->merge(globals.get());
1024 }
1025
1026 auto assembler = currFunc->releaseAssembler();
1027 assembler->alignFunction();
1028 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1029 }
1030
Nicolas Capens157ba262019-12-10 17:49:14 -05001031 ::context->lowerGlobals("last");
1032 ::context->lowerConstants();
1033 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001034
Nicolas Capens157ba262019-12-10 17:49:14 -05001035 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001036 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001037 objectWriter->writeNonUserSections();
1038
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001039 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001040 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1041 ASSERT(entryPoints.size() == Count);
1042 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001043 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001044 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001045 }
1046
1047 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001048
1049 Routine *handoffRoutine = ::routine;
1050 ::routine = nullptr;
1051
1052 return std::shared_ptr<Routine>(handoffRoutine);
1053}
1054
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001055std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1056{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001057 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001058 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1059}
1060
Nicolas Capens157ba262019-12-10 17:49:14 -05001061Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1062{
1063 Ice::Type type = T(t);
1064 int typeSize = Ice::typeWidthInBytes(type);
1065 int totalSize = typeSize * (arraySize ? arraySize : 1);
1066
1067 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1068 auto address = ::function->makeVariable(T(getPointerType(t)));
1069 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1070 ::function->getEntryNode()->getInsts().push_front(alloca);
1071
1072 return V(address);
1073}
1074
1075BasicBlock *Nucleus::createBasicBlock()
1076{
1077 return B(::function->makeNode());
1078}
1079
1080BasicBlock *Nucleus::getInsertBlock()
1081{
1082 return B(::basicBlock);
1083}
1084
1085void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1086{
Ben Clayton713b8d32019-12-17 20:37:56 +00001087 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001088
1089 Variable::materializeAll();
1090
1091 ::basicBlock = basicBlock;
1092}
1093
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001094void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001095{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001096 ASSERT(::function == nullptr);
1097 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001098 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001099 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001100 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001101
1102 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1103
1104 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1105 // becomes invalid if another one is created; for example, when creating await and destroy functions
1106 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1107 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001108 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1109
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001110 ::entryBlock = ::function->getEntryNode();
1111 ::basicBlock = ::function->makeNode();
1112 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001113}
1114
1115Value *Nucleus::getArgument(unsigned int index)
1116{
1117 return V(::function->getArgs()[index]);
1118}
1119
1120void Nucleus::createRetVoid()
1121{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001122 RR_DEBUG_INFO_UPDATE_LOC();
1123
Nicolas Capens157ba262019-12-10 17:49:14 -05001124 // Code generated after this point is unreachable, so any variables
1125 // being read can safely return an undefined value. We have to avoid
1126 // materializing variables after the terminator ret instruction.
1127 Variable::killUnmaterialized();
1128
1129 Ice::InstRet *ret = Ice::InstRet::create(::function);
1130 ::basicBlock->appendInst(ret);
1131}
1132
1133void Nucleus::createRet(Value *v)
1134{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001135 RR_DEBUG_INFO_UPDATE_LOC();
1136
Nicolas Capens157ba262019-12-10 17:49:14 -05001137 // Code generated after this point is unreachable, so any variables
1138 // being read can safely return an undefined value. We have to avoid
1139 // materializing variables after the terminator ret instruction.
1140 Variable::killUnmaterialized();
1141
1142 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1143 ::basicBlock->appendInst(ret);
1144}
1145
1146void Nucleus::createBr(BasicBlock *dest)
1147{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001148 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001149 Variable::materializeAll();
1150
1151 auto br = Ice::InstBr::create(::function, dest);
1152 ::basicBlock->appendInst(br);
1153}
1154
1155void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1156{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001157 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001158 Variable::materializeAll();
1159
1160 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1161 ::basicBlock->appendInst(br);
1162}
1163
1164static bool isCommutative(Ice::InstArithmetic::OpKind op)
1165{
1166 switch(op)
1167 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001168 case Ice::InstArithmetic::Add:
1169 case Ice::InstArithmetic::Fadd:
1170 case Ice::InstArithmetic::Mul:
1171 case Ice::InstArithmetic::Fmul:
1172 case Ice::InstArithmetic::And:
1173 case Ice::InstArithmetic::Or:
1174 case Ice::InstArithmetic::Xor:
1175 return true;
1176 default:
1177 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001178 }
1179}
1180
1181static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1182{
1183 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1184
1185 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1186
1187 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1188 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1189 ::basicBlock->appendInst(arithmetic);
1190
1191 return V(result);
1192}
1193
1194Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1195{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001196 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001197 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1198}
1199
1200Value *Nucleus::createSub(Value *lhs, Value *rhs)
1201{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001202 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001203 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1204}
1205
1206Value *Nucleus::createMul(Value *lhs, Value *rhs)
1207{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001208 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001209 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1210}
1211
1212Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1213{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001214 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001215 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1216}
1217
1218Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1219{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001220 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001221 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1222}
1223
1224Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1225{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001226 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001227 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1228}
1229
1230Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1231{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001232 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001233 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1234}
1235
1236Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1237{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001238 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001239 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1240}
1241
1242Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1243{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001244 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001245 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1246}
1247
1248Value *Nucleus::createURem(Value *lhs, Value *rhs)
1249{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001250 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001251 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1252}
1253
1254Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1255{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001256 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001257 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1258}
1259
1260Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1261{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001262 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001263 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1264 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001265 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001266 return nullptr;
1267}
1268
1269RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1270{
1271 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001272}
1273
1274Value *Nucleus::createShl(Value *lhs, Value *rhs)
1275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001277 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1278}
1279
1280Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001282 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001283 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1284}
1285
1286Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001289 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1290}
1291
1292Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001295 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1296}
1297
1298Value *Nucleus::createOr(Value *lhs, Value *rhs)
1299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001301 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1302}
1303
1304Value *Nucleus::createXor(Value *lhs, Value *rhs)
1305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001307 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1308}
1309
1310Value *Nucleus::createNeg(Value *v)
1311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001313 return createSub(createNullValue(T(v->getType())), v);
1314}
1315
1316Value *Nucleus::createFNeg(Value *v)
1317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001318 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001319 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1320 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001321
1322 return createFSub(negativeZero, v);
1323}
1324
1325Value *Nucleus::createNot(Value *v)
1326{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001327 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001328 if(Ice::isScalarIntegerType(v->getType()))
1329 {
1330 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1331 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001332 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001333 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001334 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001335 return createXor(v, createConstantVector(c, T(v->getType())));
1336 }
1337}
1338
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001339static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1340{
1341#if defined(__i386__) || defined(__x86_64__)
1342 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1343 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1344 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1345#else
1346 if(atomic)
1347 {
1348 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1349 }
1350 if(memoryOrder != std::memory_order_relaxed)
1351 {
1352 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1353 }
1354#endif
1355
1356 // Vulkan doesn't allow sequential memory order
1357 ASSERT(memoryOrder != std::memory_order_seq_cst);
1358}
1359
Nicolas Capens157ba262019-12-10 17:49:14 -05001360Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1361{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001362 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001363 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001364
1365 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001366 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001367
Ben Clayton713b8d32019-12-17 20:37:56 +00001368 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001369 {
1370 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001371 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001372 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001373 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001374 auto pointer = RValue<Pointer<Byte>>(ptr);
1375 Int x = *Pointer<Int>(pointer);
1376
1377 Int4 vector;
1378 vector = Insert(vector, x, 0);
1379
Antonio Maiorano02a39532020-01-21 15:15:34 -05001380 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001381 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1382 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001383 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001384 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001385 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001386 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001387 auto pointer = RValue<Pointer<Byte>>(ptr);
1388 Int x = *Pointer<Int>(pointer);
1389 Int y = *Pointer<Int>(pointer + 4);
1390
1391 Int4 vector;
1392 vector = Insert(vector, x, 0);
1393 vector = Insert(vector, y, 1);
1394
Antonio Maiorano02a39532020-01-21 15:15:34 -05001395 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001396 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1397 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001398 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001399 else
1400 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001401 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001402 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001403 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001404 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001405 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001406 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001407 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1408 load->addArg(ptr);
1409 load->addArg(::context->getConstantInt32(typeSize(type)));
1410 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001411 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001412 }
1413 else
1414 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001415 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001416 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001417
Antonio Maiorano02a39532020-01-21 15:15:34 -05001418 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001419 return V(result);
1420}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001421
Nicolas Capens157ba262019-12-10 17:49:14 -05001422Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1423{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001424 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001425 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001426
Ben Clayton713b8d32019-12-17 20:37:56 +00001427#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001428 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001429 if(align != 0)
1430 {
1431 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1432 call->addArg(ptr);
1433 call->addArg(::context->getConstantInt64(typeSize(type)));
1434 ::basicBlock->appendInst(call);
1435 }
1436#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001437
Nicolas Capens157ba262019-12-10 17:49:14 -05001438 int valueType = (int)reinterpret_cast<intptr_t>(type);
1439
Ben Clayton713b8d32019-12-17 20:37:56 +00001440 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001441 {
1442 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001443 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001444 if(typeSize(type) == 4)
1445 {
1446 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1447 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1448 ::basicBlock->appendInst(bitcast);
1449
1450 RValue<Int4> v(V(vector));
1451
1452 auto pointer = RValue<Pointer<Byte>>(ptr);
1453 Int x = Extract(v, 0);
1454 *Pointer<Int>(pointer) = x;
1455 }
1456 else if(typeSize(type) == 8)
1457 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001458 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001459 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1460 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1461 ::basicBlock->appendInst(bitcast);
1462
1463 RValue<Int4> v(V(vector));
1464
1465 auto pointer = RValue<Pointer<Byte>>(ptr);
1466 Int x = Extract(v, 0);
1467 *Pointer<Int>(pointer) = x;
1468 Int y = Extract(v, 1);
1469 *Pointer<Int>(pointer + 4) = y;
1470 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001471 else
1472 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001473 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001474 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001475 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001476 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001477 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1478 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1479 store->addArg(value);
1480 store->addArg(ptr);
1481 store->addArg(::context->getConstantInt32(typeSize(type)));
1482 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001483 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001484 }
1485 else
1486 {
1487 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001488
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001489 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001490 ::basicBlock->appendInst(store);
1491 }
1492
1493 return value;
1494}
1495
1496Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1497{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001498 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001499 ASSERT(index->getType() == Ice::IceType_i32);
1500
1501 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1502 {
1503 int32_t offset = constant->getValue() * (int)typeSize(type);
1504
1505 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001506 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001507 return ptr;
1508 }
1509
Nicolas Capens157ba262019-12-10 17:49:14 -05001510 return createAdd(ptr, createConstantInt(offset));
1511 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001512
Nicolas Capens157ba262019-12-10 17:49:14 -05001513 if(!Ice::isByteSizedType(T(type)))
1514 {
1515 index = createMul(index, createConstantInt((int)typeSize(type)));
1516 }
1517
Ben Clayton713b8d32019-12-17 20:37:56 +00001518 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001519 {
1520 if(unsignedIndex)
1521 {
1522 index = createZExt(index, T(Ice::IceType_i64));
1523 }
1524 else
1525 {
1526 index = createSExt(index, T(Ice::IceType_i64));
1527 }
1528 }
1529
1530 return createAdd(ptr, index);
1531}
1532
Antonio Maiorano370cba52019-12-31 11:36:07 -05001533static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1534{
1535 Ice::Variable *result = ::function->makeVariable(value->getType());
1536
1537 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1538 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1539 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1540 auto op = ::context->getConstantInt32(rmwOp);
1541 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1542 inst->addArg(op);
1543 inst->addArg(ptr);
1544 inst->addArg(value);
1545 inst->addArg(order);
1546 ::basicBlock->appendInst(inst);
1547
1548 return V(result);
1549}
1550
Nicolas Capens157ba262019-12-10 17:49:14 -05001551Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1552{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001553 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001554 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001555}
1556
1557Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1558{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001559 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001560 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001561}
1562
1563Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1564{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001565 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001566 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001567}
1568
1569Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1570{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001571 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001572 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001573}
1574
1575Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1576{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001577 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001578 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001579}
1580
1581Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1582{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001583 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001584 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001585}
1586
1587Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1588{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001589 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001590 Ice::Variable *result = ::function->makeVariable(value->getType());
1591
1592 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1593 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1594 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1595 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1596 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1597 inst->addArg(ptr);
1598 inst->addArg(compare);
1599 inst->addArg(value);
1600 inst->addArg(orderEq);
1601 inst->addArg(orderNeq);
1602 ::basicBlock->appendInst(inst);
1603
1604 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001605}
1606
1607static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1608{
1609 if(v->getType() == T(destType))
1610 {
1611 return v;
1612 }
1613
1614 Ice::Variable *result = ::function->makeVariable(T(destType));
1615 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1616 ::basicBlock->appendInst(cast);
1617
1618 return V(result);
1619}
1620
1621Value *Nucleus::createTrunc(Value *v, Type *destType)
1622{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001623 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001624 return createCast(Ice::InstCast::Trunc, v, destType);
1625}
1626
1627Value *Nucleus::createZExt(Value *v, Type *destType)
1628{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001629 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001630 return createCast(Ice::InstCast::Zext, v, destType);
1631}
1632
1633Value *Nucleus::createSExt(Value *v, Type *destType)
1634{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001635 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001636 return createCast(Ice::InstCast::Sext, v, destType);
1637}
1638
1639Value *Nucleus::createFPToUI(Value *v, Type *destType)
1640{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001641 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001642 return createCast(Ice::InstCast::Fptoui, v, destType);
1643}
1644
1645Value *Nucleus::createFPToSI(Value *v, Type *destType)
1646{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001647 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001648 return createCast(Ice::InstCast::Fptosi, v, destType);
1649}
1650
1651Value *Nucleus::createSIToFP(Value *v, Type *destType)
1652{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001653 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001654 return createCast(Ice::InstCast::Sitofp, v, destType);
1655}
1656
1657Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1658{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001659 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001660 return createCast(Ice::InstCast::Fptrunc, v, destType);
1661}
1662
1663Value *Nucleus::createFPExt(Value *v, Type *destType)
1664{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001665 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001666 return createCast(Ice::InstCast::Fpext, v, destType);
1667}
1668
1669Value *Nucleus::createBitCast(Value *v, Type *destType)
1670{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001671 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001672 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1673 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1674 // emulate them by writing to the stack and reading back as the destination type.
1675 if(emulateMismatchedBitCast)
1676 {
1677 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1678 {
1679 Value *address = allocateStackVariable(destType);
1680 createStore(v, address, T(v->getType()));
1681 return createLoad(address, destType);
1682 }
1683 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1684 {
1685 Value *address = allocateStackVariable(T(v->getType()));
1686 createStore(v, address, T(v->getType()));
1687 return createLoad(address, destType);
1688 }
1689 }
1690
1691 return createCast(Ice::InstCast::Bitcast, v, destType);
1692}
1693
1694static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1695{
1696 ASSERT(lhs->getType() == rhs->getType());
1697
1698 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1699 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1700 ::basicBlock->appendInst(cmp);
1701
1702 return V(result);
1703}
1704
1705Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1706{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001707 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001708 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1709}
1710
1711Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1712{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001713 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001714 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1715}
1716
1717Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1718{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001719 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001720 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1721}
1722
1723Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1724{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001725 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001726 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1727}
1728
1729Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1730{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001731 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001732 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1733}
1734
1735Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1736{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001737 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001738 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1739}
1740
1741Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1742{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001743 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001744 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1745}
1746
1747Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1748{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001749 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001750 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1751}
1752
1753Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1754{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001755 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001756 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1757}
1758
1759Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1760{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001761 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001762 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1763}
1764
1765Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1766{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001767 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001768 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1769}
1770
1771static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1772{
1773 ASSERT(lhs->getType() == rhs->getType());
1774 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1775
1776 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1777 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1778 ::basicBlock->appendInst(cmp);
1779
1780 return V(result);
1781}
1782
1783Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1784{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001785 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001786 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1787}
1788
1789Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001792 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1793}
1794
1795Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001798 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1799}
1800
1801Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1802{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001803 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001804 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1805}
1806
1807Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1808{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001809 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001810 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1811}
1812
1813Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1814{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001815 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001816 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1817}
1818
1819Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1820{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001821 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001822 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1823}
1824
1825Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1826{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001827 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001828 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1829}
1830
1831Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1832{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001833 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001834 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1835}
1836
1837Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001840 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1841}
1842
1843Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001846 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1847}
1848
1849Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001852 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1853}
1854
1855Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001858 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1859}
1860
1861Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001864 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1865}
1866
1867Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1868{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001869 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001870 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001871 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001872 ::basicBlock->appendInst(extract);
1873
1874 return V(result);
1875}
1876
1877Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1878{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001879 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001880 auto result = ::function->makeVariable(vector->getType());
1881 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1882 ::basicBlock->appendInst(insert);
1883
1884 return V(result);
1885}
1886
1887Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1888{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001889 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001890 ASSERT(V1->getType() == V2->getType());
1891
1892 int size = Ice::typeNumElements(V1->getType());
1893 auto result = ::function->makeVariable(V1->getType());
1894 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1895
1896 for(int i = 0; i < size; i++)
1897 {
1898 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1899 }
1900
1901 ::basicBlock->appendInst(shuffle);
1902
1903 return V(result);
1904}
1905
1906Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1907{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001908 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001909 ASSERT(ifTrue->getType() == ifFalse->getType());
1910
1911 auto result = ::function->makeVariable(ifTrue->getType());
1912 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1913 ::basicBlock->appendInst(select);
1914
1915 return V(result);
1916}
1917
1918SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1919{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001920 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001921 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1922 ::basicBlock->appendInst(switchInst);
1923
Ben Clayton713b8d32019-12-17 20:37:56 +00001924 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001925}
1926
1927void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1928{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001929 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001930 switchCases->addBranch(label, label, branch);
1931}
1932
1933void Nucleus::createUnreachable()
1934{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001935 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001936 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1937 ::basicBlock->appendInst(unreachable);
1938}
1939
Antonio Maiorano62427e02020-02-13 09:18:05 -05001940Type *Nucleus::getType(Value *value)
1941{
1942 return T(V(value)->getType());
1943}
1944
1945Type *Nucleus::getContainedType(Type *vectorType)
1946{
1947 Ice::Type vecTy = T(vectorType);
1948 switch(vecTy)
1949 {
1950 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1951 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1952 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1953 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1954 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1955 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1956 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1957 default:
1958 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1959 return {};
1960 }
1961}
1962
Nicolas Capens157ba262019-12-10 17:49:14 -05001963Type *Nucleus::getPointerType(Type *ElementType)
1964{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001965 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001966}
1967
Antonio Maiorano62427e02020-02-13 09:18:05 -05001968static constexpr Ice::Type getNaturalIntType()
1969{
1970 constexpr size_t intSize = sizeof(int);
1971 static_assert(intSize == 4 || intSize == 8, "");
1972 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1973}
1974
1975Type *Nucleus::getPrintfStorageType(Type *valueType)
1976{
1977 Ice::Type valueTy = T(valueType);
1978 switch(valueTy)
1979 {
1980 case Ice::IceType_i32:
1981 return T(getNaturalIntType());
1982
1983 case Ice::IceType_f32:
1984 return T(Ice::IceType_f64);
1985
1986 default:
1987 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1988 return {};
1989 }
1990}
1991
Nicolas Capens157ba262019-12-10 17:49:14 -05001992Value *Nucleus::createNullValue(Type *Ty)
1993{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001994 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001995 if(Ice::isVectorType(T(Ty)))
1996 {
1997 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001998 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001999 return createConstantVector(c, Ty);
2000 }
2001 else
2002 {
2003 return V(::context->getConstantZero(T(Ty)));
2004 }
2005}
2006
2007Value *Nucleus::createConstantLong(int64_t i)
2008{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002009 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002010 return V(::context->getConstantInt64(i));
2011}
2012
2013Value *Nucleus::createConstantInt(int i)
2014{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002015 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002016 return V(::context->getConstantInt32(i));
2017}
2018
2019Value *Nucleus::createConstantInt(unsigned int i)
2020{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002021 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002022 return V(::context->getConstantInt32(i));
2023}
2024
2025Value *Nucleus::createConstantBool(bool b)
2026{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002027 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002028 return V(::context->getConstantInt1(b));
2029}
2030
2031Value *Nucleus::createConstantByte(signed char i)
2032{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002033 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002034 return V(::context->getConstantInt8(i));
2035}
2036
2037Value *Nucleus::createConstantByte(unsigned char i)
2038{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002039 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002040 return V(::context->getConstantInt8(i));
2041}
2042
2043Value *Nucleus::createConstantShort(short i)
2044{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002045 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002046 return V(::context->getConstantInt16(i));
2047}
2048
2049Value *Nucleus::createConstantShort(unsigned short i)
2050{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002051 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002052 return V(::context->getConstantInt16(i));
2053}
2054
2055Value *Nucleus::createConstantFloat(float x)
2056{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002057 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002058 return V(::context->getConstantFloat(x));
2059}
2060
2061Value *Nucleus::createNullPointer(Type *Ty)
2062{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002063 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002064 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002065}
2066
Antonio Maiorano02a39532020-01-21 15:15:34 -05002067static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2068{
2069 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2070}
2071
Nicolas Capens157ba262019-12-10 17:49:14 -05002072Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2073{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002074 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002075 const int vectorSize = 16;
2076 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2077 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002078
2079 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002080 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002081
Antonio Maioranoa0957112020-03-04 15:06:19 -05002082 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002083 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002084
2085 switch((int)reinterpret_cast<intptr_t>(type))
2086 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002087 case Ice::IceType_v4i32:
2088 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002089 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002090 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002091 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002092 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002093 }
2094 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002095 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002096 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002097 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002098 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002099 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002100 }
2101 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002102 case Ice::IceType_v8i16:
2103 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002104 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002105 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002106 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002107 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002108 }
2109 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002110 case Ice::IceType_v16i8:
2111 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002112 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002113 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002114 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002115 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002116 }
2117 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002118 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002119 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002120 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002121 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002122 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002123 }
2124 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002125 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002126 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002127 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002128 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002129 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002130 }
2131 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002132 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002133 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002134 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002135 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002136 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002137 }
2138 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002139 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002140 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002141 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002142 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002143 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002144 }
2145 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002146 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002147 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002148 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002149 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002150 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002151 }
2152 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002153 default:
2154 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002155 }
2156
Antonio Maiorano02a39532020-01-21 15:15:34 -05002157 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002158
Antonio Maiorano02a39532020-01-21 15:15:34 -05002159 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002160 return V(result);
2161}
2162
2163Value *Nucleus::createConstantVector(const double *constants, Type *type)
2164{
Ben Clayton713b8d32019-12-17 20:37:56 +00002165 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002166}
2167
Antonio Maiorano62427e02020-02-13 09:18:05 -05002168Value *Nucleus::createConstantString(const char *v)
2169{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002170 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002171 return V(IceConstantData(v, strlen(v) + 1));
2172}
2173
Nicolas Capens519cf222020-05-08 15:27:19 -04002174Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002175{
2176 return T(Ice::IceType_void);
2177}
2178
Nicolas Capens519cf222020-05-08 15:27:19 -04002179Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002180{
2181 return T(Ice::IceType_i1);
2182}
2183
Nicolas Capens519cf222020-05-08 15:27:19 -04002184Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002185{
2186 return T(Ice::IceType_i8);
2187}
2188
Nicolas Capens519cf222020-05-08 15:27:19 -04002189Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002190{
2191 return T(Ice::IceType_i8);
2192}
2193
Nicolas Capens519cf222020-05-08 15:27:19 -04002194Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002195{
2196 return T(Ice::IceType_i16);
2197}
2198
Nicolas Capens519cf222020-05-08 15:27:19 -04002199Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002200{
2201 return T(Ice::IceType_i16);
2202}
2203
Nicolas Capens519cf222020-05-08 15:27:19 -04002204Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002205{
2206 return T(Type_v4i8);
2207}
2208
Nicolas Capens519cf222020-05-08 15:27:19 -04002209Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002210{
2211 return T(Type_v4i8);
2212}
2213
Ben Clayton713b8d32019-12-17 20:37:56 +00002214namespace {
2215RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002216{
Ben Clayton713b8d32019-12-17 20:37:56 +00002217 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002218}
2219
Ben Clayton713b8d32019-12-17 20:37:56 +00002220RValue<Byte> Extract(RValue<Byte8> val, int i)
2221{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002222 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002223}
2224
2225RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2226{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002227 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002228}
2229} // namespace
2230
Nicolas Capens157ba262019-12-10 17:49:14 -05002231RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2232{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002233 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002234 if(emulateIntrinsics)
2235 {
2236 Byte8 result;
2237 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2238 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2239 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2240 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2241 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2242 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2243 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2244 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2245
2246 return result;
2247 }
2248 else
2249 {
2250 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002251 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002252 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2253 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002254 paddusb->addArg(x.value());
2255 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002256 ::basicBlock->appendInst(paddusb);
2257
2258 return RValue<Byte8>(V(result));
2259 }
2260}
2261
2262RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2263{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002264 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002265 if(emulateIntrinsics)
2266 {
2267 Byte8 result;
2268 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2269 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2270 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2271 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2272 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2273 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2274 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2275 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2276
2277 return result;
2278 }
2279 else
2280 {
2281 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002282 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002283 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2284 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002285 psubusw->addArg(x.value());
2286 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002287 ::basicBlock->appendInst(psubusw);
2288
2289 return RValue<Byte8>(V(result));
2290 }
2291}
2292
2293RValue<SByte> Extract(RValue<SByte8> val, int i)
2294{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002295 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002296 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002297}
2298
2299RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2300{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002301 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002302 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002303}
2304
2305RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2306{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002307 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002308 if(emulateIntrinsics)
2309 {
2310 SByte8 result;
2311 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2312 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2313 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2314 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2315 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2316 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2317 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2318 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2319
2320 return result;
2321 }
2322 else
2323 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002324#if defined(__i386__) || defined(__x86_64__)
2325 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2326 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2327 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002328
Ben Clayton713b8d32019-12-17 20:37:56 +00002329 return As<SByte8>(hi | lo);
2330#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002331 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002332#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002333 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002334}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002335
Nicolas Capens157ba262019-12-10 17:49:14 -05002336RValue<Int> SignMask(RValue<Byte8> x)
2337{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002338 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002339 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002340 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002341 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2342 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002343 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002344 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002345 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002346 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002347 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002348 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2349 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002350 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002351 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002352
Nicolas Capens157ba262019-12-10 17:49:14 -05002353 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002354 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002355}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002356
2357// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2358// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002359// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002360// }
2361
Nicolas Capens157ba262019-12-10 17:49:14 -05002362RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2363{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002364 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002365 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002366}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002367
Nicolas Capens519cf222020-05-08 15:27:19 -04002368Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002369{
2370 return T(Type_v8i8);
2371}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002372
Nicolas Capens598f8d82016-09-26 15:09:10 -04002373// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2374// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002375// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002376// }
2377
2378// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2379// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002380// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002381// }
2382
Nicolas Capens157ba262019-12-10 17:49:14 -05002383RValue<SByte> SaturateSigned(RValue<Short> x)
2384{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002385 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002386 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2387}
2388
2389RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2390{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002391 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002392 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002393 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002394 SByte8 result;
2395 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2396 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2397 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2398 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2399 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2400 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2401 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2402 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002403
Nicolas Capens157ba262019-12-10 17:49:14 -05002404 return result;
2405 }
2406 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002407 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002408 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002409 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002410 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2411 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002412 paddsb->addArg(x.value());
2413 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002414 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002415
Nicolas Capens157ba262019-12-10 17:49:14 -05002416 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002417 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002418}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002419
Nicolas Capens157ba262019-12-10 17:49:14 -05002420RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2421{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002422 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002423 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002424 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002425 SByte8 result;
2426 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2427 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2428 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2429 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2430 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2431 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2432 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2433 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002434
Nicolas Capens157ba262019-12-10 17:49:14 -05002435 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002436 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002437 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002438 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002439 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002440 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002441 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2442 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002443 psubsb->addArg(x.value());
2444 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002445 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002446
Nicolas Capens157ba262019-12-10 17:49:14 -05002447 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002448 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002449}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002450
Nicolas Capens157ba262019-12-10 17:49:14 -05002451RValue<Int> SignMask(RValue<SByte8> x)
2452{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002453 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002454 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002455 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002456 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2457 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002458 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002459 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002460 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002461 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002462 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002463 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2464 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002465 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002466 ::basicBlock->appendInst(movmsk);
2467
2468 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002469 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002470}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002471
Nicolas Capens157ba262019-12-10 17:49:14 -05002472RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2473{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002474 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002475 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002476}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002477
Nicolas Capens157ba262019-12-10 17:49:14 -05002478RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2479{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002480 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002481 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002482}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002483
Nicolas Capens519cf222020-05-08 15:27:19 -04002484Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002485{
2486 return T(Type_v8i8);
2487}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002488
Nicolas Capens519cf222020-05-08 15:27:19 -04002489Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002490{
2491 return T(Ice::IceType_v16i8);
2492}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002493
Nicolas Capens519cf222020-05-08 15:27:19 -04002494Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002495{
2496 return T(Ice::IceType_v16i8);
2497}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002498
Nicolas Capens519cf222020-05-08 15:27:19 -04002499Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002500{
2501 return T(Type_v2i16);
2502}
Nicolas Capensd4227962016-11-09 14:24:25 -05002503
Nicolas Capens519cf222020-05-08 15:27:19 -04002504Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002505{
2506 return T(Type_v2i16);
2507}
Nicolas Capensd4227962016-11-09 14:24:25 -05002508
Nicolas Capens157ba262019-12-10 17:49:14 -05002509Short4::Short4(RValue<Int4> cast)
2510{
Ben Clayton713b8d32019-12-17 20:37:56 +00002511 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002512 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002513 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2514
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002515 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002516 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002517
2518 storeValue(short4);
2519}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002520
2521// Short4::Short4(RValue<Float> cast)
2522// {
2523// }
2524
Nicolas Capens157ba262019-12-10 17:49:14 -05002525Short4::Short4(RValue<Float4> cast)
2526{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002527 // TODO(b/150791192): Generalize and optimize
2528 auto smin = std::numeric_limits<short>::min();
2529 auto smax = std::numeric_limits<short>::max();
2530 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002531}
2532
2533RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2534{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002535 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002536 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002537 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002538 Short4 result;
2539 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2540 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2541 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2542 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002543
2544 return result;
2545 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002546 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002547 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002548 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002549 }
2550}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002551
Nicolas Capens157ba262019-12-10 17:49:14 -05002552RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2553{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002554 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002555 if(emulateIntrinsics)
2556 {
2557 Short4 result;
2558 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2559 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2560 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2561 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2562
2563 return result;
2564 }
2565 else
2566 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002567 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002568 }
2569}
2570
2571RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2572{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002573 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002574 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002575 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002576 ::basicBlock->appendInst(cmp);
2577
2578 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002579 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002580 ::basicBlock->appendInst(select);
2581
2582 return RValue<Short4>(V(result));
2583}
2584
2585RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2586{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002587 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002588 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002589 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002590 ::basicBlock->appendInst(cmp);
2591
2592 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002593 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002594 ::basicBlock->appendInst(select);
2595
2596 return RValue<Short4>(V(result));
2597}
2598
2599RValue<Short> SaturateSigned(RValue<Int> x)
2600{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002601 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002602 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2603}
2604
2605RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2606{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002607 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002608 if(emulateIntrinsics)
2609 {
2610 Short4 result;
2611 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2612 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2613 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2614 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2615
2616 return result;
2617 }
2618 else
2619 {
2620 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002621 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002622 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2623 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002624 paddsw->addArg(x.value());
2625 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002626 ::basicBlock->appendInst(paddsw);
2627
2628 return RValue<Short4>(V(result));
2629 }
2630}
2631
2632RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2633{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002634 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002635 if(emulateIntrinsics)
2636 {
2637 Short4 result;
2638 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2639 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2640 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2641 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2642
2643 return result;
2644 }
2645 else
2646 {
2647 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002648 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002649 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2650 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002651 psubsw->addArg(x.value());
2652 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002653 ::basicBlock->appendInst(psubsw);
2654
2655 return RValue<Short4>(V(result));
2656 }
2657}
2658
2659RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2660{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002661 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002662 if(emulateIntrinsics)
2663 {
2664 Short4 result;
2665 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2666 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2667 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2668 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2669
2670 return result;
2671 }
2672 else
2673 {
2674 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002675 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002676 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2677 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002678 pmulhw->addArg(x.value());
2679 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002680 ::basicBlock->appendInst(pmulhw);
2681
2682 return RValue<Short4>(V(result));
2683 }
2684}
2685
2686RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2687{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002688 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002689 if(emulateIntrinsics)
2690 {
2691 Int2 result;
2692 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2693 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2694
2695 return result;
2696 }
2697 else
2698 {
2699 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002700 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002701 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2702 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002703 pmaddwd->addArg(x.value());
2704 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002705 ::basicBlock->appendInst(pmaddwd);
2706
2707 return As<Int2>(V(result));
2708 }
2709}
2710
2711RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2712{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002713 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002714 if(emulateIntrinsics)
2715 {
2716 SByte8 result;
2717 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2718 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2719 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2720 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2721 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2722 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2723 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2724 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2725
2726 return result;
2727 }
2728 else
2729 {
2730 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002731 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002732 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2733 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002734 pack->addArg(x.value());
2735 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002736 ::basicBlock->appendInst(pack);
2737
2738 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2739 }
2740}
2741
2742RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2743{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002744 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002745 if(emulateIntrinsics)
2746 {
2747 Byte8 result;
2748 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2749 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2750 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2751 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2752 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2753 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2754 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2755 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2756
2757 return result;
2758 }
2759 else
2760 {
2761 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002762 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002763 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2764 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002765 pack->addArg(x.value());
2766 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002767 ::basicBlock->appendInst(pack);
2768
2769 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2770 }
2771}
2772
2773RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2774{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002775 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002776 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002777}
2778
2779RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2780{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002781 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002782 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002783}
2784
Nicolas Capens519cf222020-05-08 15:27:19 -04002785Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002786{
2787 return T(Type_v4i16);
2788}
2789
2790UShort4::UShort4(RValue<Float4> cast, bool saturate)
2791{
2792 if(saturate)
2793 {
2794 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002795 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002796 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2797 // PackUnsigned takes care of 0x0000 saturation.
2798 Int4 int4(Min(cast, Float4(0xFFFF)));
2799 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002800 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002801 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002802 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002803 // ARM saturates the 32-bit integer result on overflow/undeflow.
2804 Int4 int4(cast);
2805 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002806 }
2807 else
2808 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002809 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002810 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002811 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002812 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002813 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002814 *this = Short4(Int4(cast));
2815 }
2816}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002817
Nicolas Capens157ba262019-12-10 17:49:14 -05002818RValue<UShort> Extract(RValue<UShort4> val, int i)
2819{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002820 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002821}
2822
2823RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2824{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002825 return RValue<UShort4>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002826}
2827
2828RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2829{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002830 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002831 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002832
Nicolas Capens157ba262019-12-10 17:49:14 -05002833 {
2834 UShort4 result;
2835 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2836 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2837 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2838 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2839
2840 return result;
2841 }
2842 else
2843 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002844 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002845 }
2846}
2847
2848RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2849{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002850 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002851 if(emulateIntrinsics)
2852 {
2853 UShort4 result;
2854 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2855 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2856 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2857 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2858
2859 return result;
2860 }
2861 else
2862 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002863 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002864 }
2865}
2866
2867RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2868{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002869 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002870 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002871 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002872 ::basicBlock->appendInst(cmp);
2873
2874 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002875 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002876 ::basicBlock->appendInst(select);
2877
2878 return RValue<UShort4>(V(result));
2879}
2880
2881RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2882{
2883 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002884 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002885 ::basicBlock->appendInst(cmp);
2886
2887 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002888 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002889 ::basicBlock->appendInst(select);
2890
2891 return RValue<UShort4>(V(result));
2892}
2893
2894RValue<UShort> SaturateUnsigned(RValue<Int> x)
2895{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002896 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002897 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2898}
2899
2900RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2901{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002902 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002903 if(emulateIntrinsics)
2904 {
2905 UShort4 result;
2906 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2907 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2908 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2909 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2910
2911 return result;
2912 }
2913 else
2914 {
2915 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002916 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002917 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2918 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002919 paddusw->addArg(x.value());
2920 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002921 ::basicBlock->appendInst(paddusw);
2922
2923 return RValue<UShort4>(V(result));
2924 }
2925}
2926
2927RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2928{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002929 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002930 if(emulateIntrinsics)
2931 {
2932 UShort4 result;
2933 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2934 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2935 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2936 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2937
2938 return result;
2939 }
2940 else
2941 {
2942 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002943 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002944 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2945 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002946 psubusw->addArg(x.value());
2947 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002948 ::basicBlock->appendInst(psubusw);
2949
2950 return RValue<UShort4>(V(result));
2951 }
2952}
2953
2954RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2955{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002956 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002957 if(emulateIntrinsics)
2958 {
2959 UShort4 result;
2960 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2961 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2962 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2963 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2964
2965 return result;
2966 }
2967 else
2968 {
2969 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002970 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002971 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2972 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002973 pmulhuw->addArg(x.value());
2974 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002975 ::basicBlock->appendInst(pmulhuw);
2976
2977 return RValue<UShort4>(V(result));
2978 }
2979}
2980
2981RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2982{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002983 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002984 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2985
2986 // Scalarized implementation.
2987 Int4 result;
2988 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2989 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2990 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2991 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2992
2993 return result;
2994}
2995
2996RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2997{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002998 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002999 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3000
3001 if(false) // Partial product based implementation.
3002 {
3003 auto xh = x >> 16;
3004 auto yh = y >> 16;
3005 auto xl = x & UInt4(0x0000FFFF);
3006 auto yl = y & UInt4(0x0000FFFF);
3007 auto xlyh = xl * yh;
3008 auto xhyl = xh * yl;
3009 auto xlyhh = xlyh >> 16;
3010 auto xhylh = xhyl >> 16;
3011 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3012 auto xhyll = xhyl & UInt4(0x0000FFFF);
3013 auto xlylh = (xl * yl) >> 16;
3014 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3015
3016 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003017 }
3018
Nicolas Capens157ba262019-12-10 17:49:14 -05003019 // Scalarized implementation.
3020 Int4 result;
3021 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3022 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3023 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3024 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3025
3026 return As<UInt4>(result);
3027}
3028
3029RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3030{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003031 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003032 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003033 return UShort4(0);
3034}
3035
Nicolas Capens519cf222020-05-08 15:27:19 -04003036Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003037{
3038 return T(Type_v4i16);
3039}
3040
3041RValue<Short> Extract(RValue<Short8> val, int i)
3042{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003043 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003044 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003045}
3046
3047RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3048{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003049 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003050 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003051}
3052
3053RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3054{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003055 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003056 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003057 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003058 Short8 result;
3059 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3060 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3061 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3062 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3063 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3064 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3065 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3066 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003067
Nicolas Capens157ba262019-12-10 17:49:14 -05003068 return result;
3069 }
3070 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003071 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003072 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003073 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003074}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003075
Nicolas Capens157ba262019-12-10 17:49:14 -05003076RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3077{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003078 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003079 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003080 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003081 Short8 result;
3082 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3083 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3084 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3085 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3086 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3087 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3088 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3089 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003090
Nicolas Capens157ba262019-12-10 17:49:14 -05003091 return result;
3092 }
3093 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003094 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003095 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003096 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003097}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003098
Nicolas Capens157ba262019-12-10 17:49:14 -05003099RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3100{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003101 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003102 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003103 return Int4(0);
3104}
3105
3106RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3107{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003108 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003109 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003110 return Short8(0);
3111}
3112
Nicolas Capens519cf222020-05-08 15:27:19 -04003113Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003114{
3115 return T(Ice::IceType_v8i16);
3116}
3117
3118RValue<UShort> Extract(RValue<UShort8> val, int i)
3119{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003120 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003121 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003122}
3123
3124RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3125{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003126 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003127 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003128}
3129
3130RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3131{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003132 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003133 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003134 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003135 UShort8 result;
3136 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3137 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3138 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3139 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3140 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3141 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3142 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3143 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003144
Nicolas Capens157ba262019-12-10 17:49:14 -05003145 return result;
3146 }
3147 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003148 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003149 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003150 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003151}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003152
Nicolas Capens157ba262019-12-10 17:49:14 -05003153RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3154{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003155 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003156 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003157 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003158 UShort8 result;
3159 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3160 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3161 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3162 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3163 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3164 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3165 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3166 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003167
Nicolas Capens157ba262019-12-10 17:49:14 -05003168 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003169 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003170 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003171 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003172 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003173 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003174}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003175
Nicolas Capens157ba262019-12-10 17:49:14 -05003176RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3177{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003178 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003179 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003180 return UShort8(0);
3181}
3182
Nicolas Capens519cf222020-05-08 15:27:19 -04003183Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003184{
3185 return T(Ice::IceType_v8i16);
3186}
3187
Ben Clayton713b8d32019-12-17 20:37:56 +00003188RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003190 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003191 RValue<Int> res = val;
3192 val += 1;
3193 return res;
3194}
3195
Ben Clayton713b8d32019-12-17 20:37:56 +00003196const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003197{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003198 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003199 val += 1;
3200 return val;
3201}
3202
Ben Clayton713b8d32019-12-17 20:37:56 +00003203RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003204{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003205 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003206 RValue<Int> res = val;
3207 val -= 1;
3208 return res;
3209}
3210
Ben Clayton713b8d32019-12-17 20:37:56 +00003211const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003212{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003213 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003214 val -= 1;
3215 return val;
3216}
3217
3218RValue<Int> RoundInt(RValue<Float> cast)
3219{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003220 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003221 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003222 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003223 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3224 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003225 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003226 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003227 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003228 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003229 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003230 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3231 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003232 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003233 ::basicBlock->appendInst(nearbyint);
3234
3235 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003236 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003237}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003238
Nicolas Capens519cf222020-05-08 15:27:19 -04003239Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003240{
3241 return T(Ice::IceType_i32);
3242}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003243
Nicolas Capens519cf222020-05-08 15:27:19 -04003244Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003245{
3246 return T(Ice::IceType_i64);
3247}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003248
Nicolas Capens157ba262019-12-10 17:49:14 -05003249UInt::UInt(RValue<Float> cast)
3250{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003251 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003252 // Smallest positive value representable in UInt, but not in Int
3253 const unsigned int ustart = 0x80000000u;
3254 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003255
Nicolas Capens157ba262019-12-10 17:49:14 -05003256 // If the value is negative, store 0, otherwise store the result of the conversion
3257 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003258 // Check if the value can be represented as an Int
3259 IfThenElse(cast >= ustartf,
3260 // If the value is too large, subtract ustart and re-add it after conversion.
3261 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3262 // Otherwise, just convert normally
3263 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003264 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003265}
Nicolas Capensa8086512016-11-07 17:32:17 -05003266
Ben Clayton713b8d32019-12-17 20:37:56 +00003267RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003268{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003269 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003270 RValue<UInt> res = val;
3271 val += 1;
3272 return res;
3273}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003274
Ben Clayton713b8d32019-12-17 20:37:56 +00003275const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003276{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003277 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003278 val += 1;
3279 return val;
3280}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003281
Ben Clayton713b8d32019-12-17 20:37:56 +00003282RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003283{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003284 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003285 RValue<UInt> res = val;
3286 val -= 1;
3287 return res;
3288}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003289
Ben Clayton713b8d32019-12-17 20:37:56 +00003290const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003291{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003292 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003293 val -= 1;
3294 return val;
3295}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003296
// RValue<UInt> RoundUInt(RValue<Float> cast)
// {
// 	ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
// }
3301
Nicolas Capens519cf222020-05-08 15:27:19 -04003302Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003303{
3304 return T(Ice::IceType_i32);
3305}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003306
// Int2::Int2(RValue<Int> cast)
// {
// 	Value *extend = Nucleus::createZExt(cast.value(), Long::type());
// 	Value *vector = Nucleus::createBitCast(extend, Int2::type());
//
// 	Constant *shuffle[2];
// 	shuffle[0] = Nucleus::createConstantInt(0);
// 	shuffle[1] = Nucleus::createConstantInt(0);
//
// 	Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
//
// 	storeValue(replicate);
// }
3320
Nicolas Capens157ba262019-12-10 17:49:14 -05003321RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3322{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003323 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003324 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003325 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003326 Int2 result;
3327 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3328 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003329
Nicolas Capens157ba262019-12-10 17:49:14 -05003330 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003331 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003332 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003333 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003334 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003335 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003336}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003337
Nicolas Capens157ba262019-12-10 17:49:14 -05003338RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3339{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003340 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003341 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003342 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003343 Int2 result;
3344 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3345 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3346
3347 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003348 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003349 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003350 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003351 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003352 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003353}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003354
Nicolas Capens519cf222020-05-08 15:27:19 -04003355Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003356{
3357 return T(Type_v2i32);
3358}
3359
3360RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3361{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003362 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003363 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003364 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003365 UInt2 result;
3366 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3367 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003368
Nicolas Capens157ba262019-12-10 17:49:14 -05003369 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003370 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003371 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003372 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003373 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003374 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003375}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003376
Nicolas Capens157ba262019-12-10 17:49:14 -05003377RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3378{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003379 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003380 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003381 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003382 UInt2 result;
3383 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3384 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003385
Nicolas Capens157ba262019-12-10 17:49:14 -05003386 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003387 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003388 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003389 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003390 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003391 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003392}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003393
Nicolas Capens519cf222020-05-08 15:27:19 -04003394Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003395{
3396 return T(Type_v2i32);
3397}
3398
Ben Clayton713b8d32019-12-17 20:37:56 +00003399Int4::Int4(RValue<Byte4> cast)
3400 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003401{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003402 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003403 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003404 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3405
3406 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003407 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003408 Value *b = Nucleus::createBitCast(a, Byte16::type());
3409 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003410
Ben Clayton713b8d32019-12-17 20:37:56 +00003411 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003412 Value *d = Nucleus::createBitCast(c, Short8::type());
3413 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003414
Nicolas Capens519cf222020-05-08 15:27:19 -04003415 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003416 storeValue(f);
3417}
3418
Ben Clayton713b8d32019-12-17 20:37:56 +00003419Int4::Int4(RValue<SByte4> cast)
3420 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003421{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003422 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003423 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003424 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3425
Ben Clayton713b8d32019-12-17 20:37:56 +00003426 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003427 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003428 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3429
Ben Clayton713b8d32019-12-17 20:37:56 +00003430 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003431 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003432 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3433
3434 *this = As<Int4>(e) >> 24;
3435}
3436
Ben Clayton713b8d32019-12-17 20:37:56 +00003437Int4::Int4(RValue<Short4> cast)
3438 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003439{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003440 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003441 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003442 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003443
3444 *this = As<Int4>(c) >> 16;
3445}
3446
Ben Clayton713b8d32019-12-17 20:37:56 +00003447Int4::Int4(RValue<UShort4> cast)
3448 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003449{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003450 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003451 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003452 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003453 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003454 storeValue(d);
3455}
3456
Ben Clayton713b8d32019-12-17 20:37:56 +00003457Int4::Int4(RValue<Int> rhs)
3458 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003459{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003460 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003461 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003462
Ben Clayton713b8d32019-12-17 20:37:56 +00003463 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003464 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3465
3466 storeValue(replicate);
3467}
3468
3469RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3470{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003471 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003472 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003473 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003474 Int4 result;
3475 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3476 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3477 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3478 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003479
Nicolas Capens157ba262019-12-10 17:49:14 -05003480 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003481 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003482 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003483 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003484 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003485 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003486}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003487
Nicolas Capens157ba262019-12-10 17:49:14 -05003488RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3489{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003490 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003491 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003492 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003493 Int4 result;
3494 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3495 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3496 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3497 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003498
Nicolas Capens157ba262019-12-10 17:49:14 -05003499 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003500 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003501 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003502 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003503 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003504 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003505}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003506
Nicolas Capens157ba262019-12-10 17:49:14 -05003507RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3508{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003509 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003510 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003511}
3512
3513RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3514{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003515 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003516 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003517}
3518
3519RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3520{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003521 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003522 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003523}
3524
3525RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3526{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003527 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003528 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003529}
3530
3531RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3532{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003533 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003534 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003535}
3536
3537RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3538{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003539 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003540 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003541}
3542
3543RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3544{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003545 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003546 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003547 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003548 ::basicBlock->appendInst(cmp);
3549
3550 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003551 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003552 ::basicBlock->appendInst(select);
3553
3554 return RValue<Int4>(V(result));
3555}
3556
3557RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3558{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003559 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003560 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003561 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003562 ::basicBlock->appendInst(cmp);
3563
3564 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003565 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003566 ::basicBlock->appendInst(select);
3567
3568 return RValue<Int4>(V(result));
3569}
3570
3571RValue<Int4> RoundInt(RValue<Float4> cast)
3572{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003573 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003574 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003575 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003576 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3577 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003578 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003579 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003580 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003581 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003582 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003583 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3584 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003585 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003586 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003587
3588 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003589 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003590}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003591
Nicolas Capenseeb81842021-01-12 17:44:40 -05003592RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3593{
3594 RR_DEBUG_INFO_UPDATE_LOC();
3595
3596 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3597 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3598 // saturate to 0x80000000.
3599 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3600
3601 if(emulateIntrinsics || CPUID::ARM)
3602 {
3603 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3604 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3605 }
3606 else
3607 {
3608 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3609 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3610 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3611 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3612 nearbyint->addArg(clamped.value());
3613 ::basicBlock->appendInst(nearbyint);
3614
3615 return RValue<Int4>(V(result));
3616 }
3617}
3618
Nicolas Capens157ba262019-12-10 17:49:14 -05003619RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3620{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003621 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003622 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003623 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003624 Short8 result;
3625 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3626 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3627 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3628 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3629 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3630 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3631 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3632 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003633
Nicolas Capens157ba262019-12-10 17:49:14 -05003634 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003635 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003636 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003637 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003638 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003639 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003640 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3641 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003642 pack->addArg(x.value());
3643 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003644 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003645
Nicolas Capens157ba262019-12-10 17:49:14 -05003646 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003647 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003648}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003649
Nicolas Capens157ba262019-12-10 17:49:14 -05003650RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3651{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003652 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003653 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003654 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003655 RValue<Int4> sx = As<Int4>(x);
3656 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003657
Nicolas Capens157ba262019-12-10 17:49:14 -05003658 RValue<Int4> sy = As<Int4>(y);
3659 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003660
Nicolas Capens157ba262019-12-10 17:49:14 -05003661 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003662 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003663 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003664 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003665 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003666 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003667 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3668 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003669 pack->addArg(x.value());
3670 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003671 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003672
Nicolas Capens157ba262019-12-10 17:49:14 -05003673 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003674 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003675}
Nicolas Capens33438a62017-09-27 11:47:35 -04003676
Nicolas Capens157ba262019-12-10 17:49:14 -05003677RValue<Int> SignMask(RValue<Int4> x)
3678{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003679 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003680 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003681 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003682 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3683 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003684 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003685 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003686 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003687 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003688 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003689 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3690 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003691 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003692 ::basicBlock->appendInst(movmsk);
3693
3694 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003695 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003696}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003697
Nicolas Capens519cf222020-05-08 15:27:19 -04003698Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003699{
3700 return T(Ice::IceType_v4i32);
3701}
3702
Ben Clayton713b8d32019-12-17 20:37:56 +00003703UInt4::UInt4(RValue<Float4> cast)
3704 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003705{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003706 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003707 // Smallest positive value representable in UInt, but not in Int
3708 const unsigned int ustart = 0x80000000u;
3709 const float ustartf = float(ustart);
3710
3711 // Check if the value can be represented as an Int
3712 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3713 // If the value is too large, subtract ustart and re-add it after conversion.
3714 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003715 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003716 (~uiValue & Int4(cast));
3717 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003718 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003719}
3720
Ben Clayton713b8d32019-12-17 20:37:56 +00003721UInt4::UInt4(RValue<UInt> rhs)
3722 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003723{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003724 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003725 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003726
Ben Clayton713b8d32019-12-17 20:37:56 +00003727 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003728 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3729
3730 storeValue(replicate);
3731}
3732
3733RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3734{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003735 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003736 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003737 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003738 UInt4 result;
3739 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3740 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3741 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3742 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003743
Nicolas Capens157ba262019-12-10 17:49:14 -05003744 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003745 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003746 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003747 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003748 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003749 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003750}
Ben Clayton88816fa2019-05-15 17:08:14 +01003751
Nicolas Capens157ba262019-12-10 17:49:14 -05003752RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3753{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003754 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003755 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003756 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003757 UInt4 result;
3758 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3759 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3760 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3761 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003762
Nicolas Capens157ba262019-12-10 17:49:14 -05003763 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003764 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003765 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003766 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003767 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003768 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003769}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003770
Nicolas Capens157ba262019-12-10 17:49:14 -05003771RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3772{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003773 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003774 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003775}
3776
3777RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003780 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003781}
3782
3783RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3784{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003785 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003786 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003787}
3788
3789RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003792 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003793}
3794
3795RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003798 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003799}
3800
3801RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3802{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003803 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003804 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003805}
3806
3807RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3808{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003809 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003810 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003811 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003812 ::basicBlock->appendInst(cmp);
3813
3814 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003815 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003816 ::basicBlock->appendInst(select);
3817
3818 return RValue<UInt4>(V(result));
3819}
3820
3821RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3822{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003823 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003824 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003825 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003826 ::basicBlock->appendInst(cmp);
3827
3828 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003829 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003830 ::basicBlock->appendInst(select);
3831
3832 return RValue<UInt4>(V(result));
3833}
3834
Nicolas Capens519cf222020-05-08 15:27:19 -04003835Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003836{
3837 return T(Ice::IceType_v4i32);
3838}
3839
Nicolas Capens519cf222020-05-08 15:27:19 -04003840Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003841{
3842 return T(Ice::IceType_i16);
3843}
3844
3845RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3846{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003847 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003848 return 1.0f / x;
3849}
3850
3851RValue<Float> RcpSqrt_pp(RValue<Float> x)
3852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003854 return Rcp_pp(Sqrt(x));
3855}
3856
3857RValue<Float> Sqrt(RValue<Float> x)
3858{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003859 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003860 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003861 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003862 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3863 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003864 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003865 ::basicBlock->appendInst(sqrt);
3866
3867 return RValue<Float>(V(result));
3868}
3869
3870RValue<Float> Round(RValue<Float> x)
3871{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003872 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003873 return Float4(Round(Float4(x))).x;
3874}
3875
3876RValue<Float> Trunc(RValue<Float> x)
3877{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003878 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003879 return Float4(Trunc(Float4(x))).x;
3880}
3881
3882RValue<Float> Frac(RValue<Float> x)
3883{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003884 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003885 return Float4(Frac(Float4(x))).x;
3886}
3887
3888RValue<Float> Floor(RValue<Float> x)
3889{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003890 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003891 return Float4(Floor(Float4(x))).x;
3892}
3893
3894RValue<Float> Ceil(RValue<Float> x)
3895{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003896 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003897 return Float4(Ceil(Float4(x))).x;
3898}
3899
Nicolas Capens519cf222020-05-08 15:27:19 -04003900Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003901{
3902 return T(Ice::IceType_f32);
3903}
3904
Nicolas Capens519cf222020-05-08 15:27:19 -04003905Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003906{
3907 return T(Type_v2f32);
3908}
3909
Ben Clayton713b8d32019-12-17 20:37:56 +00003910Float4::Float4(RValue<Float> rhs)
3911 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003912{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003913 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003914 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003915
Ben Clayton713b8d32019-12-17 20:37:56 +00003916 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003917 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3918
3919 storeValue(replicate);
3920}
3921
3922RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3923{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003924 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003925 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003926 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003927 ::basicBlock->appendInst(cmp);
3928
3929 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003930 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003931 ::basicBlock->appendInst(select);
3932
3933 return RValue<Float4>(V(result));
3934}
3935
3936RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3937{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003938 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003939 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003940 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003941 ::basicBlock->appendInst(cmp);
3942
3943 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003944 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003945 ::basicBlock->appendInst(select);
3946
3947 return RValue<Float4>(V(result));
3948}
3949
3950RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3951{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003952 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003953 return Float4(1.0f) / x;
3954}
3955
3956RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3957{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003958 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003959 return Rcp_pp(Sqrt(x));
3960}
3961
// Reports whether RcpApprox() is available in this backend. Currently it never is.
bool HasRcpApprox()
{
	// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
	const bool supported = false;
	return supported;
}
3967
3968RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
3969{
3970 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3971 UNREACHABLE("RValue<Float4> RcpApprox()");
3972 return { 0.0f };
3973}
3974
3975RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
3976{
3977 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3978 UNREACHABLE("RValue<Float> RcpApprox()");
3979 return { 0.0f };
3980}
3981
// Reports whether RcpSqrtApprox() is available in this backend. Currently it never is.
bool HasRcpSqrtApprox()
{
	const bool supported = false;
	return supported;
}
3986
3987RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
3988{
3989 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3990 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
3991 return { 0.0f };
3992}
3993
3994RValue<Float> RcpSqrtApprox(RValue<Float> x)
3995{
3996 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3997 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
3998 return { 0.0f };
3999}
4000
Nicolas Capens157ba262019-12-10 17:49:14 -05004001RValue<Float4> Sqrt(RValue<Float4> x)
4002{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004003 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004004 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04004005 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004006 Float4 result;
4007 result.x = Sqrt(Float(Float4(x).x));
4008 result.y = Sqrt(Float(Float4(x).y));
4009 result.z = Sqrt(Float(Float4(x).z));
4010 result.w = Sqrt(Float(Float4(x).w));
4011
4012 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04004013 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004014 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04004015 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004016 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004017 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04004018 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4019 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004020 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04004021 ::basicBlock->appendInst(sqrt);
4022
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04004023 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04004024 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04004025}
Nicolas Capens157ba262019-12-10 17:49:14 -05004026
4027RValue<Int> SignMask(RValue<Float4> x)
4028{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004029 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004030 if(emulateIntrinsics || CPUID::ARM)
4031 {
4032 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
4033 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
4034 }
4035 else
4036 {
4037 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004038 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004039 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4040 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004041 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004042 ::basicBlock->appendInst(movmsk);
4043
4044 return RValue<Int>(V(result));
4045 }
4046}
4047
4048RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
4049{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004050 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004051 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004052}
4053
4054RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
4055{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004056 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004057 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004058}
4059
4060RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4061{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004062 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004063 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004064}
4065
4066RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4067{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004068 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004069 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004070}
4071
4072RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4073{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004074 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004075 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004076}
4077
4078RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4079{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004080 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004081 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004082}
4083
4084RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4085{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004086 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004087 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004088}
4089
4090RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4091{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004092 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004093 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004094}
4095
4096RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4097{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004098 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004099 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004100}
4101
4102RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4103{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004104 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004105 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004106}
4107
4108RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4109{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004110 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004111 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004112}
4113
4114RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4115{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004116 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004117 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004118}
4119
4120RValue<Float4> Round(RValue<Float4> x)
4121{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004122 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004123 if(emulateIntrinsics || CPUID::ARM)
4124 {
4125 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4126 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4127 }
4128 else if(CPUID::SSE4_1)
4129 {
4130 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004131 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004132 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4133 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004134 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004135 round->addArg(::context->getConstantInt32(0));
4136 ::basicBlock->appendInst(round);
4137
4138 return RValue<Float4>(V(result));
4139 }
4140 else
4141 {
4142 return Float4(RoundInt(x));
4143 }
4144}
4145
4146RValue<Float4> Trunc(RValue<Float4> x)
4147{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004148 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004149 if(CPUID::SSE4_1)
4150 {
4151 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004152 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004153 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4154 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004155 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004156 round->addArg(::context->getConstantInt32(3));
4157 ::basicBlock->appendInst(round);
4158
4159 return RValue<Float4>(V(result));
4160 }
4161 else
4162 {
4163 return Float4(Int4(x));
4164 }
4165}
4166
4167RValue<Float4> Frac(RValue<Float4> x)
4168{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004169 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004170 Float4 frc;
4171
4172 if(CPUID::SSE4_1)
4173 {
4174 frc = x - Floor(x);
4175 }
4176 else
4177 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004178 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004179
Ben Clayton713b8d32019-12-17 20:37:56 +00004180 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004181 }
4182
4183 // x - floor(x) can be 1.0 for very small negative x.
4184 // Clamp against the value just below 1.0.
4185 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4186}
4187
4188RValue<Float4> Floor(RValue<Float4> x)
4189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004190 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004191 if(CPUID::SSE4_1)
4192 {
4193 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004194 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004195 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4196 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004197 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004198 round->addArg(::context->getConstantInt32(1));
4199 ::basicBlock->appendInst(round);
4200
4201 return RValue<Float4>(V(result));
4202 }
4203 else
4204 {
4205 return x - Frac(x);
4206 }
4207}
4208
4209RValue<Float4> Ceil(RValue<Float4> x)
4210{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004211 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004212 if(CPUID::SSE4_1)
4213 {
4214 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004215 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004216 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4217 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004218 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004219 round->addArg(::context->getConstantInt32(2));
4220 ::basicBlock->appendInst(round);
4221
4222 return RValue<Float4>(V(result));
4223 }
4224 else
4225 {
4226 return -Floor(-x);
4227 }
4228}
4229
Nicolas Capens519cf222020-05-08 15:27:19 -04004230Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004231{
4232 return T(Ice::IceType_v4f32);
4233}
4234
4235RValue<Long> Ticks()
4236{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004237 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004238 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004239 return Long(Int(0));
4240}
4241
Ben Clayton713b8d32019-12-17 20:37:56 +00004242RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004243{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004244 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004245 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004246}
4247
Ben Clayton713b8d32019-12-17 20:37:56 +00004248RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004249{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004250 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004251 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004252}
4253
Ben Clayton713b8d32019-12-17 20:37:56 +00004254Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004255{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004256 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004257 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004258}
4259
4260void Breakpoint()
4261{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004262 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004263 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004264 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4265 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4266 ::basicBlock->appendInst(trap);
4267}
4268
Ben Clayton713b8d32019-12-17 20:37:56 +00004269void Nucleus::createFence(std::memory_order memoryOrder)
4270{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004271 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004272 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4273 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4274 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4275 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4276 inst->addArg(order);
4277 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004278}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004279
Ben Clayton713b8d32019-12-17 20:37:56 +00004280Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004282 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004283 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004284 return nullptr;
4285}
4286void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004288 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004289 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004290}
Nicolas Capens157ba262019-12-10 17:49:14 -05004291
4292RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004295 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4296}
4297
4298RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004301 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4302}
4303
4304void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004307 return emulated::Scatter(base, val, offsets, mask, alignment);
4308}
4309
4310void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004313 return emulated::Scatter(base, val, offsets, mask, alignment);
4314}
4315
4316RValue<Float> Exp2(RValue<Float> x)
4317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004319 return emulated::Exp2(x);
4320}
4321
4322RValue<Float> Log2(RValue<Float> x)
4323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004325 return emulated::Log2(x);
4326}
4327
4328RValue<Float4> Sin(RValue<Float4> x)
4329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004330 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004331 return optimal::Sin(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004332}
4333
4334RValue<Float4> Cos(RValue<Float4> x)
4335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004336 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004337 return optimal::Cos(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004338}
4339
4340RValue<Float4> Tan(RValue<Float4> x)
4341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004342 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004343 return optimal::Tan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004344}
4345
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004346RValue<Float4> Asin(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004348 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004349 if(p == Precision::Full)
4350 {
4351 return emulated::Asin(x);
4352 }
4353 return optimal::Asin_8_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004354}
4355
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004356RValue<Float4> Acos(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004357{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004358 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004359 // Surprisingly, deqp-vk's precision.acos.highp/mediump tests pass when using the 4-term polynomial approximation
4360 // version of acos, unlike for Asin, which requires higher precision algorithms.
4361 return optimal::Acos_4_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004362}
4363
4364RValue<Float4> Atan(RValue<Float4> x)
4365{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004366 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004367 return optimal::Atan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004368}
4369
4370RValue<Float4> Sinh(RValue<Float4> x)
4371{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004372 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004373 return optimal::Sinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004374}
4375
4376RValue<Float4> Cosh(RValue<Float4> x)
4377{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004378 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004379 return optimal::Cosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004380}
4381
4382RValue<Float4> Tanh(RValue<Float4> x)
4383{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004384 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004385 return optimal::Tanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004386}
4387
4388RValue<Float4> Asinh(RValue<Float4> x)
4389{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004390 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004391 return optimal::Asinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004392}
4393
4394RValue<Float4> Acosh(RValue<Float4> x)
4395{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004396 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004397 return optimal::Acosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004398}
4399
4400RValue<Float4> Atanh(RValue<Float4> x)
4401{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004402 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004403 return optimal::Atanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004404}
4405
4406RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4407{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004408 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004409 return optimal::Atan2(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004410}
4411
4412RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4413{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004414 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004415 return optimal::Pow(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004416}
4417
4418RValue<Float4> Exp(RValue<Float4> x)
4419{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004420 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004421 return optimal::Exp(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004422}
4423
4424RValue<Float4> Log(RValue<Float4> x)
4425{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004426 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004427 return optimal::Log(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004428}
4429
4430RValue<Float4> Exp2(RValue<Float4> x)
4431{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004432 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004433 return optimal::Exp2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004434}
4435
4436RValue<Float4> Log2(RValue<Float4> x)
4437{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004438 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004439 return optimal::Log2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004440}
4441
4442RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4443{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004444 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004445 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004446 {
Ben Claytonce54c592020-02-07 11:30:51 +00004447 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004448 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004449 }
4450 else
4451 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004452 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004453 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4454 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4455 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004456 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004457 ::basicBlock->appendInst(ctlz);
4458
4459 return RValue<UInt>(V(result));
4460 }
4461}
4462
4463RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4464{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004465 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004466 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004467 {
Ben Claytonce54c592020-02-07 11:30:51 +00004468 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004469 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004470 }
4471 else
4472 {
4473 // TODO: implement vectorized version in Subzero
4474 UInt4 result;
4475 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4476 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4477 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4478 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4479 return result;
4480 }
4481}
4482
4483RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4484{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004485 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004486 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004487 {
Ben Claytonce54c592020-02-07 11:30:51 +00004488 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004489 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004490 }
4491 else
4492 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004493 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004494 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4495 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4496 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004497 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004498 ::basicBlock->appendInst(ctlz);
4499
4500 return RValue<UInt>(V(result));
4501 }
4502}
4503
4504RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4505{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004506 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004507 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004508 {
Ben Claytonce54c592020-02-07 11:30:51 +00004509 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004510 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004511 }
4512 else
4513 {
4514 // TODO: implement vectorized version in Subzero
4515 UInt4 result;
4516 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4517 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4518 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4519 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4520 return result;
4521 }
4522}
4523
Antonio Maiorano370cba52019-12-31 11:36:07 -05004524RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4525{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004526 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004527 return emulated::MinAtomic(x, y, memoryOrder);
4528}
4529
4530RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4531{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004532 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004533 return emulated::MinAtomic(x, y, memoryOrder);
4534}
4535
4536RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4537{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004538 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004539 return emulated::MaxAtomic(x, y, memoryOrder);
4540}
4541
4542RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4543{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004544 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004545 return emulated::MaxAtomic(x, y, memoryOrder);
4546}
4547
// When ENABLE_RR_DEBUG_INFO is defined, passes the caller's backtrace to
// emitPrintLocation(). Otherwise this is a no-op.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004554void EmitDebugVariable(Value *value) {}
// Intentionally empty in this backend: there is nothing to flush.
void FlushDebug()
{
}
4556
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004557namespace {
4558namespace coro {
4559
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004560// Instance data per generated coroutine
4561// This is the "handle" type used for Coroutine functions
4562// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4563struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004564{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004565 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004566 bool done = false; // the coroutine should stop at the next yield()
4567 bool terminated = false; // the coroutine has finished.
4568 bool inRoutine = false; // is the coroutine currently executing?
4569 marl::Scheduler::Fiber *mainFiber = nullptr;
4570 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004571 void *promisePtr = nullptr;
4572};
4573
4574CoroutineData *createCoroutineData()
4575{
4576 return new CoroutineData{};
4577}
4578
4579void destroyCoroutineData(CoroutineData *coroData)
4580{
4581 delete coroData;
4582}
4583
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004584// suspend() pauses execution of the coroutine, and resumes execution from the
4585// caller's call to await().
4586// Returns true if await() is called again, or false if coroutine_destroy()
4587// is called.
4588bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004589{
Ben Claytonc3466532020-03-24 11:54:05 +00004590 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4591 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4592 ASSERT(coroData->inRoutine);
4593 coroData->inRoutine = false;
4594 coroData->mainFiber->notify();
4595 while(!coroData->inRoutine)
4596 {
4597 coroData->routineFiber->wait();
4598 }
4599 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004600}
4601
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004602// resume() is called by await(), blocking until the coroutine calls yield()
4603// or the coroutine terminates.
4604void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004605{
Ben Claytonc3466532020-03-24 11:54:05 +00004606 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4607 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4608 ASSERT(!coroData->inRoutine);
4609 coroData->inRoutine = true;
4610 coroData->routineFiber->notify();
4611 while(coroData->inRoutine)
4612 {
4613 coroData->mainFiber->wait();
4614 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004615}
4616
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004617// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4618// until the coroutine ends, and deletes the coroutine data.
4619void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004620{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004621 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004622 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4623 ASSERT(!coroData->inRoutine);
4624 if(!coroData->terminated)
4625 {
4626 coroData->done = true;
4627 coroData->inRoutine = true;
4628 coroData->routineFiber->notify();
4629 while(!coroData->terminated)
4630 {
4631 coroData->mainFiber->wait();
4632 }
4633 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004634 if(coroData->useInternalScheduler)
4635 {
4636 ::getOrCreateScheduler().unbind();
4637 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004638 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004639}
4640
4641namespace detail {
4642thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4643} // namespace detail
4644
4645void setHandleParam(Nucleus::CoroutineHandle handle)
4646{
4647 ASSERT(!detail::coroHandle);
4648 detail::coroHandle = handle;
4649}
4650
4651Nucleus::CoroutineHandle getHandleParam()
4652{
4653 ASSERT(detail::coroHandle);
4654 auto handle = detail::coroHandle;
4655 detail::coroHandle = {};
4656 return handle;
4657}
4658
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004659bool isDone(Nucleus::CoroutineHandle handle)
4660{
4661 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004662 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004663}
4664
4665void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4666{
4667 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4668 coroData->promisePtr = promisePtr;
4669}
4670
4671void *getPromisePtr(Nucleus::CoroutineHandle handle)
4672{
4673 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4674 return coroData->promisePtr;
4675}
4676
4677} // namespace coro
4678} // namespace
4679
4680// Used to generate coroutines.
4681// Lifetime: from yield to acquireCoroutine
4682class CoroutineGenerator
4683{
4684public:
4685 CoroutineGenerator()
4686 {
4687 }
4688
4689 // Inserts instructions at the top of the current function to make it a coroutine.
4690 void generateCoroutineBegin()
4691 {
4692 // Begin building the main coroutine_begin() function.
4693 // We insert these instructions at the top of the entry node,
4694 // before existing reactor-generated instructions.
4695
4696 // CoroutineHandle coroutine_begin(<Arguments>)
4697 // {
4698 // this->handle = coro::getHandleParam();
4699 //
4700 // YieldType promise;
4701 // coro::setPromisePtr(handle, &promise); // For await
4702 //
4703 // ... <REACTOR CODE> ...
4704 //
4705
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004706 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004707 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004708
4709 // YieldType promise;
4710 // coro::setPromisePtr(handle, &promise); // For await
4711 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004712 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004713 }
4714
4715 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4716 void generateYield(Value *val)
4717 {
4718 // ... <REACTOR CODE> ...
4719 //
4720 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004721 // if (!coro::suspend(handle)) {
4722 // return false; // coroutine has been stopped by the caller.
4723 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004724 //
4725 // ... <REACTOR CODE> ...
4726
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004727 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004728 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004729
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004730 // if (!coro::suspend(handle)) {
4731 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4732 auto doneBlock = Nucleus::createBasicBlock();
4733 auto resumeBlock = Nucleus::createBasicBlock();
4734 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4735
4736 // return false; // coroutine has been stopped by the caller.
4737 ::basicBlock = doneBlock;
4738 Nucleus::createRetVoid(); // coroutine return value is ignored.
4739
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004740 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004741 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004742 }
4743
4744 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4745
4746 // Generates the await function for the current coroutine.
4747 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4748 static FunctionUniquePtr generateAwaitFunction()
4749 {
4750 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4751 // {
4752 // if (coro::isDone())
4753 // {
4754 // return false;
4755 // }
4756 // else // resume
4757 // {
4758 // YieldType* promise = coro::getPromisePtr(handle);
4759 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004760 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004761 // return true;
4762 // }
4763 // }
4764
4765 // Subzero doesn't support bool types (IceType_i1) as return type
4766 const Ice::Type ReturnType = Ice::IceType_i32;
4767 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4768 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4769
4770 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4771 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4772
4773 Ice::Variable *handle = awaitFunc->getArgs()[0];
4774 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4775
4776 auto doneBlock = awaitFunc->makeNode();
4777 {
4778 // return false;
4779 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4780 doneBlock->appendInst(ret);
4781 }
4782
4783 auto resumeBlock = awaitFunc->makeNode();
4784 {
4785 // YieldType* promise = coro::getPromisePtr(handle);
4786 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4787
4788 // *out = *promise;
4789 // Load promise value
4790 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4791 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4792 resumeBlock->appendInst(load);
4793 // Then store it in output param
4794 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4795 resumeBlock->appendInst(store);
4796
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004797 // coro::resume(handle);
4798 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004799
4800 // return true;
4801 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4802 resumeBlock->appendInst(ret);
4803 }
4804
4805 // if (coro::isDone())
4806 // {
4807 // <doneBlock>
4808 // }
4809 // else // resume
4810 // {
4811 // <resumeBlock>
4812 // }
4813 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004814 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004815 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4816 bb->appendInst(br);
4817
4818 return FunctionUniquePtr{ awaitFunc };
4819 }
4820
4821 // Generates the destroy function for the current coroutine.
4822 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4823 static FunctionUniquePtr generateDestroyFunction()
4824 {
4825 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4826 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004827 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004828 // return;
4829 // }
4830
4831 const Ice::Type ReturnType = Ice::IceType_void;
4832 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4833
4834 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4835 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4836
4837 Ice::Variable *handle = destroyFunc->getArgs()[0];
4838
4839 auto *bb = destroyFunc->getEntryNode();
4840
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004841 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4842 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004843
4844 // return;
4845 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4846 bb->appendInst(ret);
4847
4848 return FunctionUniquePtr{ destroyFunc };
4849 }
4850
4851private:
4852 Ice::Variable *handle{};
4853 Ice::Variable *promise{};
4854};
4855
4856static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4857{
4858 // This doubles up as our coroutine handle
4859 auto coroData = coro::createCoroutineData();
4860
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004861 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4862 if(coroData->useInternalScheduler)
4863 {
4864 ::getOrCreateScheduler().bind();
4865 }
4866
Ben Clayton76e9e532020-03-16 20:35:04 +00004867 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004868 // Store handle in TLS so that the coroutine can grab it right away, before
4869 // any fiber switch occurs.
4870 coro::setHandleParam(coroData);
4871
Ben Claytonc3466532020-03-24 11:54:05 +00004872 ASSERT(!coroData->routineFiber);
4873 coroData->routineFiber = marl::Scheduler::Fiber::current();
4874
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004875 beginFunc();
4876
Ben Claytonc3466532020-03-24 11:54:05 +00004877 ASSERT(coroData->inRoutine);
4878 coroData->done = true; // coroutine is done.
4879 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4880 coroData->inRoutine = false;
4881 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004882 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004883
Ben Claytonc3466532020-03-24 11:54:05 +00004884 ASSERT(!coroData->mainFiber);
4885 coroData->mainFiber = marl::Scheduler::Fiber::current();
4886
4887 // block until the first yield or coroutine end
4888 ASSERT(!coroData->inRoutine);
4889 coroData->inRoutine = true;
4890 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4891 while(coroData->inRoutine)
4892 {
4893 coroData->mainFiber->wait();
4894 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004895
4896 return coroData;
4897}
4898
4899void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4900{
4901 // Start by creating a regular function
4902 createFunction(yieldType, params);
4903
4904 // Save in case yield() is called
4905 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4906 ::coroYieldType = yieldType;
4907}
4908
4909void Nucleus::yield(Value *val)
4910{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004911 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004912 Variable::materializeAll();
4913
4914 // On first yield, we start generating coroutine functions
4915 if(!::coroGen)
4916 {
4917 ::coroGen = std::make_shared<CoroutineGenerator>();
4918 ::coroGen->generateCoroutineBegin();
4919 }
4920
4921 ASSERT(::coroGen);
4922 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004923}
4924
Ben Clayton713b8d32019-12-17 20:37:56 +00004925static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4926{
4927 return false;
4928}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004929
4930static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4931{
4932}
Nicolas Capens157ba262019-12-10 17:49:14 -05004933
4934std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4935{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004936 if(::coroGen)
4937 {
4938 // Finish generating coroutine functions
4939 {
4940 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004941 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004942 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004943
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004944 auto awaitFunc = ::coroGen->generateAwaitFunction();
4945 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004946
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004947 // At this point, we no longer need the CoroutineGenerator.
4948 ::coroGen.reset();
4949 ::coroYieldType = nullptr;
4950
4951 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4952 { name, "await", "destroy" },
4953 cfgEdit);
4954
4955 return routine;
4956 }
4957 else
4958 {
4959 {
4960 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004961 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004962 }
4963
4964 ::coroYieldType = nullptr;
4965
4966 // Not an actual coroutine (no yields), so return stubs for await and destroy
4967 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4968
4969 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4970 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4971 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4972 return routine;
4973 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004974}
4975
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004976Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004977{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004978 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4979
4980 if(isCoroutine)
4981 {
4982 return rr::invokeCoroutineBegin(func);
4983 }
4984 else
4985 {
4986 // For regular routines, just invoke the begin func directly
4987 return func();
4988 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004989}
Nicolas Capens157ba262019-12-10 17:49:14 -05004990
4991} // namespace rr