blob: 9a83795d4b85994064f5ce5abd2e4f3f57330bf1 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050017#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000018#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050019#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040020
Nicolas Capens1a3ce872018-10-10 10:42:36 -040021#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000022#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040023
Nicolas Capens598f8d82016-09-26 15:09:10 -040024#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfgNode.h"
26#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000027#include "src/IceELFStreamer.h"
28#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040029#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031
Ben Clayton713b8d32019-12-17 20:37:56 +000032#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040033#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040035
Antonio Maiorano8bce0672020-02-28 13:13:45 -050036#include "marl/event.h"
37
Nicolas Capens6a990f82018-07-06 15:54:07 -040038#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040041
Nicolas Capensbd65da92017-01-05 16:31:06 -050042#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000043# ifndef WIN32_LEAN_AND_MEAN
44# define WIN32_LEAN_AND_MEAN
45# endif // !WIN32_LEAN_AND_MEAN
46# ifndef NOMINMAX
47# define NOMINMAX
48# endif // !NOMINMAX
49# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050050#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
Ben Clayton683bad82020-02-10 23:57:09 +000052#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040053#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000054#include <limits>
55#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040056
// Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050059namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050060namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061void replaceEntryNode(Ice::Cfg *function, Ice::CfgNode *newEntryNode)
62{
63 ASSERT_MSG(function->getEntryNode() != nullptr, "Function should have an entry node");
64
65 if(function->getEntryNode() == newEntryNode)
66 {
67 return;
68 }
69
70 // Make this the new entry node
71 function->setEntryNode(newEntryNode);
72
73 // Reorder nodes so that new entry block comes first. This is required
74 // by Cfg::renumberInstructions, which expects the first node in the list
75 // to be the entry node.
76 {
77 auto nodes = function->getNodes();
78
79 // TODO(amaiorano): Fast path if newEntryNode is last? Can avoid linear search.
80
81 auto iter = std::find(nodes.begin(), nodes.end(), newEntryNode);
82 ASSERT_MSG(iter != nodes.end(), "New node should be in the function's node list");
83
84 nodes.erase(iter);
85 nodes.insert(nodes.begin(), newEntryNode);
86
87 // swapNodes replaces its nodes with the input one, and renumbers them,
88 // so our new entry node will be 0, and the previous will be 1.
89 function->swapNodes(nodes);
90 }
91}
92
93Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
94{
95 uint32_t sequenceNumber = 0;
96 auto function = Ice::Cfg::create(context, sequenceNumber).release();
97
98 Ice::CfgLocalAllocatorScope allocScope{ function };
99
100 for(auto type : paramTypes)
101 {
102 Ice::Variable *arg = function->makeVariable(type);
103 function->addArg(arg);
104 }
105
106 Ice::CfgNode *node = function->makeNode();
107 function->setEntryNode(node);
108
109 return function;
110}
111
112Ice::Type getPointerType(Ice::Type elementType)
113{
114 if(sizeof(void *) == 8)
115 {
116 return Ice::IceType_i64;
117 }
118 else
119 {
120 return Ice::IceType_i32;
121 }
122}
123
124Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
125{
126 int typeSize = Ice::typeWidthInBytes(type);
127 int totalSize = typeSize * (arraySize ? arraySize : 1);
128
129 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
130 auto address = function->makeVariable(getPointerType(type));
131 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
132 function->getEntryNode()->getInsts().push_front(alloca);
133
134 return address;
135}
136
137Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500138{
139 if(sizeof(void *) == 8)
140 {
141 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
142 }
143 else
144 {
145 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
146 }
147}
148
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400149// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
150Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500151
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400152// Wrapper for calls on C functions with Ice types
153Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
154{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500155 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400156
157 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
158 // then truncate result to bool.
159 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
160 const bool returningBool = (retTy == Ice::IceType_i1);
161 if(returningBool)
162 {
163 ret = function->makeVariable(Ice::IceType_i32);
164 }
165 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500166 {
167 ret = function->makeVariable(retTy);
168 }
169
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400170 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500171 for(auto arg : iceArgs)
172 {
173 call->addArg(arg);
174 }
175
176 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400177
178 if(returningBool)
179 {
180 // Truncate result to bool so that if any (lsb) bits were set, result will be true
181 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
182 }
183
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500184 return ret;
185}
186
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400187Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
188{
189 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
190 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
191}
192
Antonio Maiorano62427e02020-02-13 09:18:05 -0500193// Wrapper for calls on C functions with Ice types
194template<typename Return, typename... CArgs, typename... RArgs>
195Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
196{
197 Ice::Type retTy = T(rr::CToReactorT<Return>::getType());
198 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500199 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500200}
201
Antonio Maiorano02a39532020-01-21 15:15:34 -0500202// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500203Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500204{
205 Ice::Variable *result = function->makeVariable(v->getType());
206 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
207 basicBlock->appendInst(cast);
208 return result;
209}
210
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400211Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
212{
213 Ice::Variable *to = function->makeVariable(toType);
214 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
215 basicBlock->appendInst(cast);
216 return to;
217}
218
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500219Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500220{
221 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
222 // absolute address. We circumvent this by casting to a non-const variable, and loading
223 // from that.
224 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
225 {
226 ptr = sz::createUnconstCast(function, basicBlock, cptr);
227 }
228
229 Ice::Variable *result = function->makeVariable(type);
230 auto load = Ice::InstLoad::create(function, result, ptr, align);
231 basicBlock->appendInst(load);
232
233 return result;
234}
235
236} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500237} // namespace
238
Ben Clayton713b8d32019-12-17 20:37:56 +0000239namespace rr {
240class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500241class CoroutineGenerator;
242} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500243
244namespace {
245
246// Default configuration settings. Must be accessed under mutex lock.
247std::mutex defaultConfigLock;
248rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000249{
Nicolas Capens157ba262019-12-10 17:49:14 -0500250 // This uses a static in a function to avoid the cost of a global static
251 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
252 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000253 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500254 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000255}
256
Nicolas Capens157ba262019-12-10 17:49:14 -0500257Ice::GlobalContext *context = nullptr;
258Ice::Cfg *function = nullptr;
259Ice::CfgNode *basicBlock = nullptr;
260Ice::CfgLocalAllocatorScope *allocator = nullptr;
261rr::ELFMemoryStreamer *routine = nullptr;
262
263std::mutex codegenMutex;
264
265Ice::ELFFileStreamer *elfFile = nullptr;
266Ice::Fdstream *out = nullptr;
267
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500268// Coroutine globals
269rr::Type *coroYieldType = nullptr;
270std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500271marl::Scheduler &getOrCreateScheduler()
272{
273 static auto scheduler = [] {
274 auto s = std::make_unique<marl::Scheduler>();
275 s->setWorkerThreadCount(8);
276 return s;
277 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500278
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500279 return *scheduler;
280}
Nicolas Capens157ba262019-12-10 17:49:14 -0500281} // Anonymous namespace
282
283namespace {
284
285#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +0000286# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500287#endif
288
Ben Clayton713b8d32019-12-17 20:37:56 +0000289#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
290# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500291#endif
292
Antonio Maiorano370cba52019-12-31 11:36:07 -0500293Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400294{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500295 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100296 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500297 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000298 case rr::Optimization::Level::None: return Ice::Opt_m1;
299 case rr::Optimization::Level::Less: return Ice::Opt_m1;
300 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500301 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
302 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100303 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500304 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400305}
306
Antonio Maiorano370cba52019-12-31 11:36:07 -0500307Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
308{
309 switch(memoryOrder)
310 {
311 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
312 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
313 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
314 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
315 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
316 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
317 }
318 return Ice::Intrinsics::MemoryOrderInvalid;
319}
320
// Host CPU feature flags. The public constants are declared here and defined
// outside the class from the private detect*() helpers.
class CPUID
{
public:
	const static bool ARM;
	const static bool SSE4_1;

private:
	// Executes the CPUID instruction for leaf |info| on x86/x86-64 and stores
	// EAX, EBX, ECX and EDX in |registers|[0..3]. On every other architecture
	// all four entries are set to zero.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Compile-time architecture check: true on ARM/AArch64, false on x86,
	// x86-64 and MIPS. Any other architecture fails the build.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__)
		return false;
#elif defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Reports SSE4.1 support by testing bit 19 (0x00080000) of the ECX value
	// returned by CPUID leaf 1. Always false on non-x86 targets.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		int registers[4];
		cpuid(registers, 1);

		const int ecx = registers[2];
		return (ecx & 0x00080000) != 0;
#else
		return false;
#endif
	}
};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500370
Nicolas Capens157ba262019-12-10 17:49:14 -0500371const bool CPUID::ARM = CPUID::detectARM();
372const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
373const bool emulateIntrinsics = false;
374const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400375
Nicolas Capens157ba262019-12-10 17:49:14 -0500376constexpr bool subzeroDumpEnabled = false;
377constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500378
379#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500380static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
381static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500382#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500383
384} // anonymous namespace
385
386namespace rr {
387
// Returns the name of this Reactor backend.
std::string BackendName()
{
	return std::string("Subzero");
}
392
Ben Clayton713b8d32019-12-17 20:37:56 +0000393const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500394 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500395};
396
397enum EmulatedType
398{
399 EmulatedShift = 16,
400 EmulatedV2 = 2 << EmulatedShift,
401 EmulatedV4 = 4 << EmulatedShift,
402 EmulatedV8 = 8 << EmulatedShift,
403 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
404
405 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
406 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
407 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000408 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
409 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500410 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
411};
412
Ben Clayton713b8d32019-12-17 20:37:56 +0000413class Value : public Ice::Operand
414{};
415class SwitchCases : public Ice::InstSwitch
416{};
417class BasicBlock : public Ice::CfgNode
418{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500419
420Ice::Type T(Type *t)
421{
422 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
423 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500424}
425
Nicolas Capens157ba262019-12-10 17:49:14 -0500426Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400427{
Ben Clayton713b8d32019-12-17 20:37:56 +0000428 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500429}
430
431Type *T(EmulatedType t)
432{
Ben Clayton713b8d32019-12-17 20:37:56 +0000433 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500434}
435
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500436std::vector<Ice::Type> T(const std::vector<Type *> &types)
437{
438 std::vector<Ice::Type> result;
439 result.reserve(types.size());
440 for(auto &t : types)
441 {
442 result.push_back(T(t));
443 }
444 return result;
445}
446
Nicolas Capens157ba262019-12-10 17:49:14 -0500447Value *V(Ice::Operand *v)
448{
Ben Clayton713b8d32019-12-17 20:37:56 +0000449 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500450}
451
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500452Ice::Operand *V(Value *v)
453{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500454 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500455}
456
Antonio Maiorano62427e02020-02-13 09:18:05 -0500457std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
458{
459 std::vector<Ice::Operand *> result;
460 result.reserve(values.size());
461 for(auto &v : values)
462 {
463 result.push_back(V(v));
464 }
465 return result;
466}
467
Nicolas Capens157ba262019-12-10 17:49:14 -0500468BasicBlock *B(Ice::CfgNode *b)
469{
Ben Clayton713b8d32019-12-17 20:37:56 +0000470 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500471}
472
473static size_t typeSize(Type *type)
474{
475 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400476 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500477 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800478 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000479 case Type_v2i32: return 8;
480 case Type_v4i16: return 8;
481 case Type_v2i16: return 4;
482 case Type_v8i8: return 8;
483 case Type_v4i8: return 4;
484 case Type_v2f32: return 8;
485 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500486 }
487 }
488
489 return Ice::typeWidthInBytes(T(type));
490}
491
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500492static void createRetVoidIfNoRet()
493{
494 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
495 {
496 Nucleus::createRetVoid();
497 }
498}
499
Ben Clayton713b8d32019-12-17 20:37:56 +0000500using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
501using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500502
503inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
504{
Ben Clayton713b8d32019-12-17 20:37:56 +0000505 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500506}
507
508inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
509{
510 return &sectionHeader(elfHeader)[index];
511}
512
513static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
514{
515 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
516
517 uint32_t index = relocation.getSymbol();
518 int table = relocationTable.sh_link;
519 void *symbolValue = nullptr;
520
521 if(index != SHN_UNDEF)
522 {
523 if(table == SHN_UNDEF) return nullptr;
524 const SectionHeader *symbolTable = elfSection(elfHeader, table);
525
526 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
527 if(index >= symtab_entries)
528 {
529 ASSERT(index < symtab_entries && "Symbol Index out of range");
530 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800531 }
532
Nicolas Capens157ba262019-12-10 17:49:14 -0500533 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000534 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500535 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800536
Nicolas Capens157ba262019-12-10 17:49:14 -0500537 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400538 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500539 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000540 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400541 }
542 else
543 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500544 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400545 }
Nicolas Capens66478362016-10-13 15:36:36 -0400546 }
547
Nicolas Capens157ba262019-12-10 17:49:14 -0500548 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000549 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500550
551 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400552 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400553 switch(relocation.getType())
554 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000555 case R_ARM_NONE:
556 // No relocation
557 break;
558 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500559 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000560 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500561 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
562 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
563 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400564 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000565 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500566 {
567 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
568 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
569 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400570 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000571 default:
572 ASSERT(false && "Unsupported relocation type");
573 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400574 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500575 }
576 else
577 {
578 switch(relocation.getType())
579 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000580 case R_386_NONE:
581 // No relocation
582 break;
583 case R_386_32:
584 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
585 break;
586 case R_386_PC32:
587 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
588 break;
589 default:
590 ASSERT(false && "Unsupported relocation type");
591 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500592 }
Nicolas Capens66478362016-10-13 15:36:36 -0400593 }
594
Nicolas Capens157ba262019-12-10 17:49:14 -0500595 return symbolValue;
596}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400597
Nicolas Capens157ba262019-12-10 17:49:14 -0500598static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
599{
600 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
601
602 uint32_t index = relocation.getSymbol();
603 int table = relocationTable.sh_link;
604 void *symbolValue = nullptr;
605
606 if(index != SHN_UNDEF)
607 {
608 if(table == SHN_UNDEF) return nullptr;
609 const SectionHeader *symbolTable = elfSection(elfHeader, table);
610
611 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
612 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400613 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500614 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400615 return nullptr;
616 }
617
Nicolas Capens157ba262019-12-10 17:49:14 -0500618 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000619 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500620 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400621
Nicolas Capens157ba262019-12-10 17:49:14 -0500622 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400623 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500624 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000625 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500626 }
627 else
628 {
629 return nullptr;
630 }
631 }
Nicolas Capens66478362016-10-13 15:36:36 -0400632
Nicolas Capens157ba262019-12-10 17:49:14 -0500633 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000634 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
635 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400636
Nicolas Capens157ba262019-12-10 17:49:14 -0500637 switch(relocation.getType())
638 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000639 case R_X86_64_NONE:
640 // No relocation
641 break;
642 case R_X86_64_64:
643 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
644 break;
645 case R_X86_64_PC32:
646 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
647 break;
648 case R_X86_64_32S:
649 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
650 break;
651 default:
652 ASSERT(false && "Unsupported relocation type");
653 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500654 }
655
656 return symbolValue;
657}
658
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500659void *loadImage(uint8_t *const elfImage, size_t &codeSize, const char *functionName = nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -0500660{
Ben Clayton713b8d32019-12-17 20:37:56 +0000661 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500662
663 if(!elfHeader->checkMagic())
664 {
665 return nullptr;
666 }
667
668 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000669 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
670#if defined(__i386__)
671 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
672#elif defined(__x86_64__)
673 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
674#elif defined(__arm__)
675 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
676#elif defined(__aarch64__)
677 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
678#elif defined(__mips__)
679 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
680#else
681# error "Unsupported platform"
682#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500683
Ben Clayton713b8d32019-12-17 20:37:56 +0000684 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500685 void *entry = nullptr;
686
687 for(int i = 0; i < elfHeader->e_shnum; i++)
688 {
689 if(sectionHeader[i].sh_type == SHT_PROGBITS)
690 {
691 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
692 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500693 auto getCurrSectionName = [&]() {
694 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
695 return reinterpret_cast<const char *>(elfImage + sectionNameOffset);
696 };
697 if(functionName && strstr(getCurrSectionName(), functionName) == nullptr)
698 {
699 continue;
700 }
701
Nicolas Capens157ba262019-12-10 17:49:14 -0500702 entry = elfImage + sectionHeader[i].sh_offset;
703 codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400704 }
705 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500706 else if(sectionHeader[i].sh_type == SHT_REL)
707 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000708 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400709
Nicolas Capens157ba262019-12-10 17:49:14 -0500710 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
711 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000712 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500713 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
714 }
715 }
716 else if(sectionHeader[i].sh_type == SHT_RELA)
717 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000718 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500719
720 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
721 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000722 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500723 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
724 }
725 }
726 }
727
728 return entry;
729}
730
731template<typename T>
732struct ExecutableAllocator
733{
734 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000735 template<class U>
736 ExecutableAllocator(const ExecutableAllocator<U> &other)
737 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500738
739 using value_type = T;
740 using size_type = std::size_t;
741
742 T *allocate(size_type n)
743 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000744 return (T *)allocateMemoryPages(
745 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500746 }
747
748 void deallocate(T *p, size_type n)
749 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800750 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500751 }
752};
753
754class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
755{
756 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
757 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
758
759public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000760 ELFMemoryStreamer()
761 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500762 {
763 position = 0;
764 buffer.reserve(0x1000);
765 }
766
767 ~ELFMemoryStreamer() override
768 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500769 }
770
771 void write8(uint8_t Value) override
772 {
773 if(position == (uint64_t)buffer.size())
774 {
775 buffer.push_back(Value);
776 position++;
777 }
778 else if(position < (uint64_t)buffer.size())
779 {
780 buffer[position] = Value;
781 position++;
782 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000783 else
784 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500785 }
786
787 void writeBytes(llvm::StringRef Bytes) override
788 {
789 std::size_t oldSize = buffer.size();
790 buffer.resize(oldSize + Bytes.size());
791 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
792 position += Bytes.size();
793 }
794
795 uint64_t tell() const override { return position; }
796
797 void seek(uint64_t Off) override { position = Off; }
798
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500799 const void *getEntryByName(const char *name)
Nicolas Capens157ba262019-12-10 17:49:14 -0500800 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500801 size_t codeSize = 0;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500802 const void *entry = loadImage(&buffer[0], codeSize, name);
Nicolas Capens157ba262019-12-10 17:49:14 -0500803
804#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500805 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
806#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000807 __builtin___clear_cache((char *)entry, (char *)entry + codeSize);
Nicolas Capens157ba262019-12-10 17:49:14 -0500808#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500809
Nicolas Capens598f8d82016-09-26 15:09:10 -0400810 return entry;
811 }
812
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500813 void finalize()
814 {
815 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
816
817 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
818 }
819
Ben Clayton713b8d32019-12-17 20:37:56 +0000820 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400821 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500822 ASSERT(func);
823 funcs[index] = func;
824 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400825
Nicolas Capens157ba262019-12-10 17:49:14 -0500826 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400827 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500828 ASSERT(funcs[index]);
829 return funcs[index];
830 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400831
Antonio Maiorano02a39532020-01-21 15:15:34 -0500832 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500833 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500834 // TODO(b/148086935): Replace with a buffer allocator.
835 size_t space = size + alignment;
836 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
837 void *ptr = buf.get();
838 void *alignedPtr = std::align(alignment, size, ptr, space);
839 ASSERT(alignedPtr);
840 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500841 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500842 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500843 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400844
Nicolas Capens157ba262019-12-10 17:49:14 -0500845private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000846 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500847 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
848 std::size_t position;
849 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500850};
851
#ifdef ENABLE_RR_PRINT
// Emits a call to the C library printf with `vals` as its arguments.
void VPrintf(const std::vector<Value *> &vals)
{
	const void *printfAddress = reinterpret_cast<const void *>(::printf);
	sz::Call(::function, ::basicBlock, Ice::IceType_i32, printfAddress, V(vals), true);
}
#endif  // ENABLE_RR_PRINT
858
Nicolas Capens157ba262019-12-10 17:49:14 -0500859Nucleus::Nucleus()
860{
Ben Clayton713b8d32019-12-17 20:37:56 +0000861 ::codegenMutex.lock(); // Reactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500862
863 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
864 Ice::ClFlags::getParsedClFlags(Flags);
865
Ben Clayton713b8d32019-12-17 20:37:56 +0000866#if defined(__arm__)
867 Flags.setTargetArch(Ice::Target_ARM32);
868 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
869#elif defined(__mips__)
870 Flags.setTargetArch(Ice::Target_MIPS32);
871 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
872#else // x86
873 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
874 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
875#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500876 Flags.setOutFileType(Ice::FT_Elf);
877 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
878 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
879 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
880 Flags.setDisableHybridAssembly(true);
881
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500882 // Emit functions into separate sections in the ELF so we can find them by name
883 Flags.setFunctionSections(true);
884
Nicolas Capens157ba262019-12-10 17:49:14 -0500885 static llvm::raw_os_ostream cout(std::cout);
886 static llvm::raw_os_ostream cerr(std::cerr);
887
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500888 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500889 {
890 // Decorate text asm with liveness info
891 Flags.setDecorateAsm(true);
892 }
893
Ben Clayton713b8d32019-12-17 20:37:56 +0000894 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500895 {
896 std::error_code errorCode;
897 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
898 ::elfFile = new Ice::ELFFileStreamer(*out);
899 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
900 }
901 else
902 {
903 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
904 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
905 ::routine = elfMemory;
906 }
907}
908
909Nucleus::~Nucleus()
910{
911 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500912 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500913
914 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500915 ::allocator = nullptr;
916
Nicolas Capens157ba262019-12-10 17:49:14 -0500917 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500918 ::function = nullptr;
919
Nicolas Capens157ba262019-12-10 17:49:14 -0500920 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500921 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500922
923 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500924 ::elfFile = nullptr;
925
Nicolas Capens157ba262019-12-10 17:49:14 -0500926 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500927 ::out = nullptr;
928
929 ::basicBlock = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500930
931 ::codegenMutex.unlock();
932}
933
934void Nucleus::setDefaultConfig(const Config &cfg)
935{
936 std::unique_lock<std::mutex> lock(::defaultConfigLock);
937 ::defaultConfig() = cfg;
938}
939
940void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
941{
942 std::unique_lock<std::mutex> lock(::defaultConfigLock);
943 auto &config = ::defaultConfig();
944 config = cfgEdit.apply(config);
945}
946
947Config Nucleus::getDefaultConfig()
948{
949 std::unique_lock<std::mutex> lock(::defaultConfigLock);
950 return ::defaultConfig();
951}
952
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500953// This function lowers and produces executable binary code in memory for the input functions,
954// and returns a Routine with the entry points to these functions.
955template<size_t Count>
956static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500957{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500958 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
959 // and GlobalContext::emitItems.
960
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500961 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500962 {
963 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500964 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500965 }
966
967 ::context->emitFileHeader();
968
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500969 // Translate
970
971 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500972 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500973 Ice::Cfg *currFunc = functions[i];
974
975 // Install function allocator in TLS for Cfg-specific container allocators
976 Ice::CfgLocalAllocatorScope allocScope(currFunc);
977
978 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
979
980 rr::optimize(currFunc);
981
982 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500983 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500984
985 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500986 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500987
988 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
989
990 if(subzeroEmitTextAsm)
991 {
992 currFunc->emit();
993 }
994
995 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -0500996 }
997
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500998 // Emit items
999
1000 ::context->lowerGlobals("");
1001
Nicolas Capens157ba262019-12-10 17:49:14 -05001002 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001003
1004 for(size_t i = 0; i < Count; ++i)
1005 {
1006 Ice::Cfg *currFunc = functions[i];
1007
1008 // Accumulate globals from functions to emit into the "last" section at the end
1009 auto globals = currFunc->getGlobalInits();
1010 if(globals && !globals->empty())
1011 {
1012 ::context->getGlobals()->merge(globals.get());
1013 }
1014
1015 auto assembler = currFunc->releaseAssembler();
1016 assembler->alignFunction();
1017 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1018 }
1019
Nicolas Capens157ba262019-12-10 17:49:14 -05001020 ::context->lowerGlobals("last");
1021 ::context->lowerConstants();
1022 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001023
Nicolas Capens157ba262019-12-10 17:49:14 -05001024 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001025 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001026 objectWriter->writeNonUserSections();
1027
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001028 // Done compiling functions, get entry pointers to each of them
1029 for(size_t i = 0; i < Count; ++i)
1030 {
1031 const void *entry = ::routine->getEntryByName(names[i]);
1032 ::routine->setEntry(i, entry);
1033 }
1034
1035 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001036
1037 Routine *handoffRoutine = ::routine;
1038 ::routine = nullptr;
1039
1040 return std::shared_ptr<Routine>(handoffRoutine);
1041}
1042
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001043std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1044{
1045 createRetVoidIfNoRet();
1046 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1047}
1048
Nicolas Capens157ba262019-12-10 17:49:14 -05001049Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1050{
1051 Ice::Type type = T(t);
1052 int typeSize = Ice::typeWidthInBytes(type);
1053 int totalSize = typeSize * (arraySize ? arraySize : 1);
1054
1055 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1056 auto address = ::function->makeVariable(T(getPointerType(t)));
1057 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1058 ::function->getEntryNode()->getInsts().push_front(alloca);
1059
1060 return V(address);
1061}
1062
1063BasicBlock *Nucleus::createBasicBlock()
1064{
1065 return B(::function->makeNode());
1066}
1067
1068BasicBlock *Nucleus::getInsertBlock()
1069{
1070 return B(::basicBlock);
1071}
1072
1073void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1074{
Ben Clayton713b8d32019-12-17 20:37:56 +00001075 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001076
1077 Variable::materializeAll();
1078
1079 ::basicBlock = basicBlock;
1080}
1081
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001082void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001083{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001084 ASSERT(::function == nullptr);
1085 ASSERT(::allocator == nullptr);
1086 ASSERT(::basicBlock == nullptr);
1087
1088 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1089
1090 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1091 // becomes invalid if another one is created; for example, when creating await and destroy functions
1092 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1093 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001094 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1095
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001096 ::basicBlock = ::function->getEntryNode();
Nicolas Capens157ba262019-12-10 17:49:14 -05001097}
1098
1099Value *Nucleus::getArgument(unsigned int index)
1100{
1101 return V(::function->getArgs()[index]);
1102}
1103
1104void Nucleus::createRetVoid()
1105{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001106 RR_DEBUG_INFO_UPDATE_LOC();
1107
Nicolas Capens157ba262019-12-10 17:49:14 -05001108 // Code generated after this point is unreachable, so any variables
1109 // being read can safely return an undefined value. We have to avoid
1110 // materializing variables after the terminator ret instruction.
1111 Variable::killUnmaterialized();
1112
1113 Ice::InstRet *ret = Ice::InstRet::create(::function);
1114 ::basicBlock->appendInst(ret);
1115}
1116
1117void Nucleus::createRet(Value *v)
1118{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001119 RR_DEBUG_INFO_UPDATE_LOC();
1120
Nicolas Capens157ba262019-12-10 17:49:14 -05001121 // Code generated after this point is unreachable, so any variables
1122 // being read can safely return an undefined value. We have to avoid
1123 // materializing variables after the terminator ret instruction.
1124 Variable::killUnmaterialized();
1125
1126 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1127 ::basicBlock->appendInst(ret);
1128}
1129
1130void Nucleus::createBr(BasicBlock *dest)
1131{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001132 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001133 Variable::materializeAll();
1134
1135 auto br = Ice::InstBr::create(::function, dest);
1136 ::basicBlock->appendInst(br);
1137}
1138
1139void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1140{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001141 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001142 Variable::materializeAll();
1143
1144 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1145 ::basicBlock->appendInst(br);
1146}
1147
1148static bool isCommutative(Ice::InstArithmetic::OpKind op)
1149{
1150 switch(op)
1151 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001152 case Ice::InstArithmetic::Add:
1153 case Ice::InstArithmetic::Fadd:
1154 case Ice::InstArithmetic::Mul:
1155 case Ice::InstArithmetic::Fmul:
1156 case Ice::InstArithmetic::And:
1157 case Ice::InstArithmetic::Or:
1158 case Ice::InstArithmetic::Xor:
1159 return true;
1160 default:
1161 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001162 }
1163}
1164
1165static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1166{
1167 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1168
1169 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1170
1171 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1172 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1173 ::basicBlock->appendInst(arithmetic);
1174
1175 return V(result);
1176}
1177
1178Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1179{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001180 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001181 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1182}
1183
1184Value *Nucleus::createSub(Value *lhs, Value *rhs)
1185{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001186 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001187 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1188}
1189
1190Value *Nucleus::createMul(Value *lhs, Value *rhs)
1191{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001192 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001193 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1194}
1195
1196Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1197{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001198 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001199 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1200}
1201
1202Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1203{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001204 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001205 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1206}
1207
1208Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1209{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001210 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001211 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1212}
1213
1214Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1215{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001216 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001217 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1218}
1219
1220Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1221{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001222 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001223 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1224}
1225
1226Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1227{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001228 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001229 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1230}
1231
1232Value *Nucleus::createURem(Value *lhs, Value *rhs)
1233{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001234 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001235 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1236}
1237
1238Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1239{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001240 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001241 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1242}
1243
1244Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1245{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001246 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001247 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1248 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001249 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001250 return nullptr;
1251}
1252
1253RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1254{
1255 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001256}
1257
1258Value *Nucleus::createShl(Value *lhs, Value *rhs)
1259{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001260 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001261 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1262}
1263
1264Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1265{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001266 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001267 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1268}
1269
1270Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1271{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001272 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001273 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1274}
1275
1276Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1277{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001278 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001279 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1280}
1281
1282Value *Nucleus::createOr(Value *lhs, Value *rhs)
1283{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001284 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001285 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1286}
1287
1288Value *Nucleus::createXor(Value *lhs, Value *rhs)
1289{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001290 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001291 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1292}
1293
1294Value *Nucleus::createNeg(Value *v)
1295{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001296 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001297 return createSub(createNullValue(T(v->getType())), v);
1298}
1299
1300Value *Nucleus::createFNeg(Value *v)
1301{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001302 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001303 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1304 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001305
1306 return createFSub(negativeZero, v);
1307}
1308
1309Value *Nucleus::createNot(Value *v)
1310{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001311 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001312 if(Ice::isScalarIntegerType(v->getType()))
1313 {
1314 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1315 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001316 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001317 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001318 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001319 return createXor(v, createConstantVector(c, T(v->getType())));
1320 }
1321}
1322
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001323static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1324{
1325#if defined(__i386__) || defined(__x86_64__)
1326 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1327 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1328 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1329#else
1330 if(atomic)
1331 {
1332 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1333 }
1334 if(memoryOrder != std::memory_order_relaxed)
1335 {
1336 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1337 }
1338#endif
1339
1340 // Vulkan doesn't allow sequential memory order
1341 ASSERT(memoryOrder != std::memory_order_seq_cst);
1342}
1343
Nicolas Capens157ba262019-12-10 17:49:14 -05001344Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1345{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001346 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001347 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001348
1349 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001350 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001351
Ben Clayton713b8d32019-12-17 20:37:56 +00001352 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001353 {
1354 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001355 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001356 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001357 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001358 auto pointer = RValue<Pointer<Byte>>(ptr);
1359 Int x = *Pointer<Int>(pointer);
1360
1361 Int4 vector;
1362 vector = Insert(vector, x, 0);
1363
Antonio Maiorano02a39532020-01-21 15:15:34 -05001364 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001365 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1366 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001367 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001368 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001369 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001370 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001371 auto pointer = RValue<Pointer<Byte>>(ptr);
1372 Int x = *Pointer<Int>(pointer);
1373 Int y = *Pointer<Int>(pointer + 4);
1374
1375 Int4 vector;
1376 vector = Insert(vector, x, 0);
1377 vector = Insert(vector, y, 1);
1378
Antonio Maiorano02a39532020-01-21 15:15:34 -05001379 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001380 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1381 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001382 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001383 else
1384 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001385 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001386 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001387 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001388 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001389 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001390 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001391 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1392 load->addArg(ptr);
1393 load->addArg(::context->getConstantInt32(typeSize(type)));
1394 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001395 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001396 }
1397 else
1398 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001399 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001400 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001401
Antonio Maiorano02a39532020-01-21 15:15:34 -05001402 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001403 return V(result);
1404}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001405
Nicolas Capens157ba262019-12-10 17:49:14 -05001406Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1407{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001408 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001409 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001410
Ben Clayton713b8d32019-12-17 20:37:56 +00001411#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001412 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001413 if(align != 0)
1414 {
1415 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1416 call->addArg(ptr);
1417 call->addArg(::context->getConstantInt64(typeSize(type)));
1418 ::basicBlock->appendInst(call);
1419 }
1420#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001421
Nicolas Capens157ba262019-12-10 17:49:14 -05001422 int valueType = (int)reinterpret_cast<intptr_t>(type);
1423
Ben Clayton713b8d32019-12-17 20:37:56 +00001424 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001425 {
1426 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001427 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001428 if(typeSize(type) == 4)
1429 {
1430 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1431 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1432 ::basicBlock->appendInst(bitcast);
1433
1434 RValue<Int4> v(V(vector));
1435
1436 auto pointer = RValue<Pointer<Byte>>(ptr);
1437 Int x = Extract(v, 0);
1438 *Pointer<Int>(pointer) = x;
1439 }
1440 else if(typeSize(type) == 8)
1441 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001442 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001443 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1444 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1445 ::basicBlock->appendInst(bitcast);
1446
1447 RValue<Int4> v(V(vector));
1448
1449 auto pointer = RValue<Pointer<Byte>>(ptr);
1450 Int x = Extract(v, 0);
1451 *Pointer<Int>(pointer) = x;
1452 Int y = Extract(v, 1);
1453 *Pointer<Int>(pointer + 4) = y;
1454 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001455 else
1456 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001457 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001458 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001459 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001460 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001461 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1462 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1463 store->addArg(value);
1464 store->addArg(ptr);
1465 store->addArg(::context->getConstantInt32(typeSize(type)));
1466 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001467 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001468 }
1469 else
1470 {
1471 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001472
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001473 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001474 ::basicBlock->appendInst(store);
1475 }
1476
1477 return value;
1478}
1479
1480Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1481{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001482 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001483 ASSERT(index->getType() == Ice::IceType_i32);
1484
1485 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1486 {
1487 int32_t offset = constant->getValue() * (int)typeSize(type);
1488
1489 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001490 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001491 return ptr;
1492 }
1493
Nicolas Capens157ba262019-12-10 17:49:14 -05001494 return createAdd(ptr, createConstantInt(offset));
1495 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001496
Nicolas Capens157ba262019-12-10 17:49:14 -05001497 if(!Ice::isByteSizedType(T(type)))
1498 {
1499 index = createMul(index, createConstantInt((int)typeSize(type)));
1500 }
1501
Ben Clayton713b8d32019-12-17 20:37:56 +00001502 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001503 {
1504 if(unsignedIndex)
1505 {
1506 index = createZExt(index, T(Ice::IceType_i64));
1507 }
1508 else
1509 {
1510 index = createSExt(index, T(Ice::IceType_i64));
1511 }
1512 }
1513
1514 return createAdd(ptr, index);
1515}
1516
Antonio Maiorano370cba52019-12-31 11:36:07 -05001517static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1518{
1519 Ice::Variable *result = ::function->makeVariable(value->getType());
1520
1521 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1522 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1523 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1524 auto op = ::context->getConstantInt32(rmwOp);
1525 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1526 inst->addArg(op);
1527 inst->addArg(ptr);
1528 inst->addArg(value);
1529 inst->addArg(order);
1530 ::basicBlock->appendInst(inst);
1531
1532 return V(result);
1533}
1534
Nicolas Capens157ba262019-12-10 17:49:14 -05001535Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1536{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001537 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001538 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001539}
1540
1541Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1542{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001543 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001544 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001545}
1546
1547Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1548{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001549 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001550 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001551}
1552
1553Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1554{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001555 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001556 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001557}
1558
1559Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1560{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001561 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001562 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001563}
1564
1565Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1566{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001567 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001568 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001569}
1570
1571Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1572{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001573 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001574 Ice::Variable *result = ::function->makeVariable(value->getType());
1575
1576 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1577 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1578 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1579 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1580 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1581 inst->addArg(ptr);
1582 inst->addArg(compare);
1583 inst->addArg(value);
1584 inst->addArg(orderEq);
1585 inst->addArg(orderNeq);
1586 ::basicBlock->appendInst(inst);
1587
1588 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001589}
1590
1591static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1592{
1593 if(v->getType() == T(destType))
1594 {
1595 return v;
1596 }
1597
1598 Ice::Variable *result = ::function->makeVariable(T(destType));
1599 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1600 ::basicBlock->appendInst(cast);
1601
1602 return V(result);
1603}
1604
1605Value *Nucleus::createTrunc(Value *v, Type *destType)
1606{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001607 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001608 return createCast(Ice::InstCast::Trunc, v, destType);
1609}
1610
1611Value *Nucleus::createZExt(Value *v, Type *destType)
1612{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001613 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001614 return createCast(Ice::InstCast::Zext, v, destType);
1615}
1616
1617Value *Nucleus::createSExt(Value *v, Type *destType)
1618{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001619 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001620 return createCast(Ice::InstCast::Sext, v, destType);
1621}
1622
1623Value *Nucleus::createFPToUI(Value *v, Type *destType)
1624{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001625 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001626 return createCast(Ice::InstCast::Fptoui, v, destType);
1627}
1628
1629Value *Nucleus::createFPToSI(Value *v, Type *destType)
1630{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001631 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001632 return createCast(Ice::InstCast::Fptosi, v, destType);
1633}
1634
1635Value *Nucleus::createSIToFP(Value *v, Type *destType)
1636{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001637 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001638 return createCast(Ice::InstCast::Sitofp, v, destType);
1639}
1640
1641Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1642{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001643 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001644 return createCast(Ice::InstCast::Fptrunc, v, destType);
1645}
1646
1647Value *Nucleus::createFPExt(Value *v, Type *destType)
1648{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001649 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001650 return createCast(Ice::InstCast::Fpext, v, destType);
1651}
1652
1653Value *Nucleus::createBitCast(Value *v, Type *destType)
1654{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001655 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001656 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1657 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1658 // emulate them by writing to the stack and reading back as the destination type.
1659 if(emulateMismatchedBitCast)
1660 {
1661 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1662 {
1663 Value *address = allocateStackVariable(destType);
1664 createStore(v, address, T(v->getType()));
1665 return createLoad(address, destType);
1666 }
1667 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1668 {
1669 Value *address = allocateStackVariable(T(v->getType()));
1670 createStore(v, address, T(v->getType()));
1671 return createLoad(address, destType);
1672 }
1673 }
1674
1675 return createCast(Ice::InstCast::Bitcast, v, destType);
1676}
1677
1678static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1679{
1680 ASSERT(lhs->getType() == rhs->getType());
1681
1682 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1683 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1684 ::basicBlock->appendInst(cmp);
1685
1686 return V(result);
1687}
1688
1689Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1690{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001691 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001692 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1693}
1694
1695Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1696{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001697 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001698 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1699}
1700
1701Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1702{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001703 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001704 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1705}
1706
1707Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1708{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001709 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001710 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1711}
1712
1713Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1714{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001715 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001716 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1717}
1718
1719Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1720{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001721 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001722 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1723}
1724
1725Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1726{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001727 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001728 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1729}
1730
1731Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1732{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001733 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001734 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1735}
1736
1737Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1738{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001739 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001740 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1741}
1742
1743Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1744{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001745 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001746 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1747}
1748
1749Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1750{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001751 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001752 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1753}
1754
1755static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1756{
1757 ASSERT(lhs->getType() == rhs->getType());
1758 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1759
1760 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1761 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1762 ::basicBlock->appendInst(cmp);
1763
1764 return V(result);
1765}
1766
1767Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1768{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001769 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001770 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1771}
1772
1773Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1774{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001775 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001776 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1777}
1778
1779Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1780{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001781 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001782 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1783}
1784
1785Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1786{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001787 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001788 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1789}
1790
1791Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1792{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001793 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001794 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1795}
1796
1797Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1798{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001799 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001800 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1801}
1802
1803Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1804{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001805 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001806 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1807}
1808
1809Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1810{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001811 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001812 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1813}
1814
1815Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1816{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001817 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001818 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1819}
1820
1821Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1822{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001823 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001824 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1825}
1826
1827Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1828{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001829 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001830 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1831}
1832
1833Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1834{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001835 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001836 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1837}
1838
1839Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1840{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001841 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001842 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1843}
1844
1845Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1846{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001847 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001848 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1849}
1850
1851Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001854 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001855 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001856 ::basicBlock->appendInst(extract);
1857
1858 return V(result);
1859}
1860
1861Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001864 auto result = ::function->makeVariable(vector->getType());
1865 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1866 ::basicBlock->appendInst(insert);
1867
1868 return V(result);
1869}
1870
1871Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1872{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001873 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001874 ASSERT(V1->getType() == V2->getType());
1875
1876 int size = Ice::typeNumElements(V1->getType());
1877 auto result = ::function->makeVariable(V1->getType());
1878 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1879
1880 for(int i = 0; i < size; i++)
1881 {
1882 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1883 }
1884
1885 ::basicBlock->appendInst(shuffle);
1886
1887 return V(result);
1888}
1889
1890Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1891{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001892 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001893 ASSERT(ifTrue->getType() == ifFalse->getType());
1894
1895 auto result = ::function->makeVariable(ifTrue->getType());
1896 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1897 ::basicBlock->appendInst(select);
1898
1899 return V(result);
1900}
1901
1902SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1903{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001904 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001905 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1906 ::basicBlock->appendInst(switchInst);
1907
Ben Clayton713b8d32019-12-17 20:37:56 +00001908 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001909}
1910
1911void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1912{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001913 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001914 switchCases->addBranch(label, label, branch);
1915}
1916
1917void Nucleus::createUnreachable()
1918{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001919 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001920 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1921 ::basicBlock->appendInst(unreachable);
1922}
1923
Antonio Maiorano62427e02020-02-13 09:18:05 -05001924Type *Nucleus::getType(Value *value)
1925{
1926 return T(V(value)->getType());
1927}
1928
1929Type *Nucleus::getContainedType(Type *vectorType)
1930{
1931 Ice::Type vecTy = T(vectorType);
1932 switch(vecTy)
1933 {
1934 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1935 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1936 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1937 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1938 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1939 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1940 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1941 default:
1942 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1943 return {};
1944 }
1945}
1946
Nicolas Capens157ba262019-12-10 17:49:14 -05001947Type *Nucleus::getPointerType(Type *ElementType)
1948{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001949 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001950}
1951
Antonio Maiorano62427e02020-02-13 09:18:05 -05001952static constexpr Ice::Type getNaturalIntType()
1953{
1954 constexpr size_t intSize = sizeof(int);
1955 static_assert(intSize == 4 || intSize == 8, "");
1956 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1957}
1958
1959Type *Nucleus::getPrintfStorageType(Type *valueType)
1960{
1961 Ice::Type valueTy = T(valueType);
1962 switch(valueTy)
1963 {
1964 case Ice::IceType_i32:
1965 return T(getNaturalIntType());
1966
1967 case Ice::IceType_f32:
1968 return T(Ice::IceType_f64);
1969
1970 default:
1971 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1972 return {};
1973 }
1974}
1975
Nicolas Capens157ba262019-12-10 17:49:14 -05001976Value *Nucleus::createNullValue(Type *Ty)
1977{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001978 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001979 if(Ice::isVectorType(T(Ty)))
1980 {
1981 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001982 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001983 return createConstantVector(c, Ty);
1984 }
1985 else
1986 {
1987 return V(::context->getConstantZero(T(Ty)));
1988 }
1989}
1990
1991Value *Nucleus::createConstantLong(int64_t i)
1992{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001993 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001994 return V(::context->getConstantInt64(i));
1995}
1996
1997Value *Nucleus::createConstantInt(int i)
1998{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001999 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002000 return V(::context->getConstantInt32(i));
2001}
2002
2003Value *Nucleus::createConstantInt(unsigned int i)
2004{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002005 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002006 return V(::context->getConstantInt32(i));
2007}
2008
2009Value *Nucleus::createConstantBool(bool b)
2010{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002011 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002012 return V(::context->getConstantInt1(b));
2013}
2014
2015Value *Nucleus::createConstantByte(signed char i)
2016{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002017 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002018 return V(::context->getConstantInt8(i));
2019}
2020
2021Value *Nucleus::createConstantByte(unsigned char i)
2022{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002023 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002024 return V(::context->getConstantInt8(i));
2025}
2026
2027Value *Nucleus::createConstantShort(short i)
2028{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002029 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002030 return V(::context->getConstantInt16(i));
2031}
2032
2033Value *Nucleus::createConstantShort(unsigned short i)
2034{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002035 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002036 return V(::context->getConstantInt16(i));
2037}
2038
2039Value *Nucleus::createConstantFloat(float x)
2040{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002041 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002042 return V(::context->getConstantFloat(x));
2043}
2044
2045Value *Nucleus::createNullPointer(Type *Ty)
2046{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002047 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002048 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002049}
2050
Antonio Maiorano02a39532020-01-21 15:15:34 -05002051static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2052{
2053 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2054}
2055
Nicolas Capens157ba262019-12-10 17:49:14 -05002056Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2057{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002058 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002059 const int vectorSize = 16;
2060 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2061 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002062
2063 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002064 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002065
Antonio Maioranoa0957112020-03-04 15:06:19 -05002066 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002067 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002068
2069 switch((int)reinterpret_cast<intptr_t>(type))
2070 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002071 case Ice::IceType_v4i32:
2072 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002073 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002074 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002075 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002076 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002077 }
2078 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002079 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002080 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002081 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002082 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002083 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002084 }
2085 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002086 case Ice::IceType_v8i16:
2087 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002088 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002089 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002090 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002091 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002092 }
2093 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002094 case Ice::IceType_v16i8:
2095 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002096 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002097 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002098 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002099 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002100 }
2101 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002102 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002103 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002104 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002105 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002106 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002107 }
2108 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002109 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002110 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002111 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002112 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002113 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002114 }
2115 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002116 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002117 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002118 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002119 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002120 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002121 }
2122 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002123 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002124 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002125 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002126 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002127 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002128 }
2129 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002130 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002131 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002132 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002133 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002134 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002135 }
2136 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002137 default:
2138 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002139 }
2140
Antonio Maiorano02a39532020-01-21 15:15:34 -05002141 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002142
Antonio Maiorano02a39532020-01-21 15:15:34 -05002143 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002144 return V(result);
2145}
2146
2147Value *Nucleus::createConstantVector(const double *constants, Type *type)
2148{
Ben Clayton713b8d32019-12-17 20:37:56 +00002149 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002150}
2151
Antonio Maiorano62427e02020-02-13 09:18:05 -05002152Value *Nucleus::createConstantString(const char *v)
2153{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002154 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002155 return V(IceConstantData(v, strlen(v) + 1));
2156}
2157
Nicolas Capens157ba262019-12-10 17:49:14 -05002158Type *Void::getType()
2159{
2160 return T(Ice::IceType_void);
2161}
2162
2163Type *Bool::getType()
2164{
2165 return T(Ice::IceType_i1);
2166}
2167
2168Type *Byte::getType()
2169{
2170 return T(Ice::IceType_i8);
2171}
2172
2173Type *SByte::getType()
2174{
2175 return T(Ice::IceType_i8);
2176}
2177
2178Type *Short::getType()
2179{
2180 return T(Ice::IceType_i16);
2181}
2182
2183Type *UShort::getType()
2184{
2185 return T(Ice::IceType_i16);
2186}
2187
2188Type *Byte4::getType()
2189{
2190 return T(Type_v4i8);
2191}
2192
2193Type *SByte4::getType()
2194{
2195 return T(Type_v4i8);
2196}
2197
Ben Clayton713b8d32019-12-17 20:37:56 +00002198namespace {
2199RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002200{
Ben Clayton713b8d32019-12-17 20:37:56 +00002201 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002202}
2203
Ben Clayton713b8d32019-12-17 20:37:56 +00002204RValue<Byte> Extract(RValue<Byte8> val, int i)
2205{
2206 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
2207}
2208
2209RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2210{
2211 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
2212}
2213} // namespace
2214
Nicolas Capens157ba262019-12-10 17:49:14 -05002215RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2216{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002217 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002218 if(emulateIntrinsics)
2219 {
2220 Byte8 result;
2221 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2222 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2223 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2224 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2225 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2226 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2227 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2228 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2229
2230 return result;
2231 }
2232 else
2233 {
2234 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002235 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002236 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2237 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2238 paddusb->addArg(x.value);
2239 paddusb->addArg(y.value);
2240 ::basicBlock->appendInst(paddusb);
2241
2242 return RValue<Byte8>(V(result));
2243 }
2244}
2245
2246RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2247{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002248 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002249 if(emulateIntrinsics)
2250 {
2251 Byte8 result;
2252 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2253 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2254 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2255 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2256 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2257 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2258 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2259 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2260
2261 return result;
2262 }
2263 else
2264 {
2265 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002266 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002267 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2268 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2269 psubusw->addArg(x.value);
2270 psubusw->addArg(y.value);
2271 ::basicBlock->appendInst(psubusw);
2272
2273 return RValue<Byte8>(V(result));
2274 }
2275}
2276
2277RValue<SByte> Extract(RValue<SByte8> val, int i)
2278{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002279 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002280 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
2281}
2282
2283RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002285 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002286 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2287}
2288
2289RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002292 if(emulateIntrinsics)
2293 {
2294 SByte8 result;
2295 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2296 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2297 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2298 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2299 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2300 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2301 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2302 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2303
2304 return result;
2305 }
2306 else
2307 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002308#if defined(__i386__) || defined(__x86_64__)
2309 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2310 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2311 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002312
Ben Clayton713b8d32019-12-17 20:37:56 +00002313 return As<SByte8>(hi | lo);
2314#else
2315 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2316#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002317 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002318}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002319
Nicolas Capens157ba262019-12-10 17:49:14 -05002320RValue<Int> SignMask(RValue<Byte8> x)
2321{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002322 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002323 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002324 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002325 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2326 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002327 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002328 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002329 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002330 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002331 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002332 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2333 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2334 movmsk->addArg(x.value);
2335 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002336
Nicolas Capens157ba262019-12-10 17:49:14 -05002337 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002338 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002339}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002340
2341// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2342// {
Nicolas Capens2f970b62016-11-08 14:28:59 -05002343// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002344// }
2345
Nicolas Capens157ba262019-12-10 17:49:14 -05002346RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002348 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002349 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2350}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002351
Nicolas Capens157ba262019-12-10 17:49:14 -05002352Type *Byte8::getType()
2353{
2354 return T(Type_v8i8);
2355}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002356
Nicolas Capens598f8d82016-09-26 15:09:10 -04002357// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2358// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002359// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002360// }
2361
2362// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2363// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002364// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002365// }
2366
Nicolas Capens157ba262019-12-10 17:49:14 -05002367RValue<SByte> SaturateSigned(RValue<Short> x)
2368{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002369 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002370 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2371}
2372
2373RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2374{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002375 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002376 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002377 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002378 SByte8 result;
2379 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2380 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2381 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2382 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2383 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2384 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2385 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2386 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002387
Nicolas Capens157ba262019-12-10 17:49:14 -05002388 return result;
2389 }
2390 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002391 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002392 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002393 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002394 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2395 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2396 paddsb->addArg(x.value);
2397 paddsb->addArg(y.value);
2398 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002399
Nicolas Capens157ba262019-12-10 17:49:14 -05002400 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002401 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002402}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002403
Nicolas Capens157ba262019-12-10 17:49:14 -05002404RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2405{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002406 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002407 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002408 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002409 SByte8 result;
2410 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2411 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2412 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2413 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2414 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2415 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2416 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2417 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002418
Nicolas Capens157ba262019-12-10 17:49:14 -05002419 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002420 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002421 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002422 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002423 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002424 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002425 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2426 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2427 psubsb->addArg(x.value);
2428 psubsb->addArg(y.value);
2429 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002430
Nicolas Capens157ba262019-12-10 17:49:14 -05002431 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002432 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002433}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002434
Nicolas Capens157ba262019-12-10 17:49:14 -05002435RValue<Int> SignMask(RValue<SByte8> x)
2436{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002437 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002438 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002439 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002440 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2441 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002442 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002443 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002444 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002445 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002446 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002447 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2448 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2449 movmsk->addArg(x.value);
2450 ::basicBlock->appendInst(movmsk);
2451
2452 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002453 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002454}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002455
Nicolas Capens157ba262019-12-10 17:49:14 -05002456RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2457{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002458 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002459 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2460}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002461
Nicolas Capens157ba262019-12-10 17:49:14 -05002462RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2463{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002464 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002465 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2466}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002467
Nicolas Capens157ba262019-12-10 17:49:14 -05002468Type *SByte8::getType()
2469{
2470 return T(Type_v8i8);
2471}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002472
Nicolas Capens157ba262019-12-10 17:49:14 -05002473Type *Byte16::getType()
2474{
2475 return T(Ice::IceType_v16i8);
2476}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002477
Nicolas Capens157ba262019-12-10 17:49:14 -05002478Type *SByte16::getType()
2479{
2480 return T(Ice::IceType_v16i8);
2481}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002482
Nicolas Capens157ba262019-12-10 17:49:14 -05002483Type *Short2::getType()
2484{
2485 return T(Type_v2i16);
2486}
Nicolas Capensd4227962016-11-09 14:24:25 -05002487
Nicolas Capens157ba262019-12-10 17:49:14 -05002488Type *UShort2::getType()
2489{
2490 return T(Type_v2i16);
2491}
Nicolas Capensd4227962016-11-09 14:24:25 -05002492
Nicolas Capens157ba262019-12-10 17:49:14 -05002493Short4::Short4(RValue<Int4> cast)
2494{
Ben Clayton713b8d32019-12-17 20:37:56 +00002495 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002496 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2497 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2498
2499 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
2500 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2501
2502 storeValue(short4);
2503}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002504
2505// Short4::Short4(RValue<Float> cast)
2506// {
2507// }
2508
Nicolas Capens157ba262019-12-10 17:49:14 -05002509Short4::Short4(RValue<Float4> cast)
2510{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002511 // TODO(b/150791192): Generalize and optimize
2512 auto smin = std::numeric_limits<short>::min();
2513 auto smax = std::numeric_limits<short>::max();
2514 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002515}
2516
2517RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2518{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002519 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002520 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002521 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002522 Short4 result;
2523 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2524 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2525 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2526 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002527
2528 return result;
2529 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002530 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002531 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002532 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2533 }
2534}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002535
Nicolas Capens157ba262019-12-10 17:49:14 -05002536RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2537{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002538 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002539 if(emulateIntrinsics)
2540 {
2541 Short4 result;
2542 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2543 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2544 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2545 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2546
2547 return result;
2548 }
2549 else
2550 {
2551 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2552 }
2553}
2554
2555RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2556{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002557 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002558 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2559 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2560 ::basicBlock->appendInst(cmp);
2561
2562 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2563 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2564 ::basicBlock->appendInst(select);
2565
2566 return RValue<Short4>(V(result));
2567}
2568
2569RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2570{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002571 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002572 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2573 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2574 ::basicBlock->appendInst(cmp);
2575
2576 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2577 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2578 ::basicBlock->appendInst(select);
2579
2580 return RValue<Short4>(V(result));
2581}
2582
2583RValue<Short> SaturateSigned(RValue<Int> x)
2584{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002585 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002586 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2587}
2588
2589RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2590{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002591 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002592 if(emulateIntrinsics)
2593 {
2594 Short4 result;
2595 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2596 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2597 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2598 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2599
2600 return result;
2601 }
2602 else
2603 {
2604 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002605 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002606 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2607 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2608 paddsw->addArg(x.value);
2609 paddsw->addArg(y.value);
2610 ::basicBlock->appendInst(paddsw);
2611
2612 return RValue<Short4>(V(result));
2613 }
2614}
2615
2616RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2617{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002618 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002619 if(emulateIntrinsics)
2620 {
2621 Short4 result;
2622 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2623 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2624 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2625 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2626
2627 return result;
2628 }
2629 else
2630 {
2631 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002632 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002633 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2634 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2635 psubsw->addArg(x.value);
2636 psubsw->addArg(y.value);
2637 ::basicBlock->appendInst(psubsw);
2638
2639 return RValue<Short4>(V(result));
2640 }
2641}
2642
2643RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2644{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002645 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002646 if(emulateIntrinsics)
2647 {
2648 Short4 result;
2649 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2650 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2651 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2652 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2653
2654 return result;
2655 }
2656 else
2657 {
2658 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002659 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002660 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2661 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2662 pmulhw->addArg(x.value);
2663 pmulhw->addArg(y.value);
2664 ::basicBlock->appendInst(pmulhw);
2665
2666 return RValue<Short4>(V(result));
2667 }
2668}
2669
2670RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2671{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002672 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002673 if(emulateIntrinsics)
2674 {
2675 Int2 result;
2676 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2677 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2678
2679 return result;
2680 }
2681 else
2682 {
2683 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002684 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002685 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2686 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2687 pmaddwd->addArg(x.value);
2688 pmaddwd->addArg(y.value);
2689 ::basicBlock->appendInst(pmaddwd);
2690
2691 return As<Int2>(V(result));
2692 }
2693}
2694
2695RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2696{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002697 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002698 if(emulateIntrinsics)
2699 {
2700 SByte8 result;
2701 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2702 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2703 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2704 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2705 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2706 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2707 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2708 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2709
2710 return result;
2711 }
2712 else
2713 {
2714 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002715 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002716 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2717 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2718 pack->addArg(x.value);
2719 pack->addArg(y.value);
2720 ::basicBlock->appendInst(pack);
2721
2722 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2723 }
2724}
2725
2726RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2727{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002728 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002729 if(emulateIntrinsics)
2730 {
2731 Byte8 result;
2732 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2733 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2734 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2735 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2736 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2737 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2738 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2739 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2740
2741 return result;
2742 }
2743 else
2744 {
2745 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002746 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002747 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2748 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2749 pack->addArg(x.value);
2750 pack->addArg(y.value);
2751 ::basicBlock->appendInst(pack);
2752
2753 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2754 }
2755}
2756
2757RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2758{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002759 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002760 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2761}
2762
2763RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2764{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002765 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002766 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2767}
2768
2769Type *Short4::getType()
2770{
2771 return T(Type_v4i16);
2772}
2773
2774UShort4::UShort4(RValue<Float4> cast, bool saturate)
2775{
2776 if(saturate)
2777 {
2778 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002779 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002780 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2781 // PackUnsigned takes care of 0x0000 saturation.
2782 Int4 int4(Min(cast, Float4(0xFFFF)));
2783 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002784 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002785 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002786 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002787 // ARM saturates the 32-bit integer result on overflow/undeflow.
2788 Int4 int4(cast);
2789 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002790 }
2791 else
2792 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002793 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002794 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002795 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002796 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002797 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002798 *this = Short4(Int4(cast));
2799 }
2800}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002801
Nicolas Capens157ba262019-12-10 17:49:14 -05002802RValue<UShort> Extract(RValue<UShort4> val, int i)
2803{
2804 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2805}
2806
2807RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2808{
2809 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2810}
2811
2812RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2813{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002814 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002815 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002816
Nicolas Capens157ba262019-12-10 17:49:14 -05002817 {
2818 UShort4 result;
2819 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2820 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2821 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2822 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2823
2824 return result;
2825 }
2826 else
2827 {
2828 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2829 }
2830}
2831
2832RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2833{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002834 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002835 if(emulateIntrinsics)
2836 {
2837 UShort4 result;
2838 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2839 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2840 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2841 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2842
2843 return result;
2844 }
2845 else
2846 {
2847 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2848 }
2849}
2850
2851RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002854 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2855 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2856 ::basicBlock->appendInst(cmp);
2857
2858 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2859 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2860 ::basicBlock->appendInst(select);
2861
2862 return RValue<UShort4>(V(result));
2863}
2864
2865RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2866{
2867 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2868 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2869 ::basicBlock->appendInst(cmp);
2870
2871 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2872 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2873 ::basicBlock->appendInst(select);
2874
2875 return RValue<UShort4>(V(result));
2876}
2877
2878RValue<UShort> SaturateUnsigned(RValue<Int> x)
2879{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002880 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002881 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2882}
2883
2884RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2885{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002886 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002887 if(emulateIntrinsics)
2888 {
2889 UShort4 result;
2890 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2891 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2892 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2893 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2894
2895 return result;
2896 }
2897 else
2898 {
2899 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002900 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002901 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2902 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2903 paddusw->addArg(x.value);
2904 paddusw->addArg(y.value);
2905 ::basicBlock->appendInst(paddusw);
2906
2907 return RValue<UShort4>(V(result));
2908 }
2909}
2910
2911RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2912{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002913 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002914 if(emulateIntrinsics)
2915 {
2916 UShort4 result;
2917 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2918 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2919 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2920 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2921
2922 return result;
2923 }
2924 else
2925 {
2926 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002927 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002928 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2929 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2930 psubusw->addArg(x.value);
2931 psubusw->addArg(y.value);
2932 ::basicBlock->appendInst(psubusw);
2933
2934 return RValue<UShort4>(V(result));
2935 }
2936}
2937
2938RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2939{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002940 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002941 if(emulateIntrinsics)
2942 {
2943 UShort4 result;
2944 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2945 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2946 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2947 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2948
2949 return result;
2950 }
2951 else
2952 {
2953 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002954 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002955 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2956 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2957 pmulhuw->addArg(x.value);
2958 pmulhuw->addArg(y.value);
2959 ::basicBlock->appendInst(pmulhuw);
2960
2961 return RValue<UShort4>(V(result));
2962 }
2963}
2964
2965RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2966{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002967 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002968 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2969
2970 // Scalarized implementation.
2971 Int4 result;
2972 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2973 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2974 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2975 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2976
2977 return result;
2978}
2979
2980RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2981{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002982 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002983 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2984
2985 if(false) // Partial product based implementation.
2986 {
2987 auto xh = x >> 16;
2988 auto yh = y >> 16;
2989 auto xl = x & UInt4(0x0000FFFF);
2990 auto yl = y & UInt4(0x0000FFFF);
2991 auto xlyh = xl * yh;
2992 auto xhyl = xh * yl;
2993 auto xlyhh = xlyh >> 16;
2994 auto xhylh = xhyl >> 16;
2995 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2996 auto xhyll = xhyl & UInt4(0x0000FFFF);
2997 auto xlylh = (xl * yl) >> 16;
2998 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2999
3000 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003001 }
3002
Nicolas Capens157ba262019-12-10 17:49:14 -05003003 // Scalarized implementation.
3004 Int4 result;
3005 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3006 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3007 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3008 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3009
3010 return As<UInt4>(result);
3011}
3012
3013RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3014{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003015 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003016 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003017 return UShort4(0);
3018}
3019
3020Type *UShort4::getType()
3021{
3022 return T(Type_v4i16);
3023}
3024
3025RValue<Short> Extract(RValue<Short8> val, int i)
3026{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003027 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003028 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3029}
3030
3031RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3032{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003033 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003034 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
3035}
3036
3037RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3038{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003039 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003040 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003041 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003042 Short8 result;
3043 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3044 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3045 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3046 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3047 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3048 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3049 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3050 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003051
Nicolas Capens157ba262019-12-10 17:49:14 -05003052 return result;
3053 }
3054 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003055 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003056 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003057 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003058}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003059
Nicolas Capens157ba262019-12-10 17:49:14 -05003060RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3061{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003062 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003063 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003064 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003065 Short8 result;
3066 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3067 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3068 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3069 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3070 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3071 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3072 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3073 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003074
Nicolas Capens157ba262019-12-10 17:49:14 -05003075 return result;
3076 }
3077 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003078 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003079 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003080 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003081}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003082
Nicolas Capens157ba262019-12-10 17:49:14 -05003083RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3084{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003085 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003086 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003087 return Int4(0);
3088}
3089
3090RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3091{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003092 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003093 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003094 return Short8(0);
3095}
3096
3097Type *Short8::getType()
3098{
3099 return T(Ice::IceType_v8i16);
3100}
3101
3102RValue<UShort> Extract(RValue<UShort8> val, int i)
3103{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003104 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003105 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
3106}
3107
3108RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3109{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003110 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003111 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
3112}
3113
3114RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3115{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003116 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003117 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003118 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003119 UShort8 result;
3120 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3121 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3122 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3123 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3124 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3125 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3126 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3127 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003128
Nicolas Capens157ba262019-12-10 17:49:14 -05003129 return result;
3130 }
3131 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003132 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003133 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003134 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003135}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003136
Nicolas Capens157ba262019-12-10 17:49:14 -05003137RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3138{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003139 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003140 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003141 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003142 UShort8 result;
3143 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3144 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3145 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3146 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3147 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3148 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3149 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3150 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003151
Nicolas Capens157ba262019-12-10 17:49:14 -05003152 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003153 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003154 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003155 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003156 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003157 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003158}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003159
Nicolas Capens157ba262019-12-10 17:49:14 -05003160RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3161{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003162 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003163 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003164 return UShort8(0);
3165}
3166
Nicolas Capens157ba262019-12-10 17:49:14 -05003167Type *UShort8::getType()
3168{
3169 return T(Ice::IceType_v8i16);
3170}
3171
Ben Clayton713b8d32019-12-17 20:37:56 +00003172RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003173{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003174 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003175 RValue<Int> res = val;
3176 val += 1;
3177 return res;
3178}
3179
Ben Clayton713b8d32019-12-17 20:37:56 +00003180const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003181{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003182 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003183 val += 1;
3184 return val;
3185}
3186
Ben Clayton713b8d32019-12-17 20:37:56 +00003187RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003188{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003189 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003190 RValue<Int> res = val;
3191 val -= 1;
3192 return res;
3193}
3194
Ben Clayton713b8d32019-12-17 20:37:56 +00003195const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003196{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003197 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003198 val -= 1;
3199 return val;
3200}
3201
3202RValue<Int> RoundInt(RValue<Float> cast)
3203{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003204 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003205 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003206 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003207 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3208 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003209 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003210 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003211 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003212 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003213 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003214 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3215 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3216 nearbyint->addArg(cast.value);
3217 ::basicBlock->appendInst(nearbyint);
3218
3219 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003220 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003221}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003222
Nicolas Capens157ba262019-12-10 17:49:14 -05003223Type *Int::getType()
3224{
3225 return T(Ice::IceType_i32);
3226}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003227
Nicolas Capens157ba262019-12-10 17:49:14 -05003228Type *Long::getType()
3229{
3230 return T(Ice::IceType_i64);
3231}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003232
Nicolas Capens157ba262019-12-10 17:49:14 -05003233UInt::UInt(RValue<Float> cast)
3234{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003235 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003236 // Smallest positive value representable in UInt, but not in Int
3237 const unsigned int ustart = 0x80000000u;
3238 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003239
Nicolas Capens157ba262019-12-10 17:49:14 -05003240 // If the value is negative, store 0, otherwise store the result of the conversion
3241 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003242 // Check if the value can be represented as an Int
3243 IfThenElse(cast >= ustartf,
3244 // If the value is too large, subtract ustart and re-add it after conversion.
3245 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3246 // Otherwise, just convert normally
3247 Int(cast)))
3248 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05003249}
Nicolas Capensa8086512016-11-07 17:32:17 -05003250
Ben Clayton713b8d32019-12-17 20:37:56 +00003251RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003252{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003253 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003254 RValue<UInt> res = val;
3255 val += 1;
3256 return res;
3257}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003258
Ben Clayton713b8d32019-12-17 20:37:56 +00003259const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003260{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003261 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003262 val += 1;
3263 return val;
3264}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003265
Ben Clayton713b8d32019-12-17 20:37:56 +00003266RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003268 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003269 RValue<UInt> res = val;
3270 val -= 1;
3271 return res;
3272}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003273
Ben Clayton713b8d32019-12-17 20:37:56 +00003274const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003277 val -= 1;
3278 return val;
3279}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003280
Nicolas Capens598f8d82016-09-26 15:09:10 -04003281// RValue<UInt> RoundUInt(RValue<Float> cast)
3282// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003283// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003284// }
3285
Nicolas Capens157ba262019-12-10 17:49:14 -05003286Type *UInt::getType()
3287{
3288 return T(Ice::IceType_i32);
3289}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003290
3291// Int2::Int2(RValue<Int> cast)
3292// {
3293// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
3294// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
3295//
3296// Constant *shuffle[2];
3297// shuffle[0] = Nucleus::createConstantInt(0);
3298// shuffle[1] = Nucleus::createConstantInt(0);
3299//
3300// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
3301//
3302// storeValue(replicate);
3303// }
3304
Nicolas Capens157ba262019-12-10 17:49:14 -05003305RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3306{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003307 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003308 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003309 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003310 Int2 result;
3311 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3312 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003313
Nicolas Capens157ba262019-12-10 17:49:14 -05003314 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003315 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003316 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003317 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003318 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003319 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003320}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003321
Nicolas Capens157ba262019-12-10 17:49:14 -05003322RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003325 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003326 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003327 Int2 result;
3328 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3329 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3330
3331 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003332 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003333 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003334 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003335 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003336 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003337}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003338
Nicolas Capens157ba262019-12-10 17:49:14 -05003339Type *Int2::getType()
3340{
3341 return T(Type_v2i32);
3342}
3343
3344RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3345{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003346 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003347 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003348 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003349 UInt2 result;
3350 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3351 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003352
Nicolas Capens157ba262019-12-10 17:49:14 -05003353 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003354 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003355 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003356 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003357 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003358 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003359}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003360
Nicolas Capens157ba262019-12-10 17:49:14 -05003361RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3362{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003363 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003364 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003365 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003366 UInt2 result;
3367 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3368 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003369
Nicolas Capens157ba262019-12-10 17:49:14 -05003370 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003371 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003372 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003373 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003374 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003375 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003376}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003377
Nicolas Capens157ba262019-12-10 17:49:14 -05003378Type *UInt2::getType()
3379{
3380 return T(Type_v2i32);
3381}
3382
Ben Clayton713b8d32019-12-17 20:37:56 +00003383Int4::Int4(RValue<Byte4> cast)
3384 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003385{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003386 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003387 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3388 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3389
3390 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003391 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003392 Value *b = Nucleus::createBitCast(a, Byte16::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003393 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003394
Ben Clayton713b8d32019-12-17 20:37:56 +00003395 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003396 Value *d = Nucleus::createBitCast(c, Short8::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003397 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003398
3399 Value *f = Nucleus::createBitCast(e, Int4::getType());
3400 storeValue(f);
3401}
3402
Ben Clayton713b8d32019-12-17 20:37:56 +00003403Int4::Int4(RValue<SByte4> cast)
3404 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003405{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003406 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003407 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3408 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3409
Ben Clayton713b8d32019-12-17 20:37:56 +00003410 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003411 Value *b = Nucleus::createBitCast(a, Byte16::getType());
3412 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3413
Ben Clayton713b8d32019-12-17 20:37:56 +00003414 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003415 Value *d = Nucleus::createBitCast(c, Short8::getType());
3416 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3417
3418 *this = As<Int4>(e) >> 24;
3419}
3420
Ben Clayton713b8d32019-12-17 20:37:56 +00003421Int4::Int4(RValue<Short4> cast)
3422 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003423{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003424 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003425 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003426 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3427
3428 *this = As<Int4>(c) >> 16;
3429}
3430
Ben Clayton713b8d32019-12-17 20:37:56 +00003431Int4::Int4(RValue<UShort4> cast)
3432 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003433{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003434 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003435 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003436 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
3437 Value *d = Nucleus::createBitCast(c, Int4::getType());
3438 storeValue(d);
3439}
3440
Ben Clayton713b8d32019-12-17 20:37:56 +00003441Int4::Int4(RValue<Int> rhs)
3442 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003443{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003444 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003445 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
3446
Ben Clayton713b8d32019-12-17 20:37:56 +00003447 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003448 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3449
3450 storeValue(replicate);
3451}
3452
3453RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3454{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003455 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003456 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003457 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003458 Int4 result;
3459 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3460 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3461 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3462 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003463
Nicolas Capens157ba262019-12-10 17:49:14 -05003464 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003465 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003466 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003467 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003468 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003469 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003470}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003471
Nicolas Capens157ba262019-12-10 17:49:14 -05003472RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3473{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003474 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003475 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003476 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003477 Int4 result;
3478 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3479 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3480 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3481 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003482
Nicolas Capens157ba262019-12-10 17:49:14 -05003483 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003484 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003485 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003486 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003487 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003488 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003489}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003490
Nicolas Capens157ba262019-12-10 17:49:14 -05003491RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3492{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003493 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003494 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
3495}
3496
3497RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3498{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003499 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003500 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
3501}
3502
3503RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3504{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003505 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003506 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
3507}
3508
3509RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3510{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003511 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003512 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
3513}
3514
3515RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3516{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003517 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003518 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
3519}
3520
3521RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3522{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003523 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003524 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
3525}
3526
3527RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3528{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003529 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003530 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3531 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3532 ::basicBlock->appendInst(cmp);
3533
3534 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3535 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3536 ::basicBlock->appendInst(select);
3537
3538 return RValue<Int4>(V(result));
3539}
3540
3541RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3542{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003543 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003544 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3545 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3546 ::basicBlock->appendInst(cmp);
3547
3548 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3549 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3550 ::basicBlock->appendInst(select);
3551
3552 return RValue<Int4>(V(result));
3553}
3554
3555RValue<Int4> RoundInt(RValue<Float4> cast)
3556{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003557 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003558 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003559 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003560 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3561 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003562 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003563 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003564 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003565 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003566 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003567 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3568 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3569 nearbyint->addArg(cast.value);
3570 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003571
3572 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003573 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003574}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003575
Nicolas Capens157ba262019-12-10 17:49:14 -05003576RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3577{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003578 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003579 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003580 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003581 Short8 result;
3582 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3583 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3584 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3585 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3586 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3587 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3588 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3589 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003590
Nicolas Capens157ba262019-12-10 17:49:14 -05003591 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003592 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003593 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003594 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003595 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003596 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003597 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3598 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3599 pack->addArg(x.value);
3600 pack->addArg(y.value);
3601 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003602
Nicolas Capens157ba262019-12-10 17:49:14 -05003603 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003604 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003605}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003606
Nicolas Capens157ba262019-12-10 17:49:14 -05003607RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3608{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003609 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003610 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003611 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003612 RValue<Int4> sx = As<Int4>(x);
3613 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003614
Nicolas Capens157ba262019-12-10 17:49:14 -05003615 RValue<Int4> sy = As<Int4>(y);
3616 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003617
Nicolas Capens157ba262019-12-10 17:49:14 -05003618 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003619 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003620 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003621 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003622 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003623 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003624 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3625 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3626 pack->addArg(x.value);
3627 pack->addArg(y.value);
3628 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003629
Nicolas Capens157ba262019-12-10 17:49:14 -05003630 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003631 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003632}
Nicolas Capens33438a62017-09-27 11:47:35 -04003633
Nicolas Capens157ba262019-12-10 17:49:14 -05003634RValue<Int> SignMask(RValue<Int4> x)
3635{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003636 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003637 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003638 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003639 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3640 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003641 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003642 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003643 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003644 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003645 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003646 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3647 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3648 movmsk->addArg(x.value);
3649 ::basicBlock->appendInst(movmsk);
3650
3651 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003652 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003653}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003654
Nicolas Capens157ba262019-12-10 17:49:14 -05003655Type *Int4::getType()
3656{
3657 return T(Ice::IceType_v4i32);
3658}
3659
Ben Clayton713b8d32019-12-17 20:37:56 +00003660UInt4::UInt4(RValue<Float4> cast)
3661 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003662{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003663 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003664 // Smallest positive value representable in UInt, but not in Int
3665 const unsigned int ustart = 0x80000000u;
3666 const float ustartf = float(ustart);
3667
3668 // Check if the value can be represented as an Int
3669 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3670 // If the value is too large, subtract ustart and re-add it after conversion.
3671 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003672 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003673 (~uiValue & Int4(cast));
3674 // If the value is negative, store 0, otherwise store the result of the conversion
3675 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3676}
3677
Ben Clayton713b8d32019-12-17 20:37:56 +00003678UInt4::UInt4(RValue<UInt> rhs)
3679 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003680{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003681 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003682 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3683
Ben Clayton713b8d32019-12-17 20:37:56 +00003684 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003685 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3686
3687 storeValue(replicate);
3688}
3689
3690RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3691{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003692 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003693 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003694 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003695 UInt4 result;
3696 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3697 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3698 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3699 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003700
Nicolas Capens157ba262019-12-10 17:49:14 -05003701 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003702 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003703 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003704 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003705 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003706 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003707}
Ben Clayton88816fa2019-05-15 17:08:14 +01003708
Nicolas Capens157ba262019-12-10 17:49:14 -05003709RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3710{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003711 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003712 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003713 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003714 UInt4 result;
3715 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3716 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3717 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3718 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003719
Nicolas Capens157ba262019-12-10 17:49:14 -05003720 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003721 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003722 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003723 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003724 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003725 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003726}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003727
Nicolas Capens157ba262019-12-10 17:49:14 -05003728RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3729{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003730 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003731 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3732}
3733
3734RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3735{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003736 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003737 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3738}
3739
3740RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3741{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003742 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003743 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3744}
3745
3746RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3747{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003748 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003749 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3750}
3751
3752RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3753{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003754 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003755 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3756}
3757
3758RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3759{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003760 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003761 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3762}
3763
3764RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3765{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003766 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003767 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3768 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3769 ::basicBlock->appendInst(cmp);
3770
3771 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3772 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3773 ::basicBlock->appendInst(select);
3774
3775 return RValue<UInt4>(V(result));
3776}
3777
3778RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3779{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003780 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003781 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3782 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3783 ::basicBlock->appendInst(cmp);
3784
3785 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3786 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3787 ::basicBlock->appendInst(select);
3788
3789 return RValue<UInt4>(V(result));
3790}
3791
3792Type *UInt4::getType()
3793{
3794 return T(Ice::IceType_v4i32);
3795}
3796
3797Type *Half::getType()
3798{
3799 return T(Ice::IceType_i16);
3800}
3801
3802RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3803{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003804 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003805 return 1.0f / x;
3806}
3807
3808RValue<Float> RcpSqrt_pp(RValue<Float> x)
3809{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003810 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003811 return Rcp_pp(Sqrt(x));
3812}
3813
3814RValue<Float> Sqrt(RValue<Float> x)
3815{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003816 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003817 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003818 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003819 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3820 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3821 sqrt->addArg(x.value);
3822 ::basicBlock->appendInst(sqrt);
3823
3824 return RValue<Float>(V(result));
3825}
3826
3827RValue<Float> Round(RValue<Float> x)
3828{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003829 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003830 return Float4(Round(Float4(x))).x;
3831}
3832
3833RValue<Float> Trunc(RValue<Float> x)
3834{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003835 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003836 return Float4(Trunc(Float4(x))).x;
3837}
3838
3839RValue<Float> Frac(RValue<Float> x)
3840{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003841 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003842 return Float4(Frac(Float4(x))).x;
3843}
3844
3845RValue<Float> Floor(RValue<Float> x)
3846{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003847 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003848 return Float4(Floor(Float4(x))).x;
3849}
3850
3851RValue<Float> Ceil(RValue<Float> x)
3852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003854 return Float4(Ceil(Float4(x))).x;
3855}
3856
3857Type *Float::getType()
3858{
3859 return T(Ice::IceType_f32);
3860}
3861
3862Type *Float2::getType()
3863{
3864 return T(Type_v2f32);
3865}
3866
Ben Clayton713b8d32019-12-17 20:37:56 +00003867Float4::Float4(RValue<Float> rhs)
3868 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003869{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003870 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003871 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3872
Ben Clayton713b8d32019-12-17 20:37:56 +00003873 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003874 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3875
3876 storeValue(replicate);
3877}
3878
3879RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3880{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003881 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003882 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3883 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3884 ::basicBlock->appendInst(cmp);
3885
3886 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3887 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3888 ::basicBlock->appendInst(select);
3889
3890 return RValue<Float4>(V(result));
3891}
3892
3893RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3894{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003895 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003896 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3897 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3898 ::basicBlock->appendInst(cmp);
3899
3900 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3901 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3902 ::basicBlock->appendInst(select);
3903
3904 return RValue<Float4>(V(result));
3905}
3906
3907RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3908{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003909 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003910 return Float4(1.0f) / x;
3911}
3912
3913RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3914{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003915 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003916 return Rcp_pp(Sqrt(x));
3917}
3918
3919RValue<Float4> Sqrt(RValue<Float4> x)
3920{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003921 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003922 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003923 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003924 Float4 result;
3925 result.x = Sqrt(Float(Float4(x).x));
3926 result.y = Sqrt(Float(Float4(x).y));
3927 result.z = Sqrt(Float(Float4(x).z));
3928 result.w = Sqrt(Float(Float4(x).w));
3929
3930 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003931 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003932 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003933 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003934 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003935 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003936 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3937 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3938 sqrt->addArg(x.value);
3939 ::basicBlock->appendInst(sqrt);
3940
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003941 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003942 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003943}
Nicolas Capens157ba262019-12-10 17:49:14 -05003944
3945RValue<Int> SignMask(RValue<Float4> x)
3946{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003947 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003948 if(emulateIntrinsics || CPUID::ARM)
3949 {
3950 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3951 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3952 }
3953 else
3954 {
3955 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003956 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003957 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3958 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3959 movmsk->addArg(x.value);
3960 ::basicBlock->appendInst(movmsk);
3961
3962 return RValue<Int>(V(result));
3963 }
3964}
3965
3966RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3967{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003968 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003969 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3970}
3971
3972RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3973{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003974 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003975 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3976}
3977
3978RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3979{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003980 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003981 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3982}
3983
3984RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3985{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003986 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003987 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3988}
3989
3990RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3991{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003992 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003993 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3994}
3995
3996RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3997{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003998 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003999 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
4000}
4001
4002RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4003{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004004 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004005 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
4006}
4007
4008RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4009{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004010 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004011 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
4012}
4013
4014RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4015{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004016 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004017 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
4018}
4019
4020RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4021{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004022 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004023 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
4024}
4025
4026RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4027{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004028 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004029 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
4030}
4031
4032RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4033{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004034 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004035 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
4036}
4037
4038RValue<Float4> Round(RValue<Float4> x)
4039{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004040 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004041 if(emulateIntrinsics || CPUID::ARM)
4042 {
4043 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4044 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4045 }
4046 else if(CPUID::SSE4_1)
4047 {
4048 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004049 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004050 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4051 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4052 round->addArg(x.value);
4053 round->addArg(::context->getConstantInt32(0));
4054 ::basicBlock->appendInst(round);
4055
4056 return RValue<Float4>(V(result));
4057 }
4058 else
4059 {
4060 return Float4(RoundInt(x));
4061 }
4062}
4063
4064RValue<Float4> Trunc(RValue<Float4> x)
4065{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004066 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004067 if(CPUID::SSE4_1)
4068 {
4069 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004070 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004071 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4072 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4073 round->addArg(x.value);
4074 round->addArg(::context->getConstantInt32(3));
4075 ::basicBlock->appendInst(round);
4076
4077 return RValue<Float4>(V(result));
4078 }
4079 else
4080 {
4081 return Float4(Int4(x));
4082 }
4083}
4084
4085RValue<Float4> Frac(RValue<Float4> x)
4086{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004087 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004088 Float4 frc;
4089
4090 if(CPUID::SSE4_1)
4091 {
4092 frc = x - Floor(x);
4093 }
4094 else
4095 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004096 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004097
Ben Clayton713b8d32019-12-17 20:37:56 +00004098 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004099 }
4100
4101 // x - floor(x) can be 1.0 for very small negative x.
4102 // Clamp against the value just below 1.0.
4103 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4104}
4105
4106RValue<Float4> Floor(RValue<Float4> x)
4107{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004108 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004109 if(CPUID::SSE4_1)
4110 {
4111 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004112 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004113 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4114 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4115 round->addArg(x.value);
4116 round->addArg(::context->getConstantInt32(1));
4117 ::basicBlock->appendInst(round);
4118
4119 return RValue<Float4>(V(result));
4120 }
4121 else
4122 {
4123 return x - Frac(x);
4124 }
4125}
4126
4127RValue<Float4> Ceil(RValue<Float4> x)
4128{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004129 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004130 if(CPUID::SSE4_1)
4131 {
4132 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004133 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004134 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4135 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4136 round->addArg(x.value);
4137 round->addArg(::context->getConstantInt32(2));
4138 ::basicBlock->appendInst(round);
4139
4140 return RValue<Float4>(V(result));
4141 }
4142 else
4143 {
4144 return -Floor(-x);
4145 }
4146}
4147
4148Type *Float4::getType()
4149{
4150 return T(Ice::IceType_v4f32);
4151}
4152
4153RValue<Long> Ticks()
4154{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004155 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004156 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004157 return Long(Int(0));
4158}
4159
Ben Clayton713b8d32019-12-17 20:37:56 +00004160RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004161{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004162 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004163 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004164}
4165
Ben Clayton713b8d32019-12-17 20:37:56 +00004166RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004167{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004168 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004169 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004170}
4171
Ben Clayton713b8d32019-12-17 20:37:56 +00004172Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004173{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004174 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano16ae92a2020-03-10 10:53:24 -04004175 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004176}
4177
4178void Breakpoint()
4179{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004180 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004181 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004182 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4183 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4184 ::basicBlock->appendInst(trap);
4185}
4186
Ben Clayton713b8d32019-12-17 20:37:56 +00004187void Nucleus::createFence(std::memory_order memoryOrder)
4188{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004189 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004190 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4191 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4192 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4193 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4194 inst->addArg(order);
4195 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004196}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004197
Ben Clayton713b8d32019-12-17 20:37:56 +00004198Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4199{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004200 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004201 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004202 return nullptr;
4203}
4204void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4205{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004206 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004207 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004208}
Nicolas Capens157ba262019-12-10 17:49:14 -05004209
4210RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4211{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004212 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004213 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4214}
4215
4216RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4217{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004218 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004219 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4220}
4221
4222void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4223{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004224 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004225 return emulated::Scatter(base, val, offsets, mask, alignment);
4226}
4227
4228void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4229{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004230 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004231 return emulated::Scatter(base, val, offsets, mask, alignment);
4232}
4233
4234RValue<Float> Exp2(RValue<Float> x)
4235{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004236 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004237 return emulated::Exp2(x);
4238}
4239
4240RValue<Float> Log2(RValue<Float> x)
4241{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004242 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004243 return emulated::Log2(x);
4244}
4245
4246RValue<Float4> Sin(RValue<Float4> x)
4247{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004248 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004249 return emulated::Sin(x);
4250}
4251
4252RValue<Float4> Cos(RValue<Float4> x)
4253{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004254 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004255 return emulated::Cos(x);
4256}
4257
4258RValue<Float4> Tan(RValue<Float4> x)
4259{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004260 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004261 return emulated::Tan(x);
4262}
4263
4264RValue<Float4> Asin(RValue<Float4> x)
4265{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004266 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004267 return emulated::Asin(x);
4268}
4269
4270RValue<Float4> Acos(RValue<Float4> x)
4271{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004272 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004273 return emulated::Acos(x);
4274}
4275
4276RValue<Float4> Atan(RValue<Float4> x)
4277{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004278 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004279 return emulated::Atan(x);
4280}
4281
4282RValue<Float4> Sinh(RValue<Float4> x)
4283{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004284 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004285 return emulated::Sinh(x);
4286}
4287
4288RValue<Float4> Cosh(RValue<Float4> x)
4289{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004290 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004291 return emulated::Cosh(x);
4292}
4293
4294RValue<Float4> Tanh(RValue<Float4> x)
4295{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004296 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004297 return emulated::Tanh(x);
4298}
4299
4300RValue<Float4> Asinh(RValue<Float4> x)
4301{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004302 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004303 return emulated::Asinh(x);
4304}
4305
4306RValue<Float4> Acosh(RValue<Float4> x)
4307{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004308 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004309 return emulated::Acosh(x);
4310}
4311
4312RValue<Float4> Atanh(RValue<Float4> x)
4313{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004314 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004315 return emulated::Atanh(x);
4316}
4317
4318RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4319{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004320 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004321 return emulated::Atan2(x, y);
4322}
4323
4324RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4325{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004326 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004327 return emulated::Pow(x, y);
4328}
4329
4330RValue<Float4> Exp(RValue<Float4> x)
4331{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004332 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004333 return emulated::Exp(x);
4334}
4335
4336RValue<Float4> Log(RValue<Float4> x)
4337{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004338 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004339 return emulated::Log(x);
4340}
4341
4342RValue<Float4> Exp2(RValue<Float4> x)
4343{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004344 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004345 return emulated::Exp2(x);
4346}
4347
4348RValue<Float4> Log2(RValue<Float4> x)
4349{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004350 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004351 return emulated::Log2(x);
4352}
4353
4354RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4355{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004356 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004357 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004358 {
Ben Claytonce54c592020-02-07 11:30:51 +00004359 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004360 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004361 }
4362 else
4363 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004364 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004365 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4366 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4367 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4368 ctlz->addArg(x.value);
4369 ::basicBlock->appendInst(ctlz);
4370
4371 return RValue<UInt>(V(result));
4372 }
4373}
4374
4375RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4376{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004377 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004378 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004379 {
Ben Claytonce54c592020-02-07 11:30:51 +00004380 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004381 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004382 }
4383 else
4384 {
4385 // TODO: implement vectorized version in Subzero
4386 UInt4 result;
4387 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4388 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4389 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4390 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4391 return result;
4392 }
4393}
4394
4395RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4396{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004397 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004398 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004399 {
Ben Claytonce54c592020-02-07 11:30:51 +00004400 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004401 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004402 }
4403 else
4404 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004405 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004406 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4407 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4408 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4409 ctlz->addArg(x.value);
4410 ::basicBlock->appendInst(ctlz);
4411
4412 return RValue<UInt>(V(result));
4413 }
4414}
4415
4416RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004418 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004419 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004420 {
Ben Claytonce54c592020-02-07 11:30:51 +00004421 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004422 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004423 }
4424 else
4425 {
4426 // TODO: implement vectorized version in Subzero
4427 UInt4 result;
4428 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4429 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4430 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4431 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4432 return result;
4433 }
4434}
4435
Antonio Maiorano370cba52019-12-31 11:36:07 -05004436RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4437{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004438 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004439 return emulated::MinAtomic(x, y, memoryOrder);
4440}
4441
4442RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4443{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004444 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004445 return emulated::MinAtomic(x, y, memoryOrder);
4446}
4447
4448RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4449{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004450 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004451 return emulated::MaxAtomic(x, y, memoryOrder);
4452}
4453
4454RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4455{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004456 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004457 return emulated::MaxAtomic(x, y, memoryOrder);
4458}
4459
// Emits a print of the caller's backtrace location, but only when both
// ENABLE_RR_DEBUG_INFO and ENABLE_RR_EMIT_PRINT_LOCATION are defined.
// Otherwise the body is empty.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO) && defined(ENABLE_RR_EMIT_PRINT_LOCATION)
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO && ENABLE_RR_EMIT_PRINT_LOCATION
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004468void EmitDebugVariable(Value *value) {}
// Debug-flush hook; intentionally empty in this backend.
void FlushDebug()
{
}
4470
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004471namespace {
4472namespace coro {
4473
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004474// Instance data per generated coroutine
4475// This is the "handle" type used for Coroutine functions
4476// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4477struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004478{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004479 bool useInternalScheduler = false;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004480 marl::Event suspended; // the coroutine is suspended on a yield()
4481 marl::Event resumed; // the caller is suspended on an await()
4482 marl::Event done{ marl::Event::Mode::Manual }; // the coroutine should stop at the next yield()
4483 marl::Event terminated{ marl::Event::Mode::Manual }; // the coroutine has finished.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004484 void *promisePtr = nullptr;
4485};
4486
4487CoroutineData *createCoroutineData()
4488{
4489 return new CoroutineData{};
4490}
4491
4492void destroyCoroutineData(CoroutineData *coroData)
4493{
4494 delete coroData;
4495}
4496
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004497// suspend() pauses execution of the coroutine, and resumes execution from the
4498// caller's call to await().
4499// Returns true if await() is called again, or false if coroutine_destroy()
4500// is called.
4501bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004502{
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004503 auto *data = reinterpret_cast<CoroutineData *>(handle);
4504 data->suspended.signal();
4505 data->resumed.wait();
4506 return !data->done.test();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004507}
4508
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004509// resume() is called by await(), blocking until the coroutine calls yield()
4510// or the coroutine terminates.
4511void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004512{
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004513 auto *data = reinterpret_cast<CoroutineData *>(handle);
4514 data->resumed.signal();
4515 data->suspended.wait();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004516}
4517
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004518// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4519// until the coroutine ends, and deletes the coroutine data.
4520void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004521{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004522 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004523 coroData->done.signal(); // signal that the coroutine should stop at next (or current) yield.
4524 coroData->resumed.signal(); // wake the coroutine if blocked on a yield.
4525 coroData->terminated.wait(); // wait for the coroutine to return.
4526 if(coroData->useInternalScheduler)
4527 {
4528 ::getOrCreateScheduler().unbind();
4529 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004530 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004531}
4532
4533namespace detail {
4534thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4535} // namespace detail
4536
4537void setHandleParam(Nucleus::CoroutineHandle handle)
4538{
4539 ASSERT(!detail::coroHandle);
4540 detail::coroHandle = handle;
4541}
4542
4543Nucleus::CoroutineHandle getHandleParam()
4544{
4545 ASSERT(detail::coroHandle);
4546 auto handle = detail::coroHandle;
4547 detail::coroHandle = {};
4548 return handle;
4549}
4550
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004551bool isDone(Nucleus::CoroutineHandle handle)
4552{
4553 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004554 return coroData->done.test();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004555}
4556
4557void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4558{
4559 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4560 coroData->promisePtr = promisePtr;
4561}
4562
4563void *getPromisePtr(Nucleus::CoroutineHandle handle)
4564{
4565 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4566 return coroData->promisePtr;
4567}
4568
4569} // namespace coro
4570} // namespace
4571
4572// Used to generate coroutines.
4573// Lifetime: from yield to acquireCoroutine
4574class CoroutineGenerator
4575{
4576public:
4577 CoroutineGenerator()
4578 {
4579 }
4580
4581 // Inserts instructions at the top of the current function to make it a coroutine.
4582 void generateCoroutineBegin()
4583 {
4584 // Begin building the main coroutine_begin() function.
4585 // We insert these instructions at the top of the entry node,
4586 // before existing reactor-generated instructions.
4587
4588 // CoroutineHandle coroutine_begin(<Arguments>)
4589 // {
4590 // this->handle = coro::getHandleParam();
4591 //
4592 // YieldType promise;
4593 // coro::setPromisePtr(handle, &promise); // For await
4594 //
4595 // ... <REACTOR CODE> ...
4596 //
4597
4598 // Save original entry block and current block, and create a new entry block and make it current.
4599 // This new block will be used to inject code above the begin routine's existing code. We make
4600 // this block branch to the original entry block as the last instruction.
4601 auto origEntryBB = ::function->getEntryNode();
4602 auto origCurrBB = ::basicBlock;
4603 auto newBB = ::function->makeNode();
4604 sz::replaceEntryNode(::function, newBB);
4605 ::basicBlock = newBB;
4606
4607 // this->handle = coro::getHandleParam();
4608 this->handle = sz::Call(::function, ::basicBlock, coro::getHandleParam);
4609
4610 // YieldType promise;
4611 // coro::setPromisePtr(handle, &promise); // For await
4612 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
4613 sz::Call(::function, ::basicBlock, coro::setPromisePtr, this->handle, this->promise);
4614
4615 // Branch to original entry block
4616 auto br = Ice::InstBr::create(::function, origEntryBB);
4617 ::basicBlock->appendInst(br);
4618
4619 // Restore current block for future instructions
4620 ::basicBlock = origCurrBB;
4621 }
4622
4623 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4624 void generateYield(Value *val)
4625 {
4626 // ... <REACTOR CODE> ...
4627 //
4628 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004629 // if (!coro::suspend(handle)) {
4630 // return false; // coroutine has been stopped by the caller.
4631 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004632 //
4633 // ... <REACTOR CODE> ...
4634
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004635 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004636 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004637
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004638 // if (!coro::suspend(handle)) {
4639 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4640 auto doneBlock = Nucleus::createBasicBlock();
4641 auto resumeBlock = Nucleus::createBasicBlock();
4642 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4643
4644 // return false; // coroutine has been stopped by the caller.
4645 ::basicBlock = doneBlock;
4646 Nucleus::createRetVoid(); // coroutine return value is ignored.
4647
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004648 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004649 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004650 }
4651
4652 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4653
4654 // Generates the await function for the current coroutine.
4655 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4656 static FunctionUniquePtr generateAwaitFunction()
4657 {
4658 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4659 // {
4660 // if (coro::isDone())
4661 // {
4662 // return false;
4663 // }
4664 // else // resume
4665 // {
4666 // YieldType* promise = coro::getPromisePtr(handle);
4667 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004668 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004669 // return true;
4670 // }
4671 // }
4672
4673 // Subzero doesn't support bool types (IceType_i1) as return type
4674 const Ice::Type ReturnType = Ice::IceType_i32;
4675 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4676 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4677
4678 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4679 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4680
4681 Ice::Variable *handle = awaitFunc->getArgs()[0];
4682 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4683
4684 auto doneBlock = awaitFunc->makeNode();
4685 {
4686 // return false;
4687 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4688 doneBlock->appendInst(ret);
4689 }
4690
4691 auto resumeBlock = awaitFunc->makeNode();
4692 {
4693 // YieldType* promise = coro::getPromisePtr(handle);
4694 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4695
4696 // *out = *promise;
4697 // Load promise value
4698 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4699 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4700 resumeBlock->appendInst(load);
4701 // Then store it in output param
4702 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4703 resumeBlock->appendInst(store);
4704
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004705 // coro::resume(handle);
4706 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004707
4708 // return true;
4709 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4710 resumeBlock->appendInst(ret);
4711 }
4712
4713 // if (coro::isDone())
4714 // {
4715 // <doneBlock>
4716 // }
4717 // else // resume
4718 // {
4719 // <resumeBlock>
4720 // }
4721 Ice::CfgNode *bb = awaitFunc->getEntryNode();
4722 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone);
4723 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4724 bb->appendInst(br);
4725
4726 return FunctionUniquePtr{ awaitFunc };
4727 }
4728
4729 // Generates the destroy function for the current coroutine.
4730 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4731 static FunctionUniquePtr generateDestroyFunction()
4732 {
4733 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4734 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004735 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004736 // return;
4737 // }
4738
4739 const Ice::Type ReturnType = Ice::IceType_void;
4740 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4741
4742 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4743 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4744
4745 Ice::Variable *handle = destroyFunc->getArgs()[0];
4746
4747 auto *bb = destroyFunc->getEntryNode();
4748
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004749 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4750 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004751
4752 // return;
4753 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4754 bb->appendInst(ret);
4755
4756 return FunctionUniquePtr{ destroyFunc };
4757 }
4758
4759private:
4760 Ice::Variable *handle{};
4761 Ice::Variable *promise{};
4762};
4763
4764static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4765{
4766 // This doubles up as our coroutine handle
4767 auto coroData = coro::createCoroutineData();
4768
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004769 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4770 if(coroData->useInternalScheduler)
4771 {
4772 ::getOrCreateScheduler().bind();
4773 }
4774
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004775 marl::schedule([=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004776 // Store handle in TLS so that the coroutine can grab it right away, before
4777 // any fiber switch occurs.
4778 coro::setHandleParam(coroData);
4779
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004780 beginFunc();
4781
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004782 coroData->done.signal(); // coroutine is done.
4783 coroData->suspended.signal(); // resume any blocking await() call.
4784 coroData->terminated.signal(); // signal that the coroutine data is ready for freeing.
4785 });
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004786
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004787 coroData->suspended.wait(); // block until the first yield or coroutine end
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004788
4789 return coroData;
4790}
4791
4792void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4793{
4794 // Start by creating a regular function
4795 createFunction(yieldType, params);
4796
4797 // Save in case yield() is called
4798 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4799 ::coroYieldType = yieldType;
4800}
4801
4802void Nucleus::yield(Value *val)
4803{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004804 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004805 Variable::materializeAll();
4806
4807 // On first yield, we start generating coroutine functions
4808 if(!::coroGen)
4809 {
4810 ::coroGen = std::make_shared<CoroutineGenerator>();
4811 ::coroGen->generateCoroutineBegin();
4812 }
4813
4814 ASSERT(::coroGen);
4815 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004816}
4817
Ben Clayton713b8d32019-12-17 20:37:56 +00004818static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4819{
4820 return false;
4821}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004822
4823static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4824{
4825}
Nicolas Capens157ba262019-12-10 17:49:14 -05004826
4827std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4828{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004829 if(::coroGen)
4830 {
4831 // Finish generating coroutine functions
4832 {
4833 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004834 createRetVoidIfNoRet();
4835 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004836
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004837 auto awaitFunc = ::coroGen->generateAwaitFunction();
4838 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004839
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004840 // At this point, we no longer need the CoroutineGenerator.
4841 ::coroGen.reset();
4842 ::coroYieldType = nullptr;
4843
4844 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4845 { name, "await", "destroy" },
4846 cfgEdit);
4847
4848 return routine;
4849 }
4850 else
4851 {
4852 {
4853 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4854 createRetVoidIfNoRet();
4855 }
4856
4857 ::coroYieldType = nullptr;
4858
4859 // Not an actual coroutine (no yields), so return stubs for await and destroy
4860 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4861
4862 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4863 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4864 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4865 return routine;
4866 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004867}
4868
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004869Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004870{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004871 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4872
4873 if(isCoroutine)
4874 {
4875 return rr::invokeCoroutineBegin(func);
4876 }
4877 else
4878 {
4879 // For regular routines, just invoke the begin func directly
4880 return func();
4881 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004882}
Nicolas Capens157ba262019-12-10 17:49:14 -05004883
4884} // namespace rr