blob: 4607f9c946313f0388998c9dc50519066e014166 [file] [log] [blame]
Nicolas Capens598f8d82016-09-26 15:09:10 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050017#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000018#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050019#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040020
Nicolas Capens1a3ce872018-10-10 10:42:36 -040021#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000022#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040023
Nicolas Capens598f8d82016-09-26 15:09:10 -040024#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfgNode.h"
26#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000027#include "src/IceELFStreamer.h"
28#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040029#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031
Ben Clayton713b8d32019-12-17 20:37:56 +000032#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040033#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040035
Antonio Maiorano8bce0672020-02-28 13:13:45 -050036#include "marl/event.h"
37
Nicolas Capens6a990f82018-07-06 15:54:07 -040038#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040041
Nicolas Capensbd65da92017-01-05 16:31:06 -050042#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000043# ifndef WIN32_LEAN_AND_MEAN
44# define WIN32_LEAN_AND_MEAN
45# endif // !WIN32_LEAN_AND_MEAN
46# ifndef NOMINMAX
47# define NOMINMAX
48# endif // !NOMINMAX
49# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050050#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
Ben Clayton683bad82020-02-10 23:57:09 +000052#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040053#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000054#include <limits>
55#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040056
Antonio Maiorano02a39532020-01-21 15:15:34 -050057// Subzero utility functions
58// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050059namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050060namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061void replaceEntryNode(Ice::Cfg *function, Ice::CfgNode *newEntryNode)
62{
63 ASSERT_MSG(function->getEntryNode() != nullptr, "Function should have an entry node");
64
65 if(function->getEntryNode() == newEntryNode)
66 {
67 return;
68 }
69
70 // Make this the new entry node
71 function->setEntryNode(newEntryNode);
72
73 // Reorder nodes so that new entry block comes first. This is required
74 // by Cfg::renumberInstructions, which expects the first node in the list
75 // to be the entry node.
76 {
77 auto nodes = function->getNodes();
78
79 // TODO(amaiorano): Fast path if newEntryNode is last? Can avoid linear search.
80
81 auto iter = std::find(nodes.begin(), nodes.end(), newEntryNode);
82 ASSERT_MSG(iter != nodes.end(), "New node should be in the function's node list");
83
84 nodes.erase(iter);
85 nodes.insert(nodes.begin(), newEntryNode);
86
87 // swapNodes replaces its nodes with the input one, and renumbers them,
88 // so our new entry node will be 0, and the previous will be 1.
89 function->swapNodes(nodes);
90 }
91}
92
93Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
94{
95 uint32_t sequenceNumber = 0;
96 auto function = Ice::Cfg::create(context, sequenceNumber).release();
97
98 Ice::CfgLocalAllocatorScope allocScope{ function };
99
100 for(auto type : paramTypes)
101 {
102 Ice::Variable *arg = function->makeVariable(type);
103 function->addArg(arg);
104 }
105
106 Ice::CfgNode *node = function->makeNode();
107 function->setEntryNode(node);
108
109 return function;
110}
111
112Ice::Type getPointerType(Ice::Type elementType)
113{
114 if(sizeof(void *) == 8)
115 {
116 return Ice::IceType_i64;
117 }
118 else
119 {
120 return Ice::IceType_i32;
121 }
122}
123
124Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
125{
126 int typeSize = Ice::typeWidthInBytes(type);
127 int totalSize = typeSize * (arraySize ? arraySize : 1);
128
129 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
130 auto address = function->makeVariable(getPointerType(type));
131 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
132 function->getEntryNode()->getInsts().push_front(alloca);
133
134 return address;
135}
136
137Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500138{
139 if(sizeof(void *) == 8)
140 {
141 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
142 }
143 else
144 {
145 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
146 }
147}
148
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400149// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
150Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500151
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400152// Wrapper for calls on C functions with Ice types
153Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
154{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500155 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400156
157 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
158 // then truncate result to bool.
159 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
160 const bool returningBool = (retTy == Ice::IceType_i1);
161 if(returningBool)
162 {
163 ret = function->makeVariable(Ice::IceType_i32);
164 }
165 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500166 {
167 ret = function->makeVariable(retTy);
168 }
169
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400170 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500171 for(auto arg : iceArgs)
172 {
173 call->addArg(arg);
174 }
175
176 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400177
178 if(returningBool)
179 {
180 // Truncate result to bool so that if any (lsb) bits were set, result will be true
181 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
182 }
183
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500184 return ret;
185}
186
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400187Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
188{
189 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
190 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
191}
192
Antonio Maiorano62427e02020-02-13 09:18:05 -0500193// Wrapper for calls on C functions with Ice types
194template<typename Return, typename... CArgs, typename... RArgs>
195Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
196{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400197 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
198
Antonio Maiorano62427e02020-02-13 09:18:05 -0500199 Ice::Type retTy = T(rr::CToReactorT<Return>::getType());
200 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500201 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500202}
203
Antonio Maiorano02a39532020-01-21 15:15:34 -0500204// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500205Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500206{
207 Ice::Variable *result = function->makeVariable(v->getType());
208 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
209 basicBlock->appendInst(cast);
210 return result;
211}
212
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400213Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
214{
215 Ice::Variable *to = function->makeVariable(toType);
216 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
217 basicBlock->appendInst(cast);
218 return to;
219}
220
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500221Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500222{
223 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
224 // absolute address. We circumvent this by casting to a non-const variable, and loading
225 // from that.
226 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
227 {
228 ptr = sz::createUnconstCast(function, basicBlock, cptr);
229 }
230
231 Ice::Variable *result = function->makeVariable(type);
232 auto load = Ice::InstLoad::create(function, result, ptr, align);
233 basicBlock->appendInst(load);
234
235 return result;
236}
237
238} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500239} // namespace
240
// Forward declarations of the Reactor types that the file-local globals
// below hold pointers to.
namespace rr {
class CoroutineGenerator;
class ELFMemoryStreamer;
}  // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500245
246namespace {
247
248// Default configuration settings. Must be accessed under mutex lock.
249std::mutex defaultConfigLock;
250rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000251{
Nicolas Capens157ba262019-12-10 17:49:14 -0500252 // This uses a static in a function to avoid the cost of a global static
253 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
254 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000255 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500256 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000257}
258
Nicolas Capens157ba262019-12-10 17:49:14 -0500259Ice::GlobalContext *context = nullptr;
260Ice::Cfg *function = nullptr;
261Ice::CfgNode *basicBlock = nullptr;
262Ice::CfgLocalAllocatorScope *allocator = nullptr;
263rr::ELFMemoryStreamer *routine = nullptr;
264
265std::mutex codegenMutex;
266
267Ice::ELFFileStreamer *elfFile = nullptr;
268Ice::Fdstream *out = nullptr;
269
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500270// Coroutine globals
271rr::Type *coroYieldType = nullptr;
272std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500273marl::Scheduler &getOrCreateScheduler()
274{
275 static auto scheduler = [] {
276 auto s = std::make_unique<marl::Scheduler>();
277 s->setWorkerThreadCount(8);
278 return s;
279 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500280
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500281 return *scheduler;
282}
Nicolas Capens157ba262019-12-10 17:49:14 -0500283} // Anonymous namespace
284
285namespace {
286
// Define __i386__ / __x86_64__ when only the _M_IX86 / _M_AMD64 / _M_X64
// spellings are available, so the rest of the file can test a single name.
#if defined(_M_IX86) && !defined(__i386__)
#	define __i386__ 1
#endif

#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
#	define __x86_64__ 1
#endif
294
Antonio Maiorano370cba52019-12-31 11:36:07 -0500295Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400296{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500297 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100298 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500299 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000300 case rr::Optimization::Level::None: return Ice::Opt_m1;
301 case rr::Optimization::Level::Less: return Ice::Opt_m1;
302 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500303 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
304 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100305 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500306 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400307}
308
Antonio Maiorano370cba52019-12-31 11:36:07 -0500309Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
310{
311 switch(memoryOrder)
312 {
313 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
314 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
315 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
316 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
317 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
318 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
319 }
320 return Ice::Intrinsics::MemoryOrderInvalid;
321}
322
// Host CPU detection. The public flags are defined outside the class from the
// private detect*() helpers.
class CPUID
{
public:
	const static bool ARM;
	const static bool SSE4_1;

private:
	// Executes CPUID leaf |info| and stores EAX, EBX, ECX, EDX in
	// registers[0..3]. On non-x86 targets all four entries are set to 0.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// True when compiled for 32- or 64-bit ARM; false for x86 and MIPS.
	// Any other architecture is a compile error.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// On x86, reports bit 19 of ECX from CPUID leaf 1; always false elsewhere.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		int registers[4];
		cpuid(registers, 1);

		const int sse41Bit = 0x00080000;
		return (registers[2] & sse41Bit) != 0;
#else
		return false;
#endif
	}
};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500372
Nicolas Capens157ba262019-12-10 17:49:14 -0500373const bool CPUID::ARM = CPUID::detectARM();
374const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
375const bool emulateIntrinsics = false;
376const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400377
Nicolas Capens157ba262019-12-10 17:49:14 -0500378constexpr bool subzeroDumpEnabled = false;
379constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500380
381#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500382static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
383static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500384#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500385
386} // anonymous namespace
387
388namespace rr {
389
// Returns the name of this Reactor backend.
std::string BackendName()
{
	return std::string("Subzero");
}
394
Ben Clayton713b8d32019-12-17 20:37:56 +0000395const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500396 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500397};
398
399enum EmulatedType
400{
401 EmulatedShift = 16,
402 EmulatedV2 = 2 << EmulatedShift,
403 EmulatedV4 = 4 << EmulatedShift,
404 EmulatedV8 = 8 << EmulatedShift,
405 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
406
407 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
408 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
409 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000410 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
411 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500412 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
413};
414
Ben Clayton713b8d32019-12-17 20:37:56 +0000415class Value : public Ice::Operand
416{};
417class SwitchCases : public Ice::InstSwitch
418{};
419class BasicBlock : public Ice::CfgNode
420{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500421
422Ice::Type T(Type *t)
423{
424 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
425 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500426}
427
Nicolas Capens157ba262019-12-10 17:49:14 -0500428Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400429{
Ben Clayton713b8d32019-12-17 20:37:56 +0000430 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500431}
432
433Type *T(EmulatedType t)
434{
Ben Clayton713b8d32019-12-17 20:37:56 +0000435 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500436}
437
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500438std::vector<Ice::Type> T(const std::vector<Type *> &types)
439{
440 std::vector<Ice::Type> result;
441 result.reserve(types.size());
442 for(auto &t : types)
443 {
444 result.push_back(T(t));
445 }
446 return result;
447}
448
Nicolas Capens157ba262019-12-10 17:49:14 -0500449Value *V(Ice::Operand *v)
450{
Ben Clayton713b8d32019-12-17 20:37:56 +0000451 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500452}
453
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500454Ice::Operand *V(Value *v)
455{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500456 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500457}
458
Antonio Maiorano62427e02020-02-13 09:18:05 -0500459std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
460{
461 std::vector<Ice::Operand *> result;
462 result.reserve(values.size());
463 for(auto &v : values)
464 {
465 result.push_back(V(v));
466 }
467 return result;
468}
469
Nicolas Capens157ba262019-12-10 17:49:14 -0500470BasicBlock *B(Ice::CfgNode *b)
471{
Ben Clayton713b8d32019-12-17 20:37:56 +0000472 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500473}
474
475static size_t typeSize(Type *type)
476{
477 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400478 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500479 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800480 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000481 case Type_v2i32: return 8;
482 case Type_v4i16: return 8;
483 case Type_v2i16: return 4;
484 case Type_v8i8: return 8;
485 case Type_v4i8: return 4;
486 case Type_v2f32: return 8;
487 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500488 }
489 }
490
491 return Ice::typeWidthInBytes(T(type));
492}
493
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500494static void createRetVoidIfNoRet()
495{
496 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
497 {
498 Nucleus::createRetVoid();
499 }
500}
501
Ben Clayton713b8d32019-12-17 20:37:56 +0000502using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
503using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500504
505inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
506{
Ben Clayton713b8d32019-12-17 20:37:56 +0000507 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500508}
509
510inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
511{
512 return &sectionHeader(elfHeader)[index];
513}
514
515static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
516{
517 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
518
519 uint32_t index = relocation.getSymbol();
520 int table = relocationTable.sh_link;
521 void *symbolValue = nullptr;
522
523 if(index != SHN_UNDEF)
524 {
525 if(table == SHN_UNDEF) return nullptr;
526 const SectionHeader *symbolTable = elfSection(elfHeader, table);
527
528 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
529 if(index >= symtab_entries)
530 {
531 ASSERT(index < symtab_entries && "Symbol Index out of range");
532 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800533 }
534
Nicolas Capens157ba262019-12-10 17:49:14 -0500535 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000536 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500537 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800538
Nicolas Capens157ba262019-12-10 17:49:14 -0500539 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400540 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500541 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000542 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400543 }
544 else
545 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500546 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400547 }
Nicolas Capens66478362016-10-13 15:36:36 -0400548 }
549
Nicolas Capens157ba262019-12-10 17:49:14 -0500550 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000551 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500552
553 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400554 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400555 switch(relocation.getType())
556 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000557 case R_ARM_NONE:
558 // No relocation
559 break;
560 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500561 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000562 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500563 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
564 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
565 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400566 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000567 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500568 {
569 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
570 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
571 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400572 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000573 default:
574 ASSERT(false && "Unsupported relocation type");
575 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400576 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500577 }
578 else
579 {
580 switch(relocation.getType())
581 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000582 case R_386_NONE:
583 // No relocation
584 break;
585 case R_386_32:
586 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
587 break;
588 case R_386_PC32:
589 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
590 break;
591 default:
592 ASSERT(false && "Unsupported relocation type");
593 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500594 }
Nicolas Capens66478362016-10-13 15:36:36 -0400595 }
596
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 return symbolValue;
598}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400599
Nicolas Capens157ba262019-12-10 17:49:14 -0500600static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
601{
602 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
603
604 uint32_t index = relocation.getSymbol();
605 int table = relocationTable.sh_link;
606 void *symbolValue = nullptr;
607
608 if(index != SHN_UNDEF)
609 {
610 if(table == SHN_UNDEF) return nullptr;
611 const SectionHeader *symbolTable = elfSection(elfHeader, table);
612
613 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
614 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400615 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500616 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400617 return nullptr;
618 }
619
Nicolas Capens157ba262019-12-10 17:49:14 -0500620 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000621 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500622 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400623
Nicolas Capens157ba262019-12-10 17:49:14 -0500624 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400625 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500626 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000627 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500628 }
629 else
630 {
631 return nullptr;
632 }
633 }
Nicolas Capens66478362016-10-13 15:36:36 -0400634
Nicolas Capens157ba262019-12-10 17:49:14 -0500635 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000636 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
637 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400638
Nicolas Capens157ba262019-12-10 17:49:14 -0500639 switch(relocation.getType())
640 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000641 case R_X86_64_NONE:
642 // No relocation
643 break;
644 case R_X86_64_64:
645 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
646 break;
647 case R_X86_64_PC32:
648 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
649 break;
650 case R_X86_64_32S:
651 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
652 break;
653 default:
654 ASSERT(false && "Unsupported relocation type");
655 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500656 }
657
658 return symbolValue;
659}
660
// Location and size of one function's machine code inside a loaded ELF image.
struct EntryPoint
{
	// Address of the first byte of the function's code; nullptr until set, so
	// that a default-initialized EntryPoint never holds an indeterminate pointer.
	const void *entry = nullptr;

	// Size of the code in bytes.
	size_t codeSize = 0;
};
666
667std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
668{
669 ASSERT(functionNames.size() > 0);
670 std::vector<EntryPoint> entryPoints(functionNames.size());
671
Ben Clayton713b8d32019-12-17 20:37:56 +0000672 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500673
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400674 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500675 if(!elfHeader->checkMagic())
676 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400677 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500678 }
679
680 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000681 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
682#if defined(__i386__)
683 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
684#elif defined(__x86_64__)
685 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
686#elif defined(__arm__)
687 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
688#elif defined(__aarch64__)
689 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
690#elif defined(__mips__)
691 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
692#else
693# error "Unsupported platform"
694#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500695
Ben Clayton713b8d32019-12-17 20:37:56 +0000696 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500697
698 for(int i = 0; i < elfHeader->e_shnum; i++)
699 {
700 if(sectionHeader[i].sh_type == SHT_PROGBITS)
701 {
702 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
703 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400704 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500705 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400706 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500707
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400708 for(size_t j = 0; j < functionNames.size(); ++j)
709 {
710 if(strstr(sectionName, functionNames[j]) != nullptr)
711 {
712 return j;
713 }
714 }
715
716 UNREACHABLE("Failed to find executable section that matches input function names");
717 return static_cast<size_t>(-1);
718 };
719
720 size_t index = findSectionNameEntryIndex();
721 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
722 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400723 }
724 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500725 else if(sectionHeader[i].sh_type == SHT_REL)
726 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000727 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400728
Nicolas Capens157ba262019-12-10 17:49:14 -0500729 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
730 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000731 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500732 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
733 }
734 }
735 else if(sectionHeader[i].sh_type == SHT_RELA)
736 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000737 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500738
739 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
740 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000741 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500742 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
743 }
744 }
745 }
746
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400747 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500748}
749
750template<typename T>
751struct ExecutableAllocator
752{
753 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000754 template<class U>
755 ExecutableAllocator(const ExecutableAllocator<U> &other)
756 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500757
758 using value_type = T;
759 using size_type = std::size_t;
760
761 T *allocate(size_type n)
762 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000763 return (T *)allocateMemoryPages(
764 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500765 }
766
767 void deallocate(T *p, size_type n)
768 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800769 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500770 }
771};
772
773class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
774{
775 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
776 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
777
778public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000779 ELFMemoryStreamer()
780 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500781 {
782 position = 0;
783 buffer.reserve(0x1000);
784 }
785
786 ~ELFMemoryStreamer() override
787 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500788 }
789
790 void write8(uint8_t Value) override
791 {
792 if(position == (uint64_t)buffer.size())
793 {
794 buffer.push_back(Value);
795 position++;
796 }
797 else if(position < (uint64_t)buffer.size())
798 {
799 buffer[position] = Value;
800 position++;
801 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000802 else
803 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500804 }
805
806 void writeBytes(llvm::StringRef Bytes) override
807 {
808 std::size_t oldSize = buffer.size();
809 buffer.resize(oldSize + Bytes.size());
810 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
811 position += Bytes.size();
812 }
813
814 uint64_t tell() const override { return position; }
815
816 void seek(uint64_t Off) override { position = Off; }
817
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400818 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500819 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400820 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500821
822#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500823 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
824#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400825 for(auto &entryPoint : entryPoints)
826 {
827 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
828 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500829#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500830
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400831 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400832 }
833
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500834 void finalize()
835 {
836 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
837
838 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
839 }
840
Ben Clayton713b8d32019-12-17 20:37:56 +0000841 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400842 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500843 ASSERT(func);
844 funcs[index] = func;
845 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400846
Nicolas Capens157ba262019-12-10 17:49:14 -0500847 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400848 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500849 ASSERT(funcs[index]);
850 return funcs[index];
851 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400852
Antonio Maiorano02a39532020-01-21 15:15:34 -0500853 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500854 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500855 // TODO(b/148086935): Replace with a buffer allocator.
856 size_t space = size + alignment;
857 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
858 void *ptr = buf.get();
859 void *alignedPtr = std::align(alignment, size, ptr, space);
860 ASSERT(alignedPtr);
861 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500862 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500863 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500864 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400865
Nicolas Capens157ba262019-12-10 17:49:14 -0500866private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000867 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500868 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
869 std::size_t position;
870 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500871};
872
Antonio Maiorano62427e02020-02-13 09:18:05 -0500873#ifdef ENABLE_RR_PRINT
874void VPrintf(const std::vector<Value *> &vals)
875{
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500876 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(::printf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500877}
878#endif // ENABLE_RR_PRINT
879
Nicolas Capens157ba262019-12-10 17:49:14 -0500880Nucleus::Nucleus()
881{
Ben Clayton713b8d32019-12-17 20:37:56 +0000882 ::codegenMutex.lock(); // Reactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500883
884 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
885 Ice::ClFlags::getParsedClFlags(Flags);
886
Ben Clayton713b8d32019-12-17 20:37:56 +0000887#if defined(__arm__)
888 Flags.setTargetArch(Ice::Target_ARM32);
889 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
890#elif defined(__mips__)
891 Flags.setTargetArch(Ice::Target_MIPS32);
892 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
893#else // x86
894 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
895 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
896#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500897 Flags.setOutFileType(Ice::FT_Elf);
898 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
899 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
900 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
901 Flags.setDisableHybridAssembly(true);
902
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500903 // Emit functions into separate sections in the ELF so we can find them by name
904 Flags.setFunctionSections(true);
905
Nicolas Capens157ba262019-12-10 17:49:14 -0500906 static llvm::raw_os_ostream cout(std::cout);
907 static llvm::raw_os_ostream cerr(std::cerr);
908
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500909 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500910 {
911 // Decorate text asm with liveness info
912 Flags.setDecorateAsm(true);
913 }
914
Ben Clayton713b8d32019-12-17 20:37:56 +0000915 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500916 {
917 std::error_code errorCode;
918 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
919 ::elfFile = new Ice::ELFFileStreamer(*out);
920 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
921 }
922 else
923 {
924 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
925 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
926 ::routine = elfMemory;
927 }
928}
929
930Nucleus::~Nucleus()
931{
932 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500933 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500934
935 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500936 ::allocator = nullptr;
937
Nicolas Capens157ba262019-12-10 17:49:14 -0500938 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500939 ::function = nullptr;
940
Nicolas Capens157ba262019-12-10 17:49:14 -0500941 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500942 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500943
944 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500945 ::elfFile = nullptr;
946
Nicolas Capens157ba262019-12-10 17:49:14 -0500947 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500948 ::out = nullptr;
949
950 ::basicBlock = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500951
952 ::codegenMutex.unlock();
953}
954
955void Nucleus::setDefaultConfig(const Config &cfg)
956{
957 std::unique_lock<std::mutex> lock(::defaultConfigLock);
958 ::defaultConfig() = cfg;
959}
960
961void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
962{
963 std::unique_lock<std::mutex> lock(::defaultConfigLock);
964 auto &config = ::defaultConfig();
965 config = cfgEdit.apply(config);
966}
967
968Config Nucleus::getDefaultConfig()
969{
970 std::unique_lock<std::mutex> lock(::defaultConfigLock);
971 return ::defaultConfig();
972}
973
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500974// This function lowers and produces executable binary code in memory for the input functions,
975// and returns a Routine with the entry points to these functions.
976template<size_t Count>
977static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500978{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500979 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
980 // and GlobalContext::emitItems.
981
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500982 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500983 {
984 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500985 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500986 }
987
988 ::context->emitFileHeader();
989
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500990 // Translate
991
992 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500993 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500994 Ice::Cfg *currFunc = functions[i];
995
996 // Install function allocator in TLS for Cfg-specific container allocators
997 Ice::CfgLocalAllocatorScope allocScope(currFunc);
998
999 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
1000
1001 rr::optimize(currFunc);
1002
1003 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001004 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001005
1006 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001007 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001008
1009 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1010
1011 if(subzeroEmitTextAsm)
1012 {
1013 currFunc->emit();
1014 }
1015
1016 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -05001017 }
1018
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001019 // Emit items
1020
1021 ::context->lowerGlobals("");
1022
Nicolas Capens157ba262019-12-10 17:49:14 -05001023 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001024
1025 for(size_t i = 0; i < Count; ++i)
1026 {
1027 Ice::Cfg *currFunc = functions[i];
1028
1029 // Accumulate globals from functions to emit into the "last" section at the end
1030 auto globals = currFunc->getGlobalInits();
1031 if(globals && !globals->empty())
1032 {
1033 ::context->getGlobals()->merge(globals.get());
1034 }
1035
1036 auto assembler = currFunc->releaseAssembler();
1037 assembler->alignFunction();
1038 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1039 }
1040
Nicolas Capens157ba262019-12-10 17:49:14 -05001041 ::context->lowerGlobals("last");
1042 ::context->lowerConstants();
1043 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001044
Nicolas Capens157ba262019-12-10 17:49:14 -05001045 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001046 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001047 objectWriter->writeNonUserSections();
1048
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001049 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001050 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1051 ASSERT(entryPoints.size() == Count);
1052 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001053 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001054 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001055 }
1056
1057 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001058
1059 Routine *handoffRoutine = ::routine;
1060 ::routine = nullptr;
1061
1062 return std::shared_ptr<Routine>(handoffRoutine);
1063}
1064
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001065std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1066{
1067 createRetVoidIfNoRet();
1068 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1069}
1070
Nicolas Capens157ba262019-12-10 17:49:14 -05001071Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1072{
1073 Ice::Type type = T(t);
1074 int typeSize = Ice::typeWidthInBytes(type);
1075 int totalSize = typeSize * (arraySize ? arraySize : 1);
1076
1077 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1078 auto address = ::function->makeVariable(T(getPointerType(t)));
1079 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1080 ::function->getEntryNode()->getInsts().push_front(alloca);
1081
1082 return V(address);
1083}
1084
1085BasicBlock *Nucleus::createBasicBlock()
1086{
1087 return B(::function->makeNode());
1088}
1089
1090BasicBlock *Nucleus::getInsertBlock()
1091{
1092 return B(::basicBlock);
1093}
1094
1095void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1096{
Ben Clayton713b8d32019-12-17 20:37:56 +00001097 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001098
1099 Variable::materializeAll();
1100
1101 ::basicBlock = basicBlock;
1102}
1103
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001104void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001105{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001106 ASSERT(::function == nullptr);
1107 ASSERT(::allocator == nullptr);
1108 ASSERT(::basicBlock == nullptr);
1109
1110 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1111
1112 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1113 // becomes invalid if another one is created; for example, when creating await and destroy functions
1114 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1115 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001116 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1117
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001118 ::basicBlock = ::function->getEntryNode();
Nicolas Capens157ba262019-12-10 17:49:14 -05001119}
1120
1121Value *Nucleus::getArgument(unsigned int index)
1122{
1123 return V(::function->getArgs()[index]);
1124}
1125
1126void Nucleus::createRetVoid()
1127{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001128 RR_DEBUG_INFO_UPDATE_LOC();
1129
Nicolas Capens157ba262019-12-10 17:49:14 -05001130 // Code generated after this point is unreachable, so any variables
1131 // being read can safely return an undefined value. We have to avoid
1132 // materializing variables after the terminator ret instruction.
1133 Variable::killUnmaterialized();
1134
1135 Ice::InstRet *ret = Ice::InstRet::create(::function);
1136 ::basicBlock->appendInst(ret);
1137}
1138
1139void Nucleus::createRet(Value *v)
1140{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001141 RR_DEBUG_INFO_UPDATE_LOC();
1142
Nicolas Capens157ba262019-12-10 17:49:14 -05001143 // Code generated after this point is unreachable, so any variables
1144 // being read can safely return an undefined value. We have to avoid
1145 // materializing variables after the terminator ret instruction.
1146 Variable::killUnmaterialized();
1147
1148 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1149 ::basicBlock->appendInst(ret);
1150}
1151
1152void Nucleus::createBr(BasicBlock *dest)
1153{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001154 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001155 Variable::materializeAll();
1156
1157 auto br = Ice::InstBr::create(::function, dest);
1158 ::basicBlock->appendInst(br);
1159}
1160
1161void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1162{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001163 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001164 Variable::materializeAll();
1165
1166 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1167 ::basicBlock->appendInst(br);
1168}
1169
1170static bool isCommutative(Ice::InstArithmetic::OpKind op)
1171{
1172 switch(op)
1173 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001174 case Ice::InstArithmetic::Add:
1175 case Ice::InstArithmetic::Fadd:
1176 case Ice::InstArithmetic::Mul:
1177 case Ice::InstArithmetic::Fmul:
1178 case Ice::InstArithmetic::And:
1179 case Ice::InstArithmetic::Or:
1180 case Ice::InstArithmetic::Xor:
1181 return true;
1182 default:
1183 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001184 }
1185}
1186
1187static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1188{
1189 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1190
1191 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1192
1193 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1194 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1195 ::basicBlock->appendInst(arithmetic);
1196
1197 return V(result);
1198}
1199
1200Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1201{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001202 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001203 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1204}
1205
1206Value *Nucleus::createSub(Value *lhs, Value *rhs)
1207{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001208 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001209 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1210}
1211
1212Value *Nucleus::createMul(Value *lhs, Value *rhs)
1213{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001214 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001215 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1216}
1217
1218Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1219{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001220 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001221 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1222}
1223
1224Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1225{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001226 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001227 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1228}
1229
1230Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1231{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001232 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001233 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1234}
1235
1236Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1237{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001238 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001239 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1240}
1241
1242Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1243{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001244 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001245 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1246}
1247
1248Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1249{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001250 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001251 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1252}
1253
1254Value *Nucleus::createURem(Value *lhs, Value *rhs)
1255{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001256 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001257 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1258}
1259
1260Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1261{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001262 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001263 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1264}
1265
1266Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001268 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001269 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1270 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001271 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001272 return nullptr;
1273}
1274
1275RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1276{
1277 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001278}
1279
1280Value *Nucleus::createShl(Value *lhs, Value *rhs)
1281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001282 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001283 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1284}
1285
1286Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001289 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1290}
1291
1292Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001295 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1296}
1297
1298Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001301 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1302}
1303
1304Value *Nucleus::createOr(Value *lhs, Value *rhs)
1305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001307 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1308}
1309
1310Value *Nucleus::createXor(Value *lhs, Value *rhs)
1311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001313 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1314}
1315
1316Value *Nucleus::createNeg(Value *v)
1317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001319 return createSub(createNullValue(T(v->getType())), v);
1320}
1321
1322Value *Nucleus::createFNeg(Value *v)
1323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001324 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001325 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1326 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001327
1328 return createFSub(negativeZero, v);
1329}
1330
1331Value *Nucleus::createNot(Value *v)
1332{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001333 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001334 if(Ice::isScalarIntegerType(v->getType()))
1335 {
1336 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1337 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001338 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001339 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001340 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001341 return createXor(v, createConstantVector(c, T(v->getType())));
1342 }
1343}
1344
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001345static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1346{
1347#if defined(__i386__) || defined(__x86_64__)
1348 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1349 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1350 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1351#else
1352 if(atomic)
1353 {
1354 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1355 }
1356 if(memoryOrder != std::memory_order_relaxed)
1357 {
1358 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1359 }
1360#endif
1361
1362 // Vulkan doesn't allow sequential memory order
1363 ASSERT(memoryOrder != std::memory_order_seq_cst);
1364}
1365
Nicolas Capens157ba262019-12-10 17:49:14 -05001366Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1367{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001368 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001369 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001370
1371 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001372 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001373
Ben Clayton713b8d32019-12-17 20:37:56 +00001374 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001375 {
1376 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001377 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001378 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001379 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001380 auto pointer = RValue<Pointer<Byte>>(ptr);
1381 Int x = *Pointer<Int>(pointer);
1382
1383 Int4 vector;
1384 vector = Insert(vector, x, 0);
1385
Antonio Maiorano02a39532020-01-21 15:15:34 -05001386 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001387 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1388 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001389 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001390 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001391 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001392 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001393 auto pointer = RValue<Pointer<Byte>>(ptr);
1394 Int x = *Pointer<Int>(pointer);
1395 Int y = *Pointer<Int>(pointer + 4);
1396
1397 Int4 vector;
1398 vector = Insert(vector, x, 0);
1399 vector = Insert(vector, y, 1);
1400
Antonio Maiorano02a39532020-01-21 15:15:34 -05001401 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001402 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1403 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001404 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001405 else
1406 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001407 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001408 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001409 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001410 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001411 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001412 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001413 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1414 load->addArg(ptr);
1415 load->addArg(::context->getConstantInt32(typeSize(type)));
1416 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001417 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001418 }
1419 else
1420 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001421 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001422 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001423
Antonio Maiorano02a39532020-01-21 15:15:34 -05001424 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001425 return V(result);
1426}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001427
Nicolas Capens157ba262019-12-10 17:49:14 -05001428Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1429{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001430 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001431 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001432
Ben Clayton713b8d32019-12-17 20:37:56 +00001433#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001434 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001435 if(align != 0)
1436 {
1437 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1438 call->addArg(ptr);
1439 call->addArg(::context->getConstantInt64(typeSize(type)));
1440 ::basicBlock->appendInst(call);
1441 }
1442#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001443
Nicolas Capens157ba262019-12-10 17:49:14 -05001444 int valueType = (int)reinterpret_cast<intptr_t>(type);
1445
Ben Clayton713b8d32019-12-17 20:37:56 +00001446 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001447 {
1448 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001449 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001450 if(typeSize(type) == 4)
1451 {
1452 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1453 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1454 ::basicBlock->appendInst(bitcast);
1455
1456 RValue<Int4> v(V(vector));
1457
1458 auto pointer = RValue<Pointer<Byte>>(ptr);
1459 Int x = Extract(v, 0);
1460 *Pointer<Int>(pointer) = x;
1461 }
1462 else if(typeSize(type) == 8)
1463 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001464 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001465 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1466 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1467 ::basicBlock->appendInst(bitcast);
1468
1469 RValue<Int4> v(V(vector));
1470
1471 auto pointer = RValue<Pointer<Byte>>(ptr);
1472 Int x = Extract(v, 0);
1473 *Pointer<Int>(pointer) = x;
1474 Int y = Extract(v, 1);
1475 *Pointer<Int>(pointer + 4) = y;
1476 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001477 else
1478 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001479 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001480 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001481 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001482 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001483 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1484 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1485 store->addArg(value);
1486 store->addArg(ptr);
1487 store->addArg(::context->getConstantInt32(typeSize(type)));
1488 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001489 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001490 }
1491 else
1492 {
1493 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001494
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001495 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001496 ::basicBlock->appendInst(store);
1497 }
1498
1499 return value;
1500}
1501
1502Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1503{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001504 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001505 ASSERT(index->getType() == Ice::IceType_i32);
1506
1507 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1508 {
1509 int32_t offset = constant->getValue() * (int)typeSize(type);
1510
1511 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001512 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001513 return ptr;
1514 }
1515
Nicolas Capens157ba262019-12-10 17:49:14 -05001516 return createAdd(ptr, createConstantInt(offset));
1517 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001518
Nicolas Capens157ba262019-12-10 17:49:14 -05001519 if(!Ice::isByteSizedType(T(type)))
1520 {
1521 index = createMul(index, createConstantInt((int)typeSize(type)));
1522 }
1523
Ben Clayton713b8d32019-12-17 20:37:56 +00001524 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001525 {
1526 if(unsignedIndex)
1527 {
1528 index = createZExt(index, T(Ice::IceType_i64));
1529 }
1530 else
1531 {
1532 index = createSExt(index, T(Ice::IceType_i64));
1533 }
1534 }
1535
1536 return createAdd(ptr, index);
1537}
1538
Antonio Maiorano370cba52019-12-31 11:36:07 -05001539static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1540{
1541 Ice::Variable *result = ::function->makeVariable(value->getType());
1542
1543 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1544 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1545 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1546 auto op = ::context->getConstantInt32(rmwOp);
1547 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1548 inst->addArg(op);
1549 inst->addArg(ptr);
1550 inst->addArg(value);
1551 inst->addArg(order);
1552 ::basicBlock->appendInst(inst);
1553
1554 return V(result);
1555}
1556
Nicolas Capens157ba262019-12-10 17:49:14 -05001557Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1558{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001559 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001560 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001561}
1562
1563Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1564{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001565 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001566 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001567}
1568
1569Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1570{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001571 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001572 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001573}
1574
1575Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1576{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001577 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001578 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001579}
1580
1581Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1582{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001583 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001584 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001585}
1586
1587Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1588{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001589 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001590 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001591}
1592
1593Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1594{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001595 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001596 Ice::Variable *result = ::function->makeVariable(value->getType());
1597
1598 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1599 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1600 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1601 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1602 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1603 inst->addArg(ptr);
1604 inst->addArg(compare);
1605 inst->addArg(value);
1606 inst->addArg(orderEq);
1607 inst->addArg(orderNeq);
1608 ::basicBlock->appendInst(inst);
1609
1610 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001611}
1612
1613static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1614{
1615 if(v->getType() == T(destType))
1616 {
1617 return v;
1618 }
1619
1620 Ice::Variable *result = ::function->makeVariable(T(destType));
1621 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1622 ::basicBlock->appendInst(cast);
1623
1624 return V(result);
1625}
1626
1627Value *Nucleus::createTrunc(Value *v, Type *destType)
1628{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001629 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001630 return createCast(Ice::InstCast::Trunc, v, destType);
1631}
1632
1633Value *Nucleus::createZExt(Value *v, Type *destType)
1634{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001635 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001636 return createCast(Ice::InstCast::Zext, v, destType);
1637}
1638
1639Value *Nucleus::createSExt(Value *v, Type *destType)
1640{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001641 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001642 return createCast(Ice::InstCast::Sext, v, destType);
1643}
1644
1645Value *Nucleus::createFPToUI(Value *v, Type *destType)
1646{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001647 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001648 return createCast(Ice::InstCast::Fptoui, v, destType);
1649}
1650
1651Value *Nucleus::createFPToSI(Value *v, Type *destType)
1652{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001653 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001654 return createCast(Ice::InstCast::Fptosi, v, destType);
1655}
1656
1657Value *Nucleus::createSIToFP(Value *v, Type *destType)
1658{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001659 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001660 return createCast(Ice::InstCast::Sitofp, v, destType);
1661}
1662
1663Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1664{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001665 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001666 return createCast(Ice::InstCast::Fptrunc, v, destType);
1667}
1668
1669Value *Nucleus::createFPExt(Value *v, Type *destType)
1670{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001671 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001672 return createCast(Ice::InstCast::Fpext, v, destType);
1673}
1674
1675Value *Nucleus::createBitCast(Value *v, Type *destType)
1676{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001677 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001678 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1679 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1680 // emulate them by writing to the stack and reading back as the destination type.
1681 if(emulateMismatchedBitCast)
1682 {
1683 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1684 {
1685 Value *address = allocateStackVariable(destType);
1686 createStore(v, address, T(v->getType()));
1687 return createLoad(address, destType);
1688 }
1689 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1690 {
1691 Value *address = allocateStackVariable(T(v->getType()));
1692 createStore(v, address, T(v->getType()));
1693 return createLoad(address, destType);
1694 }
1695 }
1696
1697 return createCast(Ice::InstCast::Bitcast, v, destType);
1698}
1699
1700static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1701{
1702 ASSERT(lhs->getType() == rhs->getType());
1703
1704 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1705 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1706 ::basicBlock->appendInst(cmp);
1707
1708 return V(result);
1709}
1710
1711Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1712{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001713 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001714 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1715}
1716
1717Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1718{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001719 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001720 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1721}
1722
1723Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1724{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001725 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001726 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1727}
1728
1729Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1730{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001731 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001732 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1733}
1734
1735Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1736{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001737 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001738 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1739}
1740
1741Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1742{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001743 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001744 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1745}
1746
1747Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1748{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001749 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001750 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1751}
1752
1753Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1754{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001755 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001756 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1757}
1758
1759Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1760{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001761 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001762 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1763}
1764
1765Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1766{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001767 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001768 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1769}
1770
1771Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1772{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001773 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001774 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1775}
1776
1777static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1778{
1779 ASSERT(lhs->getType() == rhs->getType());
1780 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1781
1782 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1783 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1784 ::basicBlock->appendInst(cmp);
1785
1786 return V(result);
1787}
1788
1789Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001792 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1793}
1794
1795Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001798 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1799}
1800
1801Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1802{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001803 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001804 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1805}
1806
1807Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1808{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001809 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001810 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1811}
1812
1813Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1814{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001815 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001816 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1817}
1818
1819Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1820{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001821 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001822 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1823}
1824
1825Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1826{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001827 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001828 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1829}
1830
1831Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1832{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001833 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001834 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1835}
1836
1837Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001840 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1841}
1842
1843Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001846 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1847}
1848
1849Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001852 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1853}
1854
1855Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001858 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1859}
1860
1861Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001864 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1865}
1866
1867Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1868{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001869 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001870 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1871}
1872
1873Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1874{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001875 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001876 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001877 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001878 ::basicBlock->appendInst(extract);
1879
1880 return V(result);
1881}
1882
1883Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1884{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001885 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001886 auto result = ::function->makeVariable(vector->getType());
1887 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1888 ::basicBlock->appendInst(insert);
1889
1890 return V(result);
1891}
1892
1893Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1894{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001895 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001896 ASSERT(V1->getType() == V2->getType());
1897
1898 int size = Ice::typeNumElements(V1->getType());
1899 auto result = ::function->makeVariable(V1->getType());
1900 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1901
1902 for(int i = 0; i < size; i++)
1903 {
1904 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1905 }
1906
1907 ::basicBlock->appendInst(shuffle);
1908
1909 return V(result);
1910}
1911
1912Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1913{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001914 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001915 ASSERT(ifTrue->getType() == ifFalse->getType());
1916
1917 auto result = ::function->makeVariable(ifTrue->getType());
1918 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1919 ::basicBlock->appendInst(select);
1920
1921 return V(result);
1922}
1923
1924SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1925{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001926 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001927 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1928 ::basicBlock->appendInst(switchInst);
1929
Ben Clayton713b8d32019-12-17 20:37:56 +00001930 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001931}
1932
1933void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1934{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001935 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001936 switchCases->addBranch(label, label, branch);
1937}
1938
1939void Nucleus::createUnreachable()
1940{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001941 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001942 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1943 ::basicBlock->appendInst(unreachable);
1944}
1945
Antonio Maiorano62427e02020-02-13 09:18:05 -05001946Type *Nucleus::getType(Value *value)
1947{
1948 return T(V(value)->getType());
1949}
1950
1951Type *Nucleus::getContainedType(Type *vectorType)
1952{
1953 Ice::Type vecTy = T(vectorType);
1954 switch(vecTy)
1955 {
1956 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1957 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1958 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1959 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1960 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1961 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1962 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1963 default:
1964 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1965 return {};
1966 }
1967}
1968
Nicolas Capens157ba262019-12-10 17:49:14 -05001969Type *Nucleus::getPointerType(Type *ElementType)
1970{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001971 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001972}
1973
Antonio Maiorano62427e02020-02-13 09:18:05 -05001974static constexpr Ice::Type getNaturalIntType()
1975{
1976 constexpr size_t intSize = sizeof(int);
1977 static_assert(intSize == 4 || intSize == 8, "");
1978 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1979}
1980
1981Type *Nucleus::getPrintfStorageType(Type *valueType)
1982{
1983 Ice::Type valueTy = T(valueType);
1984 switch(valueTy)
1985 {
1986 case Ice::IceType_i32:
1987 return T(getNaturalIntType());
1988
1989 case Ice::IceType_f32:
1990 return T(Ice::IceType_f64);
1991
1992 default:
1993 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1994 return {};
1995 }
1996}
1997
Nicolas Capens157ba262019-12-10 17:49:14 -05001998Value *Nucleus::createNullValue(Type *Ty)
1999{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002000 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002001 if(Ice::isVectorType(T(Ty)))
2002 {
2003 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002004 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002005 return createConstantVector(c, Ty);
2006 }
2007 else
2008 {
2009 return V(::context->getConstantZero(T(Ty)));
2010 }
2011}
2012
2013Value *Nucleus::createConstantLong(int64_t i)
2014{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002015 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002016 return V(::context->getConstantInt64(i));
2017}
2018
2019Value *Nucleus::createConstantInt(int i)
2020{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002021 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002022 return V(::context->getConstantInt32(i));
2023}
2024
2025Value *Nucleus::createConstantInt(unsigned int i)
2026{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002027 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002028 return V(::context->getConstantInt32(i));
2029}
2030
2031Value *Nucleus::createConstantBool(bool b)
2032{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002033 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002034 return V(::context->getConstantInt1(b));
2035}
2036
2037Value *Nucleus::createConstantByte(signed char i)
2038{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002039 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002040 return V(::context->getConstantInt8(i));
2041}
2042
2043Value *Nucleus::createConstantByte(unsigned char i)
2044{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002045 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002046 return V(::context->getConstantInt8(i));
2047}
2048
2049Value *Nucleus::createConstantShort(short i)
2050{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002051 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002052 return V(::context->getConstantInt16(i));
2053}
2054
2055Value *Nucleus::createConstantShort(unsigned short i)
2056{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002057 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002058 return V(::context->getConstantInt16(i));
2059}
2060
2061Value *Nucleus::createConstantFloat(float x)
2062{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002063 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002064 return V(::context->getConstantFloat(x));
2065}
2066
2067Value *Nucleus::createNullPointer(Type *Ty)
2068{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002069 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002070 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002071}
2072
Antonio Maiorano02a39532020-01-21 15:15:34 -05002073static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2074{
2075 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2076}
2077
Nicolas Capens157ba262019-12-10 17:49:14 -05002078Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2079{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002080 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002081 const int vectorSize = 16;
2082 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2083 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002084
2085 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002086 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002087
Antonio Maioranoa0957112020-03-04 15:06:19 -05002088 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002089 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002090
2091 switch((int)reinterpret_cast<intptr_t>(type))
2092 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002093 case Ice::IceType_v4i32:
2094 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002095 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002096 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002097 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002098 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002099 }
2100 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002101 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002102 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002103 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002104 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002105 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002106 }
2107 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002108 case Ice::IceType_v8i16:
2109 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002110 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002111 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002112 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002113 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002114 }
2115 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002116 case Ice::IceType_v16i8:
2117 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002118 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002119 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002120 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002121 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002122 }
2123 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002124 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002125 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002126 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002127 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002128 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002129 }
2130 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002131 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002132 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002133 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002134 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002135 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002136 }
2137 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002138 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002139 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002140 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002141 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002142 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002143 }
2144 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002145 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002146 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002147 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002148 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002149 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002150 }
2151 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002152 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002153 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002154 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002155 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002156 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002157 }
2158 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002159 default:
2160 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002161 }
2162
Antonio Maiorano02a39532020-01-21 15:15:34 -05002163 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002164
Antonio Maiorano02a39532020-01-21 15:15:34 -05002165 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002166 return V(result);
2167}
2168
2169Value *Nucleus::createConstantVector(const double *constants, Type *type)
2170{
Ben Clayton713b8d32019-12-17 20:37:56 +00002171 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002172}
2173
Antonio Maiorano62427e02020-02-13 09:18:05 -05002174Value *Nucleus::createConstantString(const char *v)
2175{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002176 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002177 return V(IceConstantData(v, strlen(v) + 1));
2178}
2179
Nicolas Capens157ba262019-12-10 17:49:14 -05002180Type *Void::getType()
2181{
2182 return T(Ice::IceType_void);
2183}
2184
2185Type *Bool::getType()
2186{
2187 return T(Ice::IceType_i1);
2188}
2189
2190Type *Byte::getType()
2191{
2192 return T(Ice::IceType_i8);
2193}
2194
2195Type *SByte::getType()
2196{
2197 return T(Ice::IceType_i8);
2198}
2199
2200Type *Short::getType()
2201{
2202 return T(Ice::IceType_i16);
2203}
2204
2205Type *UShort::getType()
2206{
2207 return T(Ice::IceType_i16);
2208}
2209
2210Type *Byte4::getType()
2211{
2212 return T(Type_v4i8);
2213}
2214
2215Type *SByte4::getType()
2216{
2217 return T(Type_v4i8);
2218}
2219
Ben Clayton713b8d32019-12-17 20:37:56 +00002220namespace {
2221RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002222{
Ben Clayton713b8d32019-12-17 20:37:56 +00002223 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002224}
2225
Ben Clayton713b8d32019-12-17 20:37:56 +00002226RValue<Byte> Extract(RValue<Byte8> val, int i)
2227{
2228 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
2229}
2230
2231RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2232{
2233 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
2234}
2235} // namespace
2236
Nicolas Capens157ba262019-12-10 17:49:14 -05002237RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2238{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002239 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002240 if(emulateIntrinsics)
2241 {
2242 Byte8 result;
2243 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2244 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2245 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2246 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2247 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2248 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2249 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2250 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2251
2252 return result;
2253 }
2254 else
2255 {
2256 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002257 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002258 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2259 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2260 paddusb->addArg(x.value);
2261 paddusb->addArg(y.value);
2262 ::basicBlock->appendInst(paddusb);
2263
2264 return RValue<Byte8>(V(result));
2265 }
2266}
2267
2268RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002271 if(emulateIntrinsics)
2272 {
2273 Byte8 result;
2274 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2275 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2276 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2277 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2278 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2279 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2280 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2281 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2282
2283 return result;
2284 }
2285 else
2286 {
2287 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002288 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002289 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2290 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2291 psubusw->addArg(x.value);
2292 psubusw->addArg(y.value);
2293 ::basicBlock->appendInst(psubusw);
2294
2295 return RValue<Byte8>(V(result));
2296 }
2297}
2298
2299RValue<SByte> Extract(RValue<SByte8> val, int i)
2300{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002301 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002302 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
2303}
2304
2305RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2306{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002307 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002308 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2309}
2310
2311RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2312{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002313 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002314 if(emulateIntrinsics)
2315 {
2316 SByte8 result;
2317 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2318 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2319 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2320 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2321 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2322 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2323 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2324 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2325
2326 return result;
2327 }
2328 else
2329 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002330#if defined(__i386__) || defined(__x86_64__)
2331 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2332 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2333 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002334
Ben Clayton713b8d32019-12-17 20:37:56 +00002335 return As<SByte8>(hi | lo);
2336#else
2337 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2338#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002339 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002340}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002341
Nicolas Capens157ba262019-12-10 17:49:14 -05002342RValue<Int> SignMask(RValue<Byte8> x)
2343{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002344 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002345 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002346 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002347 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2348 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002349 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002350 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002351 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002352 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002353 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002354 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2355 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2356 movmsk->addArg(x.value);
2357 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002358
Nicolas Capens157ba262019-12-10 17:49:14 -05002359 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002360 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002361}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002362
//	RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
//	{
//		return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
//	}
2367
Nicolas Capens157ba262019-12-10 17:49:14 -05002368RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2369{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002370 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002371 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2372}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002373
Nicolas Capens157ba262019-12-10 17:49:14 -05002374Type *Byte8::getType()
2375{
2376 return T(Type_v8i8);
2377}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002378
//	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
//	{
//		return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
//	}
2383
//	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
//	{
//		return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
//	}
2388
Nicolas Capens157ba262019-12-10 17:49:14 -05002389RValue<SByte> SaturateSigned(RValue<Short> x)
2390{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002391 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002392 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2393}
2394
2395RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2396{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002397 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002398 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002399 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002400 SByte8 result;
2401 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2402 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2403 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2404 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2405 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2406 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2407 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2408 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002409
Nicolas Capens157ba262019-12-10 17:49:14 -05002410 return result;
2411 }
2412 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002413 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002414 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002415 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002416 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2417 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2418 paddsb->addArg(x.value);
2419 paddsb->addArg(y.value);
2420 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002421
Nicolas Capens157ba262019-12-10 17:49:14 -05002422 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002423 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002424}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002425
Nicolas Capens157ba262019-12-10 17:49:14 -05002426RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2427{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002428 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002429 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002430 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002431 SByte8 result;
2432 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2433 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2434 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2435 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2436 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2437 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2438 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2439 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002440
Nicolas Capens157ba262019-12-10 17:49:14 -05002441 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002442 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002443 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002444 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002445 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002446 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002447 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2448 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2449 psubsb->addArg(x.value);
2450 psubsb->addArg(y.value);
2451 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002452
Nicolas Capens157ba262019-12-10 17:49:14 -05002453 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002454 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002455}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002456
Nicolas Capens157ba262019-12-10 17:49:14 -05002457RValue<Int> SignMask(RValue<SByte8> x)
2458{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002459 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002460 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002461 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002462 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2463 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002464 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002465 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002466 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002467 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002468 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002469 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2470 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2471 movmsk->addArg(x.value);
2472 ::basicBlock->appendInst(movmsk);
2473
2474 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002475 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002476}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002477
Nicolas Capens157ba262019-12-10 17:49:14 -05002478RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2479{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002480 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002481 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2482}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002483
Nicolas Capens157ba262019-12-10 17:49:14 -05002484RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2485{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002486 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002487 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2488}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002489
Nicolas Capens157ba262019-12-10 17:49:14 -05002490Type *SByte8::getType()
2491{
2492 return T(Type_v8i8);
2493}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002494
Nicolas Capens157ba262019-12-10 17:49:14 -05002495Type *Byte16::getType()
2496{
2497 return T(Ice::IceType_v16i8);
2498}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002499
Nicolas Capens157ba262019-12-10 17:49:14 -05002500Type *SByte16::getType()
2501{
2502 return T(Ice::IceType_v16i8);
2503}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002504
Nicolas Capens157ba262019-12-10 17:49:14 -05002505Type *Short2::getType()
2506{
2507 return T(Type_v2i16);
2508}
Nicolas Capensd4227962016-11-09 14:24:25 -05002509
Nicolas Capens157ba262019-12-10 17:49:14 -05002510Type *UShort2::getType()
2511{
2512 return T(Type_v2i16);
2513}
Nicolas Capensd4227962016-11-09 14:24:25 -05002514
Nicolas Capens157ba262019-12-10 17:49:14 -05002515Short4::Short4(RValue<Int4> cast)
2516{
Ben Clayton713b8d32019-12-17 20:37:56 +00002517 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002518 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2519 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2520
2521 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
2522 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2523
2524 storeValue(short4);
2525}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002526
//	Short4::Short4(RValue<Float> cast)
//	{
//	}
2530
Nicolas Capens157ba262019-12-10 17:49:14 -05002531Short4::Short4(RValue<Float4> cast)
2532{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002533 // TODO(b/150791192): Generalize and optimize
2534 auto smin = std::numeric_limits<short>::min();
2535 auto smax = std::numeric_limits<short>::max();
2536 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002537}
2538
2539RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2540{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002541 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002542 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002543 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002544 Short4 result;
2545 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2546 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2547 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2548 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002549
2550 return result;
2551 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002552 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002553 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002554 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2555 }
2556}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002557
Nicolas Capens157ba262019-12-10 17:49:14 -05002558RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2559{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002560 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002561 if(emulateIntrinsics)
2562 {
2563 Short4 result;
2564 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2565 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2566 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2567 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2568
2569 return result;
2570 }
2571 else
2572 {
2573 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2574 }
2575}
2576
2577RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2578{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002579 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002580 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2581 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2582 ::basicBlock->appendInst(cmp);
2583
2584 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2585 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2586 ::basicBlock->appendInst(select);
2587
2588 return RValue<Short4>(V(result));
2589}
2590
2591RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2592{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002593 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002594 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2595 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2596 ::basicBlock->appendInst(cmp);
2597
2598 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2599 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2600 ::basicBlock->appendInst(select);
2601
2602 return RValue<Short4>(V(result));
2603}
2604
2605RValue<Short> SaturateSigned(RValue<Int> x)
2606{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002607 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002608 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2609}
2610
2611RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2612{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002613 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002614 if(emulateIntrinsics)
2615 {
2616 Short4 result;
2617 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2618 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2619 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2620 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2621
2622 return result;
2623 }
2624 else
2625 {
2626 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002627 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002628 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2629 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2630 paddsw->addArg(x.value);
2631 paddsw->addArg(y.value);
2632 ::basicBlock->appendInst(paddsw);
2633
2634 return RValue<Short4>(V(result));
2635 }
2636}
2637
2638RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2639{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002640 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002641 if(emulateIntrinsics)
2642 {
2643 Short4 result;
2644 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2645 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2646 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2647 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2648
2649 return result;
2650 }
2651 else
2652 {
2653 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002654 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002655 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2656 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2657 psubsw->addArg(x.value);
2658 psubsw->addArg(y.value);
2659 ::basicBlock->appendInst(psubsw);
2660
2661 return RValue<Short4>(V(result));
2662 }
2663}
2664
2665RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2666{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002667 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002668 if(emulateIntrinsics)
2669 {
2670 Short4 result;
2671 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2672 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2673 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2674 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2675
2676 return result;
2677 }
2678 else
2679 {
2680 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002681 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002682 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2683 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2684 pmulhw->addArg(x.value);
2685 pmulhw->addArg(y.value);
2686 ::basicBlock->appendInst(pmulhw);
2687
2688 return RValue<Short4>(V(result));
2689 }
2690}
2691
2692RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2693{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002694 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002695 if(emulateIntrinsics)
2696 {
2697 Int2 result;
2698 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2699 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2700
2701 return result;
2702 }
2703 else
2704 {
2705 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002706 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002707 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2708 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2709 pmaddwd->addArg(x.value);
2710 pmaddwd->addArg(y.value);
2711 ::basicBlock->appendInst(pmaddwd);
2712
2713 return As<Int2>(V(result));
2714 }
2715}
2716
2717RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2718{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002719 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002720 if(emulateIntrinsics)
2721 {
2722 SByte8 result;
2723 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2724 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2725 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2726 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2727 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2728 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2729 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2730 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2731
2732 return result;
2733 }
2734 else
2735 {
2736 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002737 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002738 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2739 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2740 pack->addArg(x.value);
2741 pack->addArg(y.value);
2742 ::basicBlock->appendInst(pack);
2743
2744 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2745 }
2746}
2747
2748RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2749{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002750 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002751 if(emulateIntrinsics)
2752 {
2753 Byte8 result;
2754 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2755 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2756 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2757 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2758 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2759 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2760 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2761 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2762
2763 return result;
2764 }
2765 else
2766 {
2767 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002768 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002769 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2770 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2771 pack->addArg(x.value);
2772 pack->addArg(y.value);
2773 ::basicBlock->appendInst(pack);
2774
2775 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2776 }
2777}
2778
2779RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2780{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002781 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002782 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2783}
2784
2785RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2786{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002787 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002788 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2789}
2790
2791Type *Short4::getType()
2792{
2793 return T(Type_v4i16);
2794}
2795
2796UShort4::UShort4(RValue<Float4> cast, bool saturate)
2797{
2798 if(saturate)
2799 {
2800 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002801 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002802 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2803 // PackUnsigned takes care of 0x0000 saturation.
2804 Int4 int4(Min(cast, Float4(0xFFFF)));
2805 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002806 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002807 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002808 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002809 // ARM saturates the 32-bit integer result on overflow/undeflow.
2810 Int4 int4(cast);
2811 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002812 }
2813 else
2814 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002815 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002816 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002817 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002818 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002819 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002820 *this = Short4(Int4(cast));
2821 }
2822}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002823
Nicolas Capens157ba262019-12-10 17:49:14 -05002824RValue<UShort> Extract(RValue<UShort4> val, int i)
2825{
2826 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2827}
2828
2829RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2830{
2831 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2832}
2833
2834RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2835{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002836 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002837 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002838
Nicolas Capens157ba262019-12-10 17:49:14 -05002839 {
2840 UShort4 result;
2841 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2842 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2843 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2844 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2845
2846 return result;
2847 }
2848 else
2849 {
2850 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2851 }
2852}
2853
2854RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2855{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002856 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002857 if(emulateIntrinsics)
2858 {
2859 UShort4 result;
2860 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2861 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2862 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2863 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2864
2865 return result;
2866 }
2867 else
2868 {
2869 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2870 }
2871}
2872
2873RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2874{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002875 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002876 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2877 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2878 ::basicBlock->appendInst(cmp);
2879
2880 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2881 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2882 ::basicBlock->appendInst(select);
2883
2884 return RValue<UShort4>(V(result));
2885}
2886
2887RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2888{
2889 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2890 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2891 ::basicBlock->appendInst(cmp);
2892
2893 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2894 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2895 ::basicBlock->appendInst(select);
2896
2897 return RValue<UShort4>(V(result));
2898}
2899
2900RValue<UShort> SaturateUnsigned(RValue<Int> x)
2901{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002902 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002903 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2904}
2905
2906RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2907{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002908 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002909 if(emulateIntrinsics)
2910 {
2911 UShort4 result;
2912 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2913 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2914 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2915 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2916
2917 return result;
2918 }
2919 else
2920 {
2921 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002922 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002923 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2924 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2925 paddusw->addArg(x.value);
2926 paddusw->addArg(y.value);
2927 ::basicBlock->appendInst(paddusw);
2928
2929 return RValue<UShort4>(V(result));
2930 }
2931}
2932
2933RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2934{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002935 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002936 if(emulateIntrinsics)
2937 {
2938 UShort4 result;
2939 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2940 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2941 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2942 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2943
2944 return result;
2945 }
2946 else
2947 {
2948 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002949 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002950 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2951 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2952 psubusw->addArg(x.value);
2953 psubusw->addArg(y.value);
2954 ::basicBlock->appendInst(psubusw);
2955
2956 return RValue<UShort4>(V(result));
2957 }
2958}
2959
2960RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2961{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002962 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002963 if(emulateIntrinsics)
2964 {
2965 UShort4 result;
2966 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2967 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2968 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2969 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2970
2971 return result;
2972 }
2973 else
2974 {
2975 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002976 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002977 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2978 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2979 pmulhuw->addArg(x.value);
2980 pmulhuw->addArg(y.value);
2981 ::basicBlock->appendInst(pmulhuw);
2982
2983 return RValue<UShort4>(V(result));
2984 }
2985}
2986
2987RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2988{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002989 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002990 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2991
2992 // Scalarized implementation.
2993 Int4 result;
2994 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2995 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2996 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2997 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2998
2999 return result;
3000}
3001
3002RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3003{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003004 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003005 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3006
3007 if(false) // Partial product based implementation.
3008 {
3009 auto xh = x >> 16;
3010 auto yh = y >> 16;
3011 auto xl = x & UInt4(0x0000FFFF);
3012 auto yl = y & UInt4(0x0000FFFF);
3013 auto xlyh = xl * yh;
3014 auto xhyl = xh * yl;
3015 auto xlyhh = xlyh >> 16;
3016 auto xhylh = xhyl >> 16;
3017 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3018 auto xhyll = xhyl & UInt4(0x0000FFFF);
3019 auto xlylh = (xl * yl) >> 16;
3020 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3021
3022 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003023 }
3024
Nicolas Capens157ba262019-12-10 17:49:14 -05003025 // Scalarized implementation.
3026 Int4 result;
3027 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3028 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3029 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3030 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3031
3032 return As<UInt4>(result);
3033}
3034
3035RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3036{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003037 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003038 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003039 return UShort4(0);
3040}
3041
3042Type *UShort4::getType()
3043{
3044 return T(Type_v4i16);
3045}
3046
3047RValue<Short> Extract(RValue<Short8> val, int i)
3048{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003049 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003050 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3051}
3052
3053RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3054{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003055 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003056 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
3057}
3058
3059RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3060{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003061 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003062 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003063 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003064 Short8 result;
3065 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3066 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3067 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3068 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3069 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3070 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3071 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3072 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003073
Nicolas Capens157ba262019-12-10 17:49:14 -05003074 return result;
3075 }
3076 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003077 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003078 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003079 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003080}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003081
Nicolas Capens157ba262019-12-10 17:49:14 -05003082RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3083{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003084 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003085 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003086 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003087 Short8 result;
3088 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3089 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3090 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3091 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3092 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3093 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3094 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3095 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003096
Nicolas Capens157ba262019-12-10 17:49:14 -05003097 return result;
3098 }
3099 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003100 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003101 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003102 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003103}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003104
Nicolas Capens157ba262019-12-10 17:49:14 -05003105RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3106{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003107 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003108 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003109 return Int4(0);
3110}
3111
3112RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3113{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003114 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003115 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003116 return Short8(0);
3117}
3118
3119Type *Short8::getType()
3120{
3121 return T(Ice::IceType_v8i16);
3122}
3123
3124RValue<UShort> Extract(RValue<UShort8> val, int i)
3125{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003126 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003127 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
3128}
3129
3130RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3131{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003132 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003133 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
3134}
3135
3136RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3137{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003138 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003139 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003140 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003141 UShort8 result;
3142 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3143 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3144 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3145 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3146 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3147 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3148 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3149 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003150
Nicolas Capens157ba262019-12-10 17:49:14 -05003151 return result;
3152 }
3153 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003154 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003155 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003156 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003157}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003158
Nicolas Capens157ba262019-12-10 17:49:14 -05003159RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3160{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003161 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003162 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003163 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003164 UShort8 result;
3165 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3166 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3167 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3168 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3169 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3170 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3171 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3172 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003173
Nicolas Capens157ba262019-12-10 17:49:14 -05003174 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003175 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003176 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003177 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003178 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003179 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003180}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003181
Nicolas Capens157ba262019-12-10 17:49:14 -05003182RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3183{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003184 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003185 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003186 return UShort8(0);
3187}
3188
Nicolas Capens157ba262019-12-10 17:49:14 -05003189Type *UShort8::getType()
3190{
3191 return T(Ice::IceType_v8i16);
3192}
3193
Ben Clayton713b8d32019-12-17 20:37:56 +00003194RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003195{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003196 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003197 RValue<Int> res = val;
3198 val += 1;
3199 return res;
3200}
3201
Ben Clayton713b8d32019-12-17 20:37:56 +00003202const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003203{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003204 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003205 val += 1;
3206 return val;
3207}
3208
Ben Clayton713b8d32019-12-17 20:37:56 +00003209RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003210{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003211 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003212 RValue<Int> res = val;
3213 val -= 1;
3214 return res;
3215}
3216
Ben Clayton713b8d32019-12-17 20:37:56 +00003217const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003218{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003219 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003220 val -= 1;
3221 return val;
3222}
3223
3224RValue<Int> RoundInt(RValue<Float> cast)
3225{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003226 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003227 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003228 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003229 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3230 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003231 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003232 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003233 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003234 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003235 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003236 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3237 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3238 nearbyint->addArg(cast.value);
3239 ::basicBlock->appendInst(nearbyint);
3240
3241 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003242 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003243}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003244
Nicolas Capens157ba262019-12-10 17:49:14 -05003245Type *Int::getType()
3246{
3247 return T(Ice::IceType_i32);
3248}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003249
Nicolas Capens157ba262019-12-10 17:49:14 -05003250Type *Long::getType()
3251{
3252 return T(Ice::IceType_i64);
3253}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003254
Nicolas Capens157ba262019-12-10 17:49:14 -05003255UInt::UInt(RValue<Float> cast)
3256{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003257 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003258 // Smallest positive value representable in UInt, but not in Int
3259 const unsigned int ustart = 0x80000000u;
3260 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003261
Nicolas Capens157ba262019-12-10 17:49:14 -05003262 // If the value is negative, store 0, otherwise store the result of the conversion
3263 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003264 // Check if the value can be represented as an Int
3265 IfThenElse(cast >= ustartf,
3266 // If the value is too large, subtract ustart and re-add it after conversion.
3267 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3268 // Otherwise, just convert normally
3269 Int(cast)))
3270 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05003271}
Nicolas Capensa8086512016-11-07 17:32:17 -05003272
Ben Clayton713b8d32019-12-17 20:37:56 +00003273RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003274{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003275 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003276 RValue<UInt> res = val;
3277 val += 1;
3278 return res;
3279}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003280
Ben Clayton713b8d32019-12-17 20:37:56 +00003281const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003282{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003283 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003284 val += 1;
3285 return val;
3286}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003287
Ben Clayton713b8d32019-12-17 20:37:56 +00003288RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003289{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003290 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003291 RValue<UInt> res = val;
3292 val -= 1;
3293 return res;
3294}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003295
Ben Clayton713b8d32019-12-17 20:37:56 +00003296const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003297{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003298 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003299 val -= 1;
3300 return val;
3301}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003302
//	RValue<UInt> RoundUInt(RValue<Float> cast)
//	{
//		ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
//	}
3307
Nicolas Capens157ba262019-12-10 17:49:14 -05003308Type *UInt::getType()
3309{
3310 return T(Ice::IceType_i32);
3311}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003312
//	Int2::Int2(RValue<Int> cast)
//	{
//		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
//		Value *vector = Nucleus::createBitCast(extend, Int2::getType());
//
//		Constant *shuffle[2];
//		shuffle[0] = Nucleus::createConstantInt(0);
//		shuffle[1] = Nucleus::createConstantInt(0);
//
//		Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
//
//		storeValue(replicate);
//	}
3326
Nicolas Capens157ba262019-12-10 17:49:14 -05003327RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3328{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003329 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003330 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003331 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003332 Int2 result;
3333 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3334 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003335
Nicolas Capens157ba262019-12-10 17:49:14 -05003336 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003337 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003338 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003339 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003340 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003341 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003342}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003343
Nicolas Capens157ba262019-12-10 17:49:14 -05003344RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3345{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003346 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003347 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003348 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003349 Int2 result;
3350 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3351 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3352
3353 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003354 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003355 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003356 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003357 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003358 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003359}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003360
Nicolas Capens157ba262019-12-10 17:49:14 -05003361Type *Int2::getType()
3362{
3363 return T(Type_v2i32);
3364}
3365
3366RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3367{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003368 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003369 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003370 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003371 UInt2 result;
3372 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3373 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003374
Nicolas Capens157ba262019-12-10 17:49:14 -05003375 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003376 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003377 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003378 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003379 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003380 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003381}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003382
Nicolas Capens157ba262019-12-10 17:49:14 -05003383RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3384{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003385 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003386 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003387 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003388 UInt2 result;
3389 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3390 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003391
Nicolas Capens157ba262019-12-10 17:49:14 -05003392 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003393 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003394 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003395 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003396 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003397 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003398}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003399
Nicolas Capens157ba262019-12-10 17:49:14 -05003400Type *UInt2::getType()
3401{
3402 return T(Type_v2i32);
3403}
3404
Ben Clayton713b8d32019-12-17 20:37:56 +00003405Int4::Int4(RValue<Byte4> cast)
3406 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003407{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003408 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003409 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3410 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3411
3412 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003413 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003414 Value *b = Nucleus::createBitCast(a, Byte16::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003415 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003416
Ben Clayton713b8d32019-12-17 20:37:56 +00003417 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003418 Value *d = Nucleus::createBitCast(c, Short8::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003419 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003420
3421 Value *f = Nucleus::createBitCast(e, Int4::getType());
3422 storeValue(f);
3423}
3424
Ben Clayton713b8d32019-12-17 20:37:56 +00003425Int4::Int4(RValue<SByte4> cast)
3426 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003427{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003428 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003429 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3430 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3431
Ben Clayton713b8d32019-12-17 20:37:56 +00003432 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003433 Value *b = Nucleus::createBitCast(a, Byte16::getType());
3434 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3435
Ben Clayton713b8d32019-12-17 20:37:56 +00003436 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003437 Value *d = Nucleus::createBitCast(c, Short8::getType());
3438 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3439
3440 *this = As<Int4>(e) >> 24;
3441}
3442
Ben Clayton713b8d32019-12-17 20:37:56 +00003443Int4::Int4(RValue<Short4> cast)
3444 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003445{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003446 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003447 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003448 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3449
3450 *this = As<Int4>(c) >> 16;
3451}
3452
Ben Clayton713b8d32019-12-17 20:37:56 +00003453Int4::Int4(RValue<UShort4> cast)
3454 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003455{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003456 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003457 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003458 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
3459 Value *d = Nucleus::createBitCast(c, Int4::getType());
3460 storeValue(d);
3461}
3462
Ben Clayton713b8d32019-12-17 20:37:56 +00003463Int4::Int4(RValue<Int> rhs)
3464 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003465{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003466 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003467 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
3468
Ben Clayton713b8d32019-12-17 20:37:56 +00003469 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003470 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3471
3472 storeValue(replicate);
3473}
3474
3475RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3476{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003477 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003478 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003479 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003480 Int4 result;
3481 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3482 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3483 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3484 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003485
Nicolas Capens157ba262019-12-10 17:49:14 -05003486 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003487 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003488 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003489 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003490 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003491 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003492}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003493
Nicolas Capens157ba262019-12-10 17:49:14 -05003494RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3495{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003496 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003497 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003498 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003499 Int4 result;
3500 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3501 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3502 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3503 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003504
Nicolas Capens157ba262019-12-10 17:49:14 -05003505 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003506 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003507 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003508 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003509 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003510 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003511}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003512
Nicolas Capens157ba262019-12-10 17:49:14 -05003513RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3514{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003515 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003516 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
3517}
3518
3519RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3520{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003521 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003522 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
3523}
3524
3525RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3526{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003527 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003528 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
3529}
3530
3531RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3532{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003533 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003534 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
3535}
3536
3537RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3538{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003539 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003540 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
3541}
3542
3543RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3544{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003545 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003546 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
3547}
3548
3549RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3550{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003551 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003552 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3553 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3554 ::basicBlock->appendInst(cmp);
3555
3556 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3557 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3558 ::basicBlock->appendInst(select);
3559
3560 return RValue<Int4>(V(result));
3561}
3562
3563RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3564{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003565 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003566 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3567 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3568 ::basicBlock->appendInst(cmp);
3569
3570 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3571 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3572 ::basicBlock->appendInst(select);
3573
3574 return RValue<Int4>(V(result));
3575}
3576
3577RValue<Int4> RoundInt(RValue<Float4> cast)
3578{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003579 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003580 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003581 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003582 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3583 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003584 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003585 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003586 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003587 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003588 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003589 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3590 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3591 nearbyint->addArg(cast.value);
3592 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003593
3594 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003595 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003596}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003597
Nicolas Capens157ba262019-12-10 17:49:14 -05003598RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3599{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003600 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003601 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003602 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003603 Short8 result;
3604 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3605 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3606 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3607 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3608 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3609 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3610 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3611 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003612
Nicolas Capens157ba262019-12-10 17:49:14 -05003613 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003614 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003615 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003616 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003617 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003618 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003619 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3620 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3621 pack->addArg(x.value);
3622 pack->addArg(y.value);
3623 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003624
Nicolas Capens157ba262019-12-10 17:49:14 -05003625 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003626 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003627}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003628
Nicolas Capens157ba262019-12-10 17:49:14 -05003629RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3630{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003631 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003632 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003633 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003634 RValue<Int4> sx = As<Int4>(x);
3635 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003636
Nicolas Capens157ba262019-12-10 17:49:14 -05003637 RValue<Int4> sy = As<Int4>(y);
3638 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003639
Nicolas Capens157ba262019-12-10 17:49:14 -05003640 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003641 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003642 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003643 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003644 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003645 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003646 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3647 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3648 pack->addArg(x.value);
3649 pack->addArg(y.value);
3650 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003651
Nicolas Capens157ba262019-12-10 17:49:14 -05003652 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003653 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003654}
Nicolas Capens33438a62017-09-27 11:47:35 -04003655
Nicolas Capens157ba262019-12-10 17:49:14 -05003656RValue<Int> SignMask(RValue<Int4> x)
3657{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003658 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003659 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003660 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003661 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3662 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003663 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003664 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003665 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003666 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003667 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003668 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3669 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3670 movmsk->addArg(x.value);
3671 ::basicBlock->appendInst(movmsk);
3672
3673 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003674 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003675}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003676
Nicolas Capens157ba262019-12-10 17:49:14 -05003677Type *Int4::getType()
3678{
3679 return T(Ice::IceType_v4i32);
3680}
3681
Ben Clayton713b8d32019-12-17 20:37:56 +00003682UInt4::UInt4(RValue<Float4> cast)
3683 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003684{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003685 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003686 // Smallest positive value representable in UInt, but not in Int
3687 const unsigned int ustart = 0x80000000u;
3688 const float ustartf = float(ustart);
3689
3690 // Check if the value can be represented as an Int
3691 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3692 // If the value is too large, subtract ustart and re-add it after conversion.
3693 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003694 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003695 (~uiValue & Int4(cast));
3696 // If the value is negative, store 0, otherwise store the result of the conversion
3697 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3698}
3699
Ben Clayton713b8d32019-12-17 20:37:56 +00003700UInt4::UInt4(RValue<UInt> rhs)
3701 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003702{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003703 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003704 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3705
Ben Clayton713b8d32019-12-17 20:37:56 +00003706 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003707 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3708
3709 storeValue(replicate);
3710}
3711
3712RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3713{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003714 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003715 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003716 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003717 UInt4 result;
3718 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3719 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3720 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3721 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003722
Nicolas Capens157ba262019-12-10 17:49:14 -05003723 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003724 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003725 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003726 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003727 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003728 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003729}
Ben Clayton88816fa2019-05-15 17:08:14 +01003730
Nicolas Capens157ba262019-12-10 17:49:14 -05003731RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3732{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003733 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003734 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003735 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003736 UInt4 result;
3737 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3738 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3739 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3740 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003741
Nicolas Capens157ba262019-12-10 17:49:14 -05003742 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003743 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003744 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003745 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003746 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003747 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003748}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003749
Nicolas Capens157ba262019-12-10 17:49:14 -05003750RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3751{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003752 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003753 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3754}
3755
3756RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3757{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003758 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003759 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3760}
3761
3762RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3763{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003764 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003765 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3766}
3767
3768RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3769{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003770 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003771 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3772}
3773
3774RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3775{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003776 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003777 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3778}
3779
3780RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3781{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003782 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003783 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3784}
3785
3786RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3787{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003788 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003789 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3790 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3791 ::basicBlock->appendInst(cmp);
3792
3793 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3794 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3795 ::basicBlock->appendInst(select);
3796
3797 return RValue<UInt4>(V(result));
3798}
3799
3800RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3801{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003802 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003803 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3804 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3805 ::basicBlock->appendInst(cmp);
3806
3807 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3808 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3809 ::basicBlock->appendInst(select);
3810
3811 return RValue<UInt4>(V(result));
3812}
3813
3814Type *UInt4::getType()
3815{
3816 return T(Ice::IceType_v4i32);
3817}
3818
3819Type *Half::getType()
3820{
3821 return T(Ice::IceType_i16);
3822}
3823
3824RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3825{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003826 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003827 return 1.0f / x;
3828}
3829
3830RValue<Float> RcpSqrt_pp(RValue<Float> x)
3831{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003832 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003833 return Rcp_pp(Sqrt(x));
3834}
3835
3836RValue<Float> Sqrt(RValue<Float> x)
3837{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003838 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003839 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003840 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003841 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3842 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3843 sqrt->addArg(x.value);
3844 ::basicBlock->appendInst(sqrt);
3845
3846 return RValue<Float>(V(result));
3847}
3848
3849RValue<Float> Round(RValue<Float> x)
3850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003852 return Float4(Round(Float4(x))).x;
3853}
3854
3855RValue<Float> Trunc(RValue<Float> x)
3856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003858 return Float4(Trunc(Float4(x))).x;
3859}
3860
3861RValue<Float> Frac(RValue<Float> x)
3862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003864 return Float4(Frac(Float4(x))).x;
3865}
3866
3867RValue<Float> Floor(RValue<Float> x)
3868{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003869 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003870 return Float4(Floor(Float4(x))).x;
3871}
3872
3873RValue<Float> Ceil(RValue<Float> x)
3874{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003875 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003876 return Float4(Ceil(Float4(x))).x;
3877}
3878
3879Type *Float::getType()
3880{
3881 return T(Ice::IceType_f32);
3882}
3883
3884Type *Float2::getType()
3885{
3886 return T(Type_v2f32);
3887}
3888
Ben Clayton713b8d32019-12-17 20:37:56 +00003889Float4::Float4(RValue<Float> rhs)
3890 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003891{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003892 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003893 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3894
Ben Clayton713b8d32019-12-17 20:37:56 +00003895 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003896 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3897
3898 storeValue(replicate);
3899}
3900
3901RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3902{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003903 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003904 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3905 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3906 ::basicBlock->appendInst(cmp);
3907
3908 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3909 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3910 ::basicBlock->appendInst(select);
3911
3912 return RValue<Float4>(V(result));
3913}
3914
3915RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3916{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003917 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003918 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3919 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3920 ::basicBlock->appendInst(cmp);
3921
3922 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3923 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3924 ::basicBlock->appendInst(select);
3925
3926 return RValue<Float4>(V(result));
3927}
3928
3929RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003932 return Float4(1.0f) / x;
3933}
3934
3935RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3936{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003937 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003938 return Rcp_pp(Sqrt(x));
3939}
3940
3941RValue<Float4> Sqrt(RValue<Float4> x)
3942{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003943 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003944 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003945 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003946 Float4 result;
3947 result.x = Sqrt(Float(Float4(x).x));
3948 result.y = Sqrt(Float(Float4(x).y));
3949 result.z = Sqrt(Float(Float4(x).z));
3950 result.w = Sqrt(Float(Float4(x).w));
3951
3952 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003953 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003954 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003955 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003956 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003957 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003958 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3959 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3960 sqrt->addArg(x.value);
3961 ::basicBlock->appendInst(sqrt);
3962
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003963 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003964 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003965}
Nicolas Capens157ba262019-12-10 17:49:14 -05003966
3967RValue<Int> SignMask(RValue<Float4> x)
3968{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003969 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003970 if(emulateIntrinsics || CPUID::ARM)
3971 {
3972 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3973 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3974 }
3975 else
3976 {
3977 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003978 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003979 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3980 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3981 movmsk->addArg(x.value);
3982 ::basicBlock->appendInst(movmsk);
3983
3984 return RValue<Int>(V(result));
3985 }
3986}
3987
3988RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3989{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003990 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003991 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3992}
3993
3994RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3995{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003996 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003997 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3998}
3999
4000RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4001{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004002 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004003 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
4004}
4005
4006RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4007{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004008 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004009 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
4010}
4011
4012RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4013{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004014 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004015 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
4016}
4017
4018RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4019{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004020 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004021 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
4022}
4023
4024RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4025{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004026 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004027 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
4028}
4029
4030RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4031{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004032 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004033 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
4034}
4035
4036RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4037{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004038 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004039 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
4040}
4041
4042RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4043{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004044 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004045 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
4046}
4047
4048RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4049{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004050 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004051 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
4052}
4053
4054RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4055{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004056 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004057 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
4058}
4059
4060RValue<Float4> Round(RValue<Float4> x)
4061{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004062 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004063 if(emulateIntrinsics || CPUID::ARM)
4064 {
4065 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4066 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4067 }
4068 else if(CPUID::SSE4_1)
4069 {
4070 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004071 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004072 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4073 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4074 round->addArg(x.value);
4075 round->addArg(::context->getConstantInt32(0));
4076 ::basicBlock->appendInst(round);
4077
4078 return RValue<Float4>(V(result));
4079 }
4080 else
4081 {
4082 return Float4(RoundInt(x));
4083 }
4084}
4085
4086RValue<Float4> Trunc(RValue<Float4> x)
4087{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004088 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004089 if(CPUID::SSE4_1)
4090 {
4091 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004092 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004093 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4094 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4095 round->addArg(x.value);
4096 round->addArg(::context->getConstantInt32(3));
4097 ::basicBlock->appendInst(round);
4098
4099 return RValue<Float4>(V(result));
4100 }
4101 else
4102 {
4103 return Float4(Int4(x));
4104 }
4105}
4106
4107RValue<Float4> Frac(RValue<Float4> x)
4108{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004109 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004110 Float4 frc;
4111
4112 if(CPUID::SSE4_1)
4113 {
4114 frc = x - Floor(x);
4115 }
4116 else
4117 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004118 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004119
Ben Clayton713b8d32019-12-17 20:37:56 +00004120 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004121 }
4122
4123 // x - floor(x) can be 1.0 for very small negative x.
4124 // Clamp against the value just below 1.0.
4125 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4126}
4127
4128RValue<Float4> Floor(RValue<Float4> x)
4129{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004130 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004131 if(CPUID::SSE4_1)
4132 {
4133 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004134 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004135 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4136 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4137 round->addArg(x.value);
4138 round->addArg(::context->getConstantInt32(1));
4139 ::basicBlock->appendInst(round);
4140
4141 return RValue<Float4>(V(result));
4142 }
4143 else
4144 {
4145 return x - Frac(x);
4146 }
4147}
4148
4149RValue<Float4> Ceil(RValue<Float4> x)
4150{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004151 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004152 if(CPUID::SSE4_1)
4153 {
4154 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004155 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004156 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4157 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4158 round->addArg(x.value);
4159 round->addArg(::context->getConstantInt32(2));
4160 ::basicBlock->appendInst(round);
4161
4162 return RValue<Float4>(V(result));
4163 }
4164 else
4165 {
4166 return -Floor(-x);
4167 }
4168}
4169
4170Type *Float4::getType()
4171{
4172 return T(Ice::IceType_v4f32);
4173}
4174
4175RValue<Long> Ticks()
4176{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004177 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004178 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004179 return Long(Int(0));
4180}
4181
Ben Clayton713b8d32019-12-17 20:37:56 +00004182RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004183{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004184 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004185 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004186}
4187
Ben Clayton713b8d32019-12-17 20:37:56 +00004188RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004190 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004191 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004192}
4193
Ben Clayton713b8d32019-12-17 20:37:56 +00004194Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004195{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004196 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano16ae92a2020-03-10 10:53:24 -04004197 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004198}
4199
4200void Breakpoint()
4201{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004202 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004203 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004204 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4205 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4206 ::basicBlock->appendInst(trap);
4207}
4208
Ben Clayton713b8d32019-12-17 20:37:56 +00004209void Nucleus::createFence(std::memory_order memoryOrder)
4210{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004211 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004212 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4213 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4214 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4215 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4216 inst->addArg(order);
4217 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004218}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004219
Ben Clayton713b8d32019-12-17 20:37:56 +00004220Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4221{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004222 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004223 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004224 return nullptr;
4225}
4226void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4227{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004228 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004229 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004230}
Nicolas Capens157ba262019-12-10 17:49:14 -05004231
4232RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4233{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004234 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004235 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4236}
4237
4238RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4239{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004240 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004241 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4242}
4243
4244void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4245{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004246 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004247 return emulated::Scatter(base, val, offsets, mask, alignment);
4248}
4249
4250void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4251{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004252 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004253 return emulated::Scatter(base, val, offsets, mask, alignment);
4254}
4255
4256RValue<Float> Exp2(RValue<Float> x)
4257{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004258 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004259 return emulated::Exp2(x);
4260}
4261
4262RValue<Float> Log2(RValue<Float> x)
4263{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004264 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004265 return emulated::Log2(x);
4266}
4267
4268RValue<Float4> Sin(RValue<Float4> x)
4269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004271 return emulated::Sin(x);
4272}
4273
4274RValue<Float4> Cos(RValue<Float4> x)
4275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004277 return emulated::Cos(x);
4278}
4279
4280RValue<Float4> Tan(RValue<Float4> x)
4281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004282 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004283 return emulated::Tan(x);
4284}
4285
4286RValue<Float4> Asin(RValue<Float4> x)
4287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004289 return emulated::Asin(x);
4290}
4291
4292RValue<Float4> Acos(RValue<Float4> x)
4293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004295 return emulated::Acos(x);
4296}
4297
4298RValue<Float4> Atan(RValue<Float4> x)
4299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004301 return emulated::Atan(x);
4302}
4303
4304RValue<Float4> Sinh(RValue<Float4> x)
4305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004307 return emulated::Sinh(x);
4308}
4309
4310RValue<Float4> Cosh(RValue<Float4> x)
4311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004313 return emulated::Cosh(x);
4314}
4315
4316RValue<Float4> Tanh(RValue<Float4> x)
4317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004319 return emulated::Tanh(x);
4320}
4321
4322RValue<Float4> Asinh(RValue<Float4> x)
4323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004325 return emulated::Asinh(x);
4326}
4327
4328RValue<Float4> Acosh(RValue<Float4> x)
4329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004331 return emulated::Acosh(x);
4332}
4333
4334RValue<Float4> Atanh(RValue<Float4> x)
4335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004337 return emulated::Atanh(x);
4338}
4339
4340RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004343 return emulated::Atan2(x, y);
4344}
4345
4346RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004348 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004349 return emulated::Pow(x, y);
4350}
4351
4352RValue<Float4> Exp(RValue<Float4> x)
4353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004354 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004355 return emulated::Exp(x);
4356}
4357
4358RValue<Float4> Log(RValue<Float4> x)
4359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004360 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004361 return emulated::Log(x);
4362}
4363
4364RValue<Float4> Exp2(RValue<Float4> x)
4365{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004366 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004367 return emulated::Exp2(x);
4368}
4369
4370RValue<Float4> Log2(RValue<Float4> x)
4371{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004372 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004373 return emulated::Log2(x);
4374}
4375
4376RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4377{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004378 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004379 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004380 {
Ben Claytonce54c592020-02-07 11:30:51 +00004381 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004382 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004383 }
4384 else
4385 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004386 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004387 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4388 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4389 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4390 ctlz->addArg(x.value);
4391 ::basicBlock->appendInst(ctlz);
4392
4393 return RValue<UInt>(V(result));
4394 }
4395}
4396
4397RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4398{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004399 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004400 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004401 {
Ben Claytonce54c592020-02-07 11:30:51 +00004402 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004403 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004404 }
4405 else
4406 {
4407 // TODO: implement vectorized version in Subzero
4408 UInt4 result;
4409 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4410 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4411 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4412 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4413 return result;
4414 }
4415}
4416
4417RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4418{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004419 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004420 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004421 {
Ben Claytonce54c592020-02-07 11:30:51 +00004422 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004423 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004424 }
4425 else
4426 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004427 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004428 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4429 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4430 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4431 ctlz->addArg(x.value);
4432 ::basicBlock->appendInst(ctlz);
4433
4434 return RValue<UInt>(V(result));
4435 }
4436}
4437
4438RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4439{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004440 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004441 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004442 {
Ben Claytonce54c592020-02-07 11:30:51 +00004443 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004444 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004445 }
4446 else
4447 {
4448 // TODO: implement vectorized version in Subzero
4449 UInt4 result;
4450 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4451 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4452 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4453 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4454 return result;
4455 }
4456}
4457
Antonio Maiorano370cba52019-12-31 11:36:07 -05004458RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4459{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004460 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004461 return emulated::MinAtomic(x, y, memoryOrder);
4462}
4463
4464RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4465{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004466 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004467 return emulated::MinAtomic(x, y, memoryOrder);
4468}
4469
4470RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4471{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004472 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004473 return emulated::MaxAtomic(x, y, memoryOrder);
4474}
4475
4476RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4477{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004478 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004479 return emulated::MaxAtomic(x, y, memoryOrder);
4480}
4481
// Emits a print of the caller's backtrace location, but only when both
// ENABLE_RR_DEBUG_INFO and ENABLE_RR_EMIT_PRINT_LOCATION are defined.
// In every other configuration this function is a no-op.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO) && defined(ENABLE_RR_EMIT_PRINT_LOCATION)
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO && ENABLE_RR_EMIT_PRINT_LOCATION
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004490void EmitDebugVariable(Value *value) {}
// Intentionally empty: this implementation has nothing to flush.
void FlushDebug()
{
}
4492
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004493namespace {
4494namespace coro {
4495
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004496// Instance data per generated coroutine
4497// This is the "handle" type used for Coroutine functions
4498// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4499struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004500{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004501 bool useInternalScheduler = false;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004502 marl::Event suspended; // the coroutine is suspended on a yield()
4503 marl::Event resumed; // the caller is suspended on an await()
4504 marl::Event done{ marl::Event::Mode::Manual }; // the coroutine should stop at the next yield()
4505 marl::Event terminated{ marl::Event::Mode::Manual }; // the coroutine has finished.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004506 void *promisePtr = nullptr;
4507};
4508
4509CoroutineData *createCoroutineData()
4510{
4511 return new CoroutineData{};
4512}
4513
4514void destroyCoroutineData(CoroutineData *coroData)
4515{
4516 delete coroData;
4517}
4518
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004519// suspend() pauses execution of the coroutine, and resumes execution from the
4520// caller's call to await().
4521// Returns true if await() is called again, or false if coroutine_destroy()
4522// is called.
4523bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004524{
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004525 auto *data = reinterpret_cast<CoroutineData *>(handle);
4526 data->suspended.signal();
4527 data->resumed.wait();
4528 return !data->done.test();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004529}
4530
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004531// resume() is called by await(), blocking until the coroutine calls yield()
4532// or the coroutine terminates.
4533void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004534{
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004535 auto *data = reinterpret_cast<CoroutineData *>(handle);
4536 data->resumed.signal();
4537 data->suspended.wait();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004538}
4539
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004540// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4541// until the coroutine ends, and deletes the coroutine data.
4542void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004543{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004544 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004545 coroData->done.signal(); // signal that the coroutine should stop at next (or current) yield.
4546 coroData->resumed.signal(); // wake the coroutine if blocked on a yield.
4547 coroData->terminated.wait(); // wait for the coroutine to return.
4548 if(coroData->useInternalScheduler)
4549 {
4550 ::getOrCreateScheduler().unbind();
4551 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004552 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004553}
4554
4555namespace detail {
4556thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4557} // namespace detail
4558
4559void setHandleParam(Nucleus::CoroutineHandle handle)
4560{
4561 ASSERT(!detail::coroHandle);
4562 detail::coroHandle = handle;
4563}
4564
4565Nucleus::CoroutineHandle getHandleParam()
4566{
4567 ASSERT(detail::coroHandle);
4568 auto handle = detail::coroHandle;
4569 detail::coroHandle = {};
4570 return handle;
4571}
4572
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004573bool isDone(Nucleus::CoroutineHandle handle)
4574{
4575 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004576 return coroData->done.test();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004577}
4578
4579void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4580{
4581 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4582 coroData->promisePtr = promisePtr;
4583}
4584
4585void *getPromisePtr(Nucleus::CoroutineHandle handle)
4586{
4587 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4588 return coroData->promisePtr;
4589}
4590
4591} // namespace coro
4592} // namespace
4593
4594// Used to generate coroutines.
4595// Lifetime: from yield to acquireCoroutine
4596class CoroutineGenerator
4597{
4598public:
4599 CoroutineGenerator()
4600 {
4601 }
4602
4603 // Inserts instructions at the top of the current function to make it a coroutine.
4604 void generateCoroutineBegin()
4605 {
4606 // Begin building the main coroutine_begin() function.
4607 // We insert these instructions at the top of the entry node,
4608 // before existing reactor-generated instructions.
4609
4610 // CoroutineHandle coroutine_begin(<Arguments>)
4611 // {
4612 // this->handle = coro::getHandleParam();
4613 //
4614 // YieldType promise;
4615 // coro::setPromisePtr(handle, &promise); // For await
4616 //
4617 // ... <REACTOR CODE> ...
4618 //
4619
4620 // Save original entry block and current block, and create a new entry block and make it current.
4621 // This new block will be used to inject code above the begin routine's existing code. We make
4622 // this block branch to the original entry block as the last instruction.
4623 auto origEntryBB = ::function->getEntryNode();
4624 auto origCurrBB = ::basicBlock;
4625 auto newBB = ::function->makeNode();
4626 sz::replaceEntryNode(::function, newBB);
4627 ::basicBlock = newBB;
4628
4629 // this->handle = coro::getHandleParam();
4630 this->handle = sz::Call(::function, ::basicBlock, coro::getHandleParam);
4631
4632 // YieldType promise;
4633 // coro::setPromisePtr(handle, &promise); // For await
4634 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
4635 sz::Call(::function, ::basicBlock, coro::setPromisePtr, this->handle, this->promise);
4636
4637 // Branch to original entry block
4638 auto br = Ice::InstBr::create(::function, origEntryBB);
4639 ::basicBlock->appendInst(br);
4640
4641 // Restore current block for future instructions
4642 ::basicBlock = origCurrBB;
4643 }
4644
4645 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4646 void generateYield(Value *val)
4647 {
4648 // ... <REACTOR CODE> ...
4649 //
4650 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004651 // if (!coro::suspend(handle)) {
4652 // return false; // coroutine has been stopped by the caller.
4653 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004654 //
4655 // ... <REACTOR CODE> ...
4656
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004657 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004658 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004659
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004660 // if (!coro::suspend(handle)) {
4661 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4662 auto doneBlock = Nucleus::createBasicBlock();
4663 auto resumeBlock = Nucleus::createBasicBlock();
4664 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4665
4666 // return false; // coroutine has been stopped by the caller.
4667 ::basicBlock = doneBlock;
4668 Nucleus::createRetVoid(); // coroutine return value is ignored.
4669
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004670 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004671 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004672 }
4673
4674 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4675
4676 // Generates the await function for the current coroutine.
4677 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4678 static FunctionUniquePtr generateAwaitFunction()
4679 {
4680 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4681 // {
4682 // if (coro::isDone())
4683 // {
4684 // return false;
4685 // }
4686 // else // resume
4687 // {
4688 // YieldType* promise = coro::getPromisePtr(handle);
4689 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004690 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004691 // return true;
4692 // }
4693 // }
4694
4695 // Subzero doesn't support bool types (IceType_i1) as return type
4696 const Ice::Type ReturnType = Ice::IceType_i32;
4697 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4698 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4699
4700 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4701 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4702
4703 Ice::Variable *handle = awaitFunc->getArgs()[0];
4704 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4705
4706 auto doneBlock = awaitFunc->makeNode();
4707 {
4708 // return false;
4709 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4710 doneBlock->appendInst(ret);
4711 }
4712
4713 auto resumeBlock = awaitFunc->makeNode();
4714 {
4715 // YieldType* promise = coro::getPromisePtr(handle);
4716 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4717
4718 // *out = *promise;
4719 // Load promise value
4720 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4721 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4722 resumeBlock->appendInst(load);
4723 // Then store it in output param
4724 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4725 resumeBlock->appendInst(store);
4726
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004727 // coro::resume(handle);
4728 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004729
4730 // return true;
4731 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4732 resumeBlock->appendInst(ret);
4733 }
4734
4735 // if (coro::isDone())
4736 // {
4737 // <doneBlock>
4738 // }
4739 // else // resume
4740 // {
4741 // <resumeBlock>
4742 // }
4743 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004744 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004745 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4746 bb->appendInst(br);
4747
4748 return FunctionUniquePtr{ awaitFunc };
4749 }
4750
4751 // Generates the destroy function for the current coroutine.
4752 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4753 static FunctionUniquePtr generateDestroyFunction()
4754 {
4755 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4756 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004757 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004758 // return;
4759 // }
4760
4761 const Ice::Type ReturnType = Ice::IceType_void;
4762 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4763
4764 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4765 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4766
4767 Ice::Variable *handle = destroyFunc->getArgs()[0];
4768
4769 auto *bb = destroyFunc->getEntryNode();
4770
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004771 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4772 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004773
4774 // return;
4775 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4776 bb->appendInst(ret);
4777
4778 return FunctionUniquePtr{ destroyFunc };
4779 }
4780
4781private:
4782 Ice::Variable *handle{};
4783 Ice::Variable *promise{};
4784};
4785
4786static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4787{
4788 // This doubles up as our coroutine handle
4789 auto coroData = coro::createCoroutineData();
4790
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004791 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4792 if(coroData->useInternalScheduler)
4793 {
4794 ::getOrCreateScheduler().bind();
4795 }
4796
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004797 marl::schedule([=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004798 // Store handle in TLS so that the coroutine can grab it right away, before
4799 // any fiber switch occurs.
4800 coro::setHandleParam(coroData);
4801
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004802 beginFunc();
4803
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004804 coroData->done.signal(); // coroutine is done.
4805 coroData->suspended.signal(); // resume any blocking await() call.
4806 coroData->terminated.signal(); // signal that the coroutine data is ready for freeing.
4807 });
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004808
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004809 coroData->suspended.wait(); // block until the first yield or coroutine end
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004810
4811 return coroData;
4812}
4813
4814void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4815{
4816 // Start by creating a regular function
4817 createFunction(yieldType, params);
4818
4819 // Save in case yield() is called
4820 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4821 ::coroYieldType = yieldType;
4822}
4823
4824void Nucleus::yield(Value *val)
4825{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004826 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004827 Variable::materializeAll();
4828
4829 // On first yield, we start generating coroutine functions
4830 if(!::coroGen)
4831 {
4832 ::coroGen = std::make_shared<CoroutineGenerator>();
4833 ::coroGen->generateCoroutineBegin();
4834 }
4835
4836 ASSERT(::coroGen);
4837 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004838}
4839
Ben Clayton713b8d32019-12-17 20:37:56 +00004840static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4841{
4842 return false;
4843}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004844
4845static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4846{
4847}
Nicolas Capens157ba262019-12-10 17:49:14 -05004848
4849std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4850{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004851 if(::coroGen)
4852 {
4853 // Finish generating coroutine functions
4854 {
4855 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004856 createRetVoidIfNoRet();
4857 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004858
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004859 auto awaitFunc = ::coroGen->generateAwaitFunction();
4860 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004861
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004862 // At this point, we no longer need the CoroutineGenerator.
4863 ::coroGen.reset();
4864 ::coroYieldType = nullptr;
4865
4866 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4867 { name, "await", "destroy" },
4868 cfgEdit);
4869
4870 return routine;
4871 }
4872 else
4873 {
4874 {
4875 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4876 createRetVoidIfNoRet();
4877 }
4878
4879 ::coroYieldType = nullptr;
4880
4881 // Not an actual coroutine (no yields), so return stubs for await and destroy
4882 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4883
4884 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4885 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4886 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4887 return routine;
4888 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004889}
4890
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004891Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004892{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004893 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4894
4895 if(isCoroutine)
4896 {
4897 return rr::invokeCoroutineBegin(func);
4898 }
4899 else
4900 {
4901 // For regular routines, just invoke the begin func directly
4902 return func();
4903 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004904}
Nicolas Capens157ba262019-12-10 17:49:14 -05004905
4906} // namespace rr