// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050017#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000018#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050019#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040020
Nicolas Capens1a3ce872018-10-10 10:42:36 -040021#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000022#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040023
Nicolas Capens598f8d82016-09-26 15:09:10 -040024#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfgNode.h"
26#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000027#include "src/IceELFStreamer.h"
28#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040029#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031
Ben Clayton713b8d32019-12-17 20:37:56 +000032#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040033#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040035
Antonio Maiorano8bce0672020-02-28 13:13:45 -050036#include "marl/event.h"
37
Nicolas Capens6a990f82018-07-06 15:54:07 -040038#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040041
Nicolas Capensbd65da92017-01-05 16:31:06 -050042#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000043# ifndef WIN32_LEAN_AND_MEAN
44# define WIN32_LEAN_AND_MEAN
45# endif // !WIN32_LEAN_AND_MEAN
46# ifndef NOMINMAX
47# define NOMINMAX
48# endif // !NOMINMAX
49# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050050#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
Ben Clayton683bad82020-02-10 23:57:09 +000052#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040053#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000054#include <limits>
55#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040056
Antonio Maiorano02a39532020-01-21 15:15:34 -050057// Subzero utility functions
58// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050059namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050060namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061
62Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
63{
64 uint32_t sequenceNumber = 0;
65 auto function = Ice::Cfg::create(context, sequenceNumber).release();
66
67 Ice::CfgLocalAllocatorScope allocScope{ function };
68
69 for(auto type : paramTypes)
70 {
71 Ice::Variable *arg = function->makeVariable(type);
72 function->addArg(arg);
73 }
74
75 Ice::CfgNode *node = function->makeNode();
76 function->setEntryNode(node);
77
78 return function;
79}
80
81Ice::Type getPointerType(Ice::Type elementType)
82{
83 if(sizeof(void *) == 8)
84 {
85 return Ice::IceType_i64;
86 }
87 else
88 {
89 return Ice::IceType_i32;
90 }
91}
92
93Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
94{
95 int typeSize = Ice::typeWidthInBytes(type);
96 int totalSize = typeSize * (arraySize ? arraySize : 1);
97
98 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
99 auto address = function->makeVariable(getPointerType(type));
100 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
101 function->getEntryNode()->getInsts().push_front(alloca);
102
103 return address;
104}
105
106Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500107{
108 if(sizeof(void *) == 8)
109 {
110 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
111 }
112 else
113 {
114 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
115 }
116}
117
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400118// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
119Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500120
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400121// Wrapper for calls on C functions with Ice types
122Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
123{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500124 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400125
126 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
127 // then truncate result to bool.
128 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
129 const bool returningBool = (retTy == Ice::IceType_i1);
130 if(returningBool)
131 {
132 ret = function->makeVariable(Ice::IceType_i32);
133 }
134 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500135 {
136 ret = function->makeVariable(retTy);
137 }
138
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400139 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500140 for(auto arg : iceArgs)
141 {
142 call->addArg(arg);
143 }
144
145 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400146
147 if(returningBool)
148 {
149 // Truncate result to bool so that if any (lsb) bits were set, result will be true
150 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
151 }
152
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500153 return ret;
154}
155
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400156Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
157{
158 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
159 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
160}
161
Antonio Maiorano62427e02020-02-13 09:18:05 -0500162// Wrapper for calls on C functions with Ice types
163template<typename Return, typename... CArgs, typename... RArgs>
164Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
165{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400166 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
167
Nicolas Capens519cf222020-05-08 15:27:19 -0400168 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500169 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500170 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500171}
172
Antonio Maiorano02a39532020-01-21 15:15:34 -0500173// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500174Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500175{
176 Ice::Variable *result = function->makeVariable(v->getType());
177 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
178 basicBlock->appendInst(cast);
179 return result;
180}
181
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400182Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
183{
184 Ice::Variable *to = function->makeVariable(toType);
185 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
186 basicBlock->appendInst(cast);
187 return to;
188}
189
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500190Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500191{
192 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
193 // absolute address. We circumvent this by casting to a non-const variable, and loading
194 // from that.
195 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
196 {
197 ptr = sz::createUnconstCast(function, basicBlock, cptr);
198 }
199
200 Ice::Variable *result = function->makeVariable(type);
201 auto load = Ice::InstLoad::create(function, result, ptr, align);
202 basicBlock->appendInst(load);
203
204 return result;
205}
206
207} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500208} // namespace
209
// Forward declarations of rr classes that the file-scope globals below point to.
namespace rr {
class CoroutineGenerator;
class ELFMemoryStreamer;
}  // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500214
215namespace {
216
217// Default configuration settings. Must be accessed under mutex lock.
218std::mutex defaultConfigLock;
219rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000220{
Nicolas Capens157ba262019-12-10 17:49:14 -0500221 // This uses a static in a function to avoid the cost of a global static
222 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
223 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000224 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500225 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000226}
227
Nicolas Capens157ba262019-12-10 17:49:14 -0500228Ice::GlobalContext *context = nullptr;
229Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400230Ice::CfgNode *entryBlock = nullptr;
231Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500232Ice::CfgNode *basicBlock = nullptr;
233Ice::CfgLocalAllocatorScope *allocator = nullptr;
234rr::ELFMemoryStreamer *routine = nullptr;
235
236std::mutex codegenMutex;
237
238Ice::ELFFileStreamer *elfFile = nullptr;
239Ice::Fdstream *out = nullptr;
240
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500241// Coroutine globals
242rr::Type *coroYieldType = nullptr;
243std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500244marl::Scheduler &getOrCreateScheduler()
245{
246 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100247 marl::Scheduler::Config cfg;
248 cfg.setWorkerThreadCount(8);
249 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500250 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500251
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500252 return *scheduler;
253}
Nicolas Capens157ba262019-12-10 17:49:14 -0500254} // Anonymous namespace
255
256namespace {
257
// Normalize architecture detection: when only the _M_IX86 / _M_AMD64 / _M_X64 macros are
// defined, also define __i386__ / __x86_64__, which the rest of this file tests for.
#if defined(_M_IX86) && !defined(__i386__)
#	define __i386__ 1
#endif

#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
#	define __x86_64__ 1
#endif
265
Antonio Maiorano370cba52019-12-31 11:36:07 -0500266Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400267{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500268 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100269 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500270 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000271 case rr::Optimization::Level::None: return Ice::Opt_m1;
272 case rr::Optimization::Level::Less: return Ice::Opt_m1;
273 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500274 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
275 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100276 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500277 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400278}
279
Antonio Maiorano370cba52019-12-31 11:36:07 -0500280Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
281{
282 switch(memoryOrder)
283 {
284 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
285 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
286 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
287 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
288 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
289 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
290 }
291 return Ice::Intrinsics::MemoryOrderInvalid;
292}
293
// Host CPU properties, evaluated once when the static members are initialized.
class CPUID
{
public:
	static const bool ARM;     // True when compiled for 32-bit or 64-bit ARM.
	static const bool SSE4_1;  // True when the cpuid query in detectSSE4_1() reports the feature.

private:
	// Runs the x86 cpuid instruction for leaf |info| and stores EAX, EBX, ECX and EDX into
	// |registers|[0..3]. On non-x86 builds all four entries are set to zero.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Compile-time architecture check; building for an unlisted architecture is an error.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Tests bit 19 (0x00080000) of ECX as returned by cpuid leaf 1. Always false on non-x86.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		constexpr int sse41Bit = 1 << 19;

		int registers[4];
		cpuid(registers, 1);
		return (registers[2] & sse41Bit) != 0;
#else
		return false;
#endif
	}
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
346const bool emulateIntrinsics = false;
347const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400348
Nicolas Capens157ba262019-12-10 17:49:14 -0500349constexpr bool subzeroDumpEnabled = false;
350constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500351
352#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500353static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
354static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500355#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500356
357} // anonymous namespace
358
359namespace rr {
360
// Returns the name identifying this Reactor backend.
std::string BackendName()
{
	return std::string("Subzero");
}
365
Ben Clayton713b8d32019-12-17 20:37:56 +0000366const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500367 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500368};
369
370enum EmulatedType
371{
372 EmulatedShift = 16,
373 EmulatedV2 = 2 << EmulatedShift,
374 EmulatedV4 = 4 << EmulatedShift,
375 EmulatedV8 = 8 << EmulatedShift,
376 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
377
378 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
379 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
380 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000381 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
382 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500383 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
384};
385
Ben Clayton713b8d32019-12-17 20:37:56 +0000386class Value : public Ice::Operand
387{};
388class SwitchCases : public Ice::InstSwitch
389{};
390class BasicBlock : public Ice::CfgNode
391{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500392
393Ice::Type T(Type *t)
394{
395 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
396 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500397}
398
Nicolas Capens157ba262019-12-10 17:49:14 -0500399Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400400{
Ben Clayton713b8d32019-12-17 20:37:56 +0000401 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500402}
403
404Type *T(EmulatedType t)
405{
Ben Clayton713b8d32019-12-17 20:37:56 +0000406 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500407}
408
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500409std::vector<Ice::Type> T(const std::vector<Type *> &types)
410{
411 std::vector<Ice::Type> result;
412 result.reserve(types.size());
413 for(auto &t : types)
414 {
415 result.push_back(T(t));
416 }
417 return result;
418}
419
Nicolas Capens157ba262019-12-10 17:49:14 -0500420Value *V(Ice::Operand *v)
421{
Ben Clayton713b8d32019-12-17 20:37:56 +0000422 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500423}
424
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500425Ice::Operand *V(Value *v)
426{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500427 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500428}
429
Antonio Maiorano62427e02020-02-13 09:18:05 -0500430std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
431{
432 std::vector<Ice::Operand *> result;
433 result.reserve(values.size());
434 for(auto &v : values)
435 {
436 result.push_back(V(v));
437 }
438 return result;
439}
440
Nicolas Capens157ba262019-12-10 17:49:14 -0500441BasicBlock *B(Ice::CfgNode *b)
442{
Ben Clayton713b8d32019-12-17 20:37:56 +0000443 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500444}
445
446static size_t typeSize(Type *type)
447{
448 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400449 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500450 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800451 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000452 case Type_v2i32: return 8;
453 case Type_v4i16: return 8;
454 case Type_v2i16: return 4;
455 case Type_v8i8: return 8;
456 case Type_v4i8: return 4;
457 case Type_v2f32: return 8;
458 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500459 }
460 }
461
462 return Ice::typeWidthInBytes(T(type));
463}
464
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400465static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500466{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400467 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500468 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
469 {
470 Nucleus::createRetVoid();
471 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400472
473 // Connect the entry block to the top of the initial basic block
474 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
475 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500476}
477
Ben Clayton713b8d32019-12-17 20:37:56 +0000478using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
479using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500480
481inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
482{
Ben Clayton713b8d32019-12-17 20:37:56 +0000483 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500484}
485
486inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
487{
488 return &sectionHeader(elfHeader)[index];
489}
490
491static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
492{
493 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
494
495 uint32_t index = relocation.getSymbol();
496 int table = relocationTable.sh_link;
497 void *symbolValue = nullptr;
498
499 if(index != SHN_UNDEF)
500 {
501 if(table == SHN_UNDEF) return nullptr;
502 const SectionHeader *symbolTable = elfSection(elfHeader, table);
503
504 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
505 if(index >= symtab_entries)
506 {
507 ASSERT(index < symtab_entries && "Symbol Index out of range");
508 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800509 }
510
Nicolas Capens157ba262019-12-10 17:49:14 -0500511 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000512 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500513 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800514
Nicolas Capens157ba262019-12-10 17:49:14 -0500515 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400516 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500517 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000518 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400519 }
520 else
521 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500522 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400523 }
Nicolas Capens66478362016-10-13 15:36:36 -0400524 }
525
Nicolas Capens157ba262019-12-10 17:49:14 -0500526 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000527 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500528
529 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400530 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400531 switch(relocation.getType())
532 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000533 case R_ARM_NONE:
534 // No relocation
535 break;
536 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500537 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000538 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500539 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
540 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
541 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400542 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000543 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500544 {
545 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
546 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
547 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400548 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000549 default:
550 ASSERT(false && "Unsupported relocation type");
551 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400552 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500553 }
554 else
555 {
556 switch(relocation.getType())
557 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000558 case R_386_NONE:
559 // No relocation
560 break;
561 case R_386_32:
562 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
563 break;
564 case R_386_PC32:
565 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
566 break;
567 default:
568 ASSERT(false && "Unsupported relocation type");
569 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500570 }
Nicolas Capens66478362016-10-13 15:36:36 -0400571 }
572
Nicolas Capens157ba262019-12-10 17:49:14 -0500573 return symbolValue;
574}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400575
Nicolas Capens157ba262019-12-10 17:49:14 -0500576static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
577{
578 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
579
580 uint32_t index = relocation.getSymbol();
581 int table = relocationTable.sh_link;
582 void *symbolValue = nullptr;
583
584 if(index != SHN_UNDEF)
585 {
586 if(table == SHN_UNDEF) return nullptr;
587 const SectionHeader *symbolTable = elfSection(elfHeader, table);
588
589 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
590 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400591 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500592 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400593 return nullptr;
594 }
595
Nicolas Capens157ba262019-12-10 17:49:14 -0500596 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000597 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500598 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400599
Nicolas Capens157ba262019-12-10 17:49:14 -0500600 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400601 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500602 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000603 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500604 }
605 else
606 {
607 return nullptr;
608 }
609 }
Nicolas Capens66478362016-10-13 15:36:36 -0400610
Nicolas Capens157ba262019-12-10 17:49:14 -0500611 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000612 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
613 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400614
Nicolas Capens157ba262019-12-10 17:49:14 -0500615 switch(relocation.getType())
616 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000617 case R_X86_64_NONE:
618 // No relocation
619 break;
620 case R_X86_64_64:
621 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
622 break;
623 case R_X86_64_PC32:
624 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
625 break;
626 case R_X86_64_32S:
627 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
628 break;
629 default:
630 ASSERT(false && "Unsupported relocation type");
631 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500632 }
633
634 return symbolValue;
635}
636
// Location of one function's machine code inside a loaded ELF image.
// Both members are initialized, so a default-constructed EntryPoint describes "no code".
struct EntryPoint
{
	const void *entry = nullptr;  // Start of the code; nullptr when no code was found.
	size_t codeSize = 0;          // Size of the code in bytes.
};
642
643std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
644{
645 ASSERT(functionNames.size() > 0);
646 std::vector<EntryPoint> entryPoints(functionNames.size());
647
Ben Clayton713b8d32019-12-17 20:37:56 +0000648 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500649
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400650 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500651 if(!elfHeader->checkMagic())
652 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400653 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500654 }
655
656 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000657 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
658#if defined(__i386__)
659 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
660#elif defined(__x86_64__)
661 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
662#elif defined(__arm__)
663 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
664#elif defined(__aarch64__)
665 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
666#elif defined(__mips__)
667 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
668#else
669# error "Unsupported platform"
670#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500671
Ben Clayton713b8d32019-12-17 20:37:56 +0000672 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500673
674 for(int i = 0; i < elfHeader->e_shnum; i++)
675 {
676 if(sectionHeader[i].sh_type == SHT_PROGBITS)
677 {
678 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
679 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400680 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500681 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400682 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500683
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400684 for(size_t j = 0; j < functionNames.size(); ++j)
685 {
686 if(strstr(sectionName, functionNames[j]) != nullptr)
687 {
688 return j;
689 }
690 }
691
692 UNREACHABLE("Failed to find executable section that matches input function names");
693 return static_cast<size_t>(-1);
694 };
695
696 size_t index = findSectionNameEntryIndex();
697 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
698 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400699 }
700 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500701 else if(sectionHeader[i].sh_type == SHT_REL)
702 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000703 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400704
Nicolas Capens157ba262019-12-10 17:49:14 -0500705 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
706 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000707 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500708 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
709 }
710 }
711 else if(sectionHeader[i].sh_type == SHT_RELA)
712 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000713 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500714
715 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
716 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000717 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500718 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
719 }
720 }
721 }
722
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400723 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500724}
725
726template<typename T>
727struct ExecutableAllocator
728{
729 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000730 template<class U>
731 ExecutableAllocator(const ExecutableAllocator<U> &other)
732 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500733
734 using value_type = T;
735 using size_type = std::size_t;
736
737 T *allocate(size_type n)
738 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000739 return (T *)allocateMemoryPages(
740 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500741 }
742
743 void deallocate(T *p, size_type n)
744 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800745 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500746 }
747};
748
749class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
750{
751 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
752 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
753
754public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000755 ELFMemoryStreamer()
756 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500757 {
758 position = 0;
759 buffer.reserve(0x1000);
760 }
761
762 ~ELFMemoryStreamer() override
763 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500764 }
765
766 void write8(uint8_t Value) override
767 {
768 if(position == (uint64_t)buffer.size())
769 {
770 buffer.push_back(Value);
771 position++;
772 }
773 else if(position < (uint64_t)buffer.size())
774 {
775 buffer[position] = Value;
776 position++;
777 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000778 else
779 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500780 }
781
782 void writeBytes(llvm::StringRef Bytes) override
783 {
784 std::size_t oldSize = buffer.size();
785 buffer.resize(oldSize + Bytes.size());
786 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
787 position += Bytes.size();
788 }
789
790 uint64_t tell() const override { return position; }
791
792 void seek(uint64_t Off) override { position = Off; }
793
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400794 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500795 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400796 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500797
798#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500799 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
800#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400801 for(auto &entryPoint : entryPoints)
802 {
803 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
804 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500805#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500806
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400807 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400808 }
809
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500810 void finalize()
811 {
812 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
813
814 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
815 }
816
Ben Clayton713b8d32019-12-17 20:37:56 +0000817 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400818 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500819 ASSERT(func);
820 funcs[index] = func;
821 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400822
Nicolas Capens157ba262019-12-10 17:49:14 -0500823 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400824 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500825 ASSERT(funcs[index]);
826 return funcs[index];
827 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400828
Antonio Maiorano02a39532020-01-21 15:15:34 -0500829 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500830 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500831 // TODO(b/148086935): Replace with a buffer allocator.
832 size_t space = size + alignment;
833 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
834 void *ptr = buf.get();
835 void *alignedPtr = std::align(alignment, size, ptr, space);
836 ASSERT(alignedPtr);
837 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500838 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500839 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500840 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400841
Nicolas Capens157ba262019-12-10 17:49:14 -0500842private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000843 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500844 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
845 std::size_t position;
846 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500847};
848
#ifdef ENABLE_RR_PRINT
// Passes |vals| to sz::Call with rr::DebugPrintf as the callee and i32 as the
// return type, using the current function and basic block.
void VPrintf(const std::vector<Value *> &vals)
{
	const void *debugPrintf = reinterpret_cast<const void *>(rr::DebugPrintf);
	sz::Call(::function, ::basicBlock, Ice::IceType_i32, debugPrintf, V(vals), true);
}
#endif  // ENABLE_RR_PRINT
855
Nicolas Capens157ba262019-12-10 17:49:14 -0500856Nucleus::Nucleus()
857{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400858 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500859
860 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
861 Ice::ClFlags::getParsedClFlags(Flags);
862
Ben Clayton713b8d32019-12-17 20:37:56 +0000863#if defined(__arm__)
864 Flags.setTargetArch(Ice::Target_ARM32);
865 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
866#elif defined(__mips__)
867 Flags.setTargetArch(Ice::Target_MIPS32);
868 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
869#else // x86
870 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
871 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
872#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500873 Flags.setOutFileType(Ice::FT_Elf);
874 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
875 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
876 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
877 Flags.setDisableHybridAssembly(true);
878
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500879 // Emit functions into separate sections in the ELF so we can find them by name
880 Flags.setFunctionSections(true);
881
Nicolas Capens157ba262019-12-10 17:49:14 -0500882 static llvm::raw_os_ostream cout(std::cout);
883 static llvm::raw_os_ostream cerr(std::cerr);
884
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500885 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500886 {
887 // Decorate text asm with liveness info
888 Flags.setDecorateAsm(true);
889 }
890
Ben Clayton713b8d32019-12-17 20:37:56 +0000891 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500892 {
893 std::error_code errorCode;
894 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
895 ::elfFile = new Ice::ELFFileStreamer(*out);
896 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
897 }
898 else
899 {
900 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
901 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
902 ::routine = elfMemory;
903 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400904
Nicolas Capens46485a02020-06-17 01:31:10 -0400905#if defined(_WIN32) // TODO(b/157525646): Initialization of thread_local variables in shared libraries may not be supported on all platforms.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400906 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400907#endif
Nicolas Capens67cdce92020-05-01 21:37:20 -0400908 Variable::unmaterializedVariables = new std::unordered_set<const Variable *>();
Nicolas Capens157ba262019-12-10 17:49:14 -0500909}
910
911Nucleus::~Nucleus()
912{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400913 delete Variable::unmaterializedVariables;
914 Variable::unmaterializedVariables = nullptr;
915
Nicolas Capens157ba262019-12-10 17:49:14 -0500916 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500917 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500918
919 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500920 ::allocator = nullptr;
921
Nicolas Capens157ba262019-12-10 17:49:14 -0500922 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500923 ::function = nullptr;
924
Nicolas Capens157ba262019-12-10 17:49:14 -0500925 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500926 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500927
928 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500929 ::elfFile = nullptr;
930
Nicolas Capens157ba262019-12-10 17:49:14 -0500931 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500932 ::out = nullptr;
933
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400934 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500935 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400936 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500937
938 ::codegenMutex.unlock();
939}
940
941void Nucleus::setDefaultConfig(const Config &cfg)
942{
943 std::unique_lock<std::mutex> lock(::defaultConfigLock);
944 ::defaultConfig() = cfg;
945}
946
947void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
948{
949 std::unique_lock<std::mutex> lock(::defaultConfigLock);
950 auto &config = ::defaultConfig();
951 config = cfgEdit.apply(config);
952}
953
954Config Nucleus::getDefaultConfig()
955{
956 std::unique_lock<std::mutex> lock(::defaultConfigLock);
957 return ::defaultConfig();
958}
959
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500960// This function lowers and produces executable binary code in memory for the input functions,
961// and returns a Routine with the entry points to these functions.
962template<size_t Count>
963static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500964{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500965 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
966 // and GlobalContext::emitItems.
967
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500968 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500969 {
970 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500971 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500972 }
973
974 ::context->emitFileHeader();
975
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500976 // Translate
977
978 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500979 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500980 Ice::Cfg *currFunc = functions[i];
981
982 // Install function allocator in TLS for Cfg-specific container allocators
983 Ice::CfgLocalAllocatorScope allocScope(currFunc);
984
985 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
986
987 rr::optimize(currFunc);
988
989 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500990 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500991
992 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500993 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500994
995 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
996
997 if(subzeroEmitTextAsm)
998 {
999 currFunc->emit();
1000 }
1001
1002 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -05001003 }
1004
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001005 // Emit items
1006
1007 ::context->lowerGlobals("");
1008
Nicolas Capens157ba262019-12-10 17:49:14 -05001009 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001010
1011 for(size_t i = 0; i < Count; ++i)
1012 {
1013 Ice::Cfg *currFunc = functions[i];
1014
1015 // Accumulate globals from functions to emit into the "last" section at the end
1016 auto globals = currFunc->getGlobalInits();
1017 if(globals && !globals->empty())
1018 {
1019 ::context->getGlobals()->merge(globals.get());
1020 }
1021
1022 auto assembler = currFunc->releaseAssembler();
1023 assembler->alignFunction();
1024 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1025 }
1026
Nicolas Capens157ba262019-12-10 17:49:14 -05001027 ::context->lowerGlobals("last");
1028 ::context->lowerConstants();
1029 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001030
Nicolas Capens157ba262019-12-10 17:49:14 -05001031 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001032 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001033 objectWriter->writeNonUserSections();
1034
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001035 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001036 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1037 ASSERT(entryPoints.size() == Count);
1038 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001039 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001040 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001041 }
1042
1043 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001044
1045 Routine *handoffRoutine = ::routine;
1046 ::routine = nullptr;
1047
1048 return std::shared_ptr<Routine>(handoffRoutine);
1049}
1050
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001051std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1052{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001053 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001054 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1055}
1056
Nicolas Capens157ba262019-12-10 17:49:14 -05001057Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1058{
1059 Ice::Type type = T(t);
1060 int typeSize = Ice::typeWidthInBytes(type);
1061 int totalSize = typeSize * (arraySize ? arraySize : 1);
1062
1063 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1064 auto address = ::function->makeVariable(T(getPointerType(t)));
1065 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1066 ::function->getEntryNode()->getInsts().push_front(alloca);
1067
1068 return V(address);
1069}
1070
1071BasicBlock *Nucleus::createBasicBlock()
1072{
1073 return B(::function->makeNode());
1074}
1075
1076BasicBlock *Nucleus::getInsertBlock()
1077{
1078 return B(::basicBlock);
1079}
1080
1081void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1082{
Ben Clayton713b8d32019-12-17 20:37:56 +00001083 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001084
1085 Variable::materializeAll();
1086
1087 ::basicBlock = basicBlock;
1088}
1089
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001090void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001091{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001092 ASSERT(::function == nullptr);
1093 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001094 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001095 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001096 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001097
1098 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1099
1100 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1101 // becomes invalid if another one is created; for example, when creating await and destroy functions
1102 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1103 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001104 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1105
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001106 ::entryBlock = ::function->getEntryNode();
1107 ::basicBlock = ::function->makeNode();
1108 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001109}
1110
1111Value *Nucleus::getArgument(unsigned int index)
1112{
1113 return V(::function->getArgs()[index]);
1114}
1115
1116void Nucleus::createRetVoid()
1117{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001118 RR_DEBUG_INFO_UPDATE_LOC();
1119
Nicolas Capens157ba262019-12-10 17:49:14 -05001120 // Code generated after this point is unreachable, so any variables
1121 // being read can safely return an undefined value. We have to avoid
1122 // materializing variables after the terminator ret instruction.
1123 Variable::killUnmaterialized();
1124
1125 Ice::InstRet *ret = Ice::InstRet::create(::function);
1126 ::basicBlock->appendInst(ret);
1127}
1128
1129void Nucleus::createRet(Value *v)
1130{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001131 RR_DEBUG_INFO_UPDATE_LOC();
1132
Nicolas Capens157ba262019-12-10 17:49:14 -05001133 // Code generated after this point is unreachable, so any variables
1134 // being read can safely return an undefined value. We have to avoid
1135 // materializing variables after the terminator ret instruction.
1136 Variable::killUnmaterialized();
1137
1138 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1139 ::basicBlock->appendInst(ret);
1140}
1141
1142void Nucleus::createBr(BasicBlock *dest)
1143{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001144 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001145 Variable::materializeAll();
1146
1147 auto br = Ice::InstBr::create(::function, dest);
1148 ::basicBlock->appendInst(br);
1149}
1150
1151void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1152{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001153 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001154 Variable::materializeAll();
1155
1156 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1157 ::basicBlock->appendInst(br);
1158}
1159
1160static bool isCommutative(Ice::InstArithmetic::OpKind op)
1161{
1162 switch(op)
1163 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001164 case Ice::InstArithmetic::Add:
1165 case Ice::InstArithmetic::Fadd:
1166 case Ice::InstArithmetic::Mul:
1167 case Ice::InstArithmetic::Fmul:
1168 case Ice::InstArithmetic::And:
1169 case Ice::InstArithmetic::Or:
1170 case Ice::InstArithmetic::Xor:
1171 return true;
1172 default:
1173 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001174 }
1175}
1176
1177static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1178{
1179 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1180
1181 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1182
1183 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1184 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1185 ::basicBlock->appendInst(arithmetic);
1186
1187 return V(result);
1188}
1189
1190Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1191{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001192 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001193 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1194}
1195
1196Value *Nucleus::createSub(Value *lhs, Value *rhs)
1197{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001198 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001199 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1200}
1201
1202Value *Nucleus::createMul(Value *lhs, Value *rhs)
1203{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001204 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001205 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1206}
1207
1208Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1209{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001210 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001211 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1212}
1213
1214Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1215{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001216 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001217 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1218}
1219
1220Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1221{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001222 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001223 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1224}
1225
1226Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1227{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001228 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001229 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1230}
1231
1232Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1233{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001234 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001235 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1236}
1237
1238Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1239{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001240 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001241 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1242}
1243
1244Value *Nucleus::createURem(Value *lhs, Value *rhs)
1245{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001246 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001247 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1248}
1249
1250Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1251{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001252 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001253 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1254}
1255
1256Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1257{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001258 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001259 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1260 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001261 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001262 return nullptr;
1263}
1264
1265RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1266{
1267 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001268}
1269
1270Value *Nucleus::createShl(Value *lhs, Value *rhs)
1271{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001272 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001273 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1274}
1275
1276Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1277{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001278 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001279 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1280}
1281
1282Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1283{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001284 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001285 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1286}
1287
1288Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1289{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001290 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001291 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1292}
1293
1294Value *Nucleus::createOr(Value *lhs, Value *rhs)
1295{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001296 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001297 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1298}
1299
1300Value *Nucleus::createXor(Value *lhs, Value *rhs)
1301{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001302 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001303 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1304}
1305
1306Value *Nucleus::createNeg(Value *v)
1307{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001308 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001309 return createSub(createNullValue(T(v->getType())), v);
1310}
1311
1312Value *Nucleus::createFNeg(Value *v)
1313{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001314 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001315 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1316 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001317
1318 return createFSub(negativeZero, v);
1319}
1320
1321Value *Nucleus::createNot(Value *v)
1322{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001323 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001324 if(Ice::isScalarIntegerType(v->getType()))
1325 {
1326 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1327 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001328 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001329 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001330 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001331 return createXor(v, createConstantVector(c, T(v->getType())));
1332 }
1333}
1334
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001335static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1336{
1337#if defined(__i386__) || defined(__x86_64__)
1338 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1339 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1340 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1341#else
1342 if(atomic)
1343 {
1344 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1345 }
1346 if(memoryOrder != std::memory_order_relaxed)
1347 {
1348 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1349 }
1350#endif
1351
1352 // Vulkan doesn't allow sequential memory order
1353 ASSERT(memoryOrder != std::memory_order_seq_cst);
1354}
1355
Nicolas Capens157ba262019-12-10 17:49:14 -05001356Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1357{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001358 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001359 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001360
1361 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001362 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001363
Ben Clayton713b8d32019-12-17 20:37:56 +00001364 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001365 {
1366 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001367 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001368 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001369 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001370 auto pointer = RValue<Pointer<Byte>>(ptr);
1371 Int x = *Pointer<Int>(pointer);
1372
1373 Int4 vector;
1374 vector = Insert(vector, x, 0);
1375
Antonio Maiorano02a39532020-01-21 15:15:34 -05001376 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001377 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1378 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001379 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001380 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001381 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001382 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001383 auto pointer = RValue<Pointer<Byte>>(ptr);
1384 Int x = *Pointer<Int>(pointer);
1385 Int y = *Pointer<Int>(pointer + 4);
1386
1387 Int4 vector;
1388 vector = Insert(vector, x, 0);
1389 vector = Insert(vector, y, 1);
1390
Antonio Maiorano02a39532020-01-21 15:15:34 -05001391 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001392 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1393 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001394 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001395 else
1396 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001397 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001398 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001399 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001400 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001401 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001402 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001403 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1404 load->addArg(ptr);
1405 load->addArg(::context->getConstantInt32(typeSize(type)));
1406 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001407 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001408 }
1409 else
1410 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001411 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001412 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001413
Antonio Maiorano02a39532020-01-21 15:15:34 -05001414 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001415 return V(result);
1416}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001417
Nicolas Capens157ba262019-12-10 17:49:14 -05001418Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1419{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001420 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001421 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001422
Ben Clayton713b8d32019-12-17 20:37:56 +00001423#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001424 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001425 if(align != 0)
1426 {
1427 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1428 call->addArg(ptr);
1429 call->addArg(::context->getConstantInt64(typeSize(type)));
1430 ::basicBlock->appendInst(call);
1431 }
1432#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001433
Nicolas Capens157ba262019-12-10 17:49:14 -05001434 int valueType = (int)reinterpret_cast<intptr_t>(type);
1435
Ben Clayton713b8d32019-12-17 20:37:56 +00001436 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001437 {
1438 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001439 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001440 if(typeSize(type) == 4)
1441 {
1442 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1443 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1444 ::basicBlock->appendInst(bitcast);
1445
1446 RValue<Int4> v(V(vector));
1447
1448 auto pointer = RValue<Pointer<Byte>>(ptr);
1449 Int x = Extract(v, 0);
1450 *Pointer<Int>(pointer) = x;
1451 }
1452 else if(typeSize(type) == 8)
1453 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001454 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001455 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1456 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1457 ::basicBlock->appendInst(bitcast);
1458
1459 RValue<Int4> v(V(vector));
1460
1461 auto pointer = RValue<Pointer<Byte>>(ptr);
1462 Int x = Extract(v, 0);
1463 *Pointer<Int>(pointer) = x;
1464 Int y = Extract(v, 1);
1465 *Pointer<Int>(pointer + 4) = y;
1466 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001467 else
1468 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001469 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001470 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001471 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001472 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001473 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1474 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1475 store->addArg(value);
1476 store->addArg(ptr);
1477 store->addArg(::context->getConstantInt32(typeSize(type)));
1478 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001479 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001480 }
1481 else
1482 {
1483 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001484
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001485 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001486 ::basicBlock->appendInst(store);
1487 }
1488
1489 return value;
1490}
1491
1492Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1493{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001494 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001495 ASSERT(index->getType() == Ice::IceType_i32);
1496
1497 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1498 {
1499 int32_t offset = constant->getValue() * (int)typeSize(type);
1500
1501 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001502 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001503 return ptr;
1504 }
1505
Nicolas Capens157ba262019-12-10 17:49:14 -05001506 return createAdd(ptr, createConstantInt(offset));
1507 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001508
Nicolas Capens157ba262019-12-10 17:49:14 -05001509 if(!Ice::isByteSizedType(T(type)))
1510 {
1511 index = createMul(index, createConstantInt((int)typeSize(type)));
1512 }
1513
Ben Clayton713b8d32019-12-17 20:37:56 +00001514 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001515 {
1516 if(unsignedIndex)
1517 {
1518 index = createZExt(index, T(Ice::IceType_i64));
1519 }
1520 else
1521 {
1522 index = createSExt(index, T(Ice::IceType_i64));
1523 }
1524 }
1525
1526 return createAdd(ptr, index);
1527}
1528
Antonio Maiorano370cba52019-12-31 11:36:07 -05001529static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1530{
1531 Ice::Variable *result = ::function->makeVariable(value->getType());
1532
1533 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1534 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1535 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1536 auto op = ::context->getConstantInt32(rmwOp);
1537 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1538 inst->addArg(op);
1539 inst->addArg(ptr);
1540 inst->addArg(value);
1541 inst->addArg(order);
1542 ::basicBlock->appendInst(inst);
1543
1544 return V(result);
1545}
1546
Nicolas Capens157ba262019-12-10 17:49:14 -05001547Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1548{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001549 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001550 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001551}
1552
1553Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1554{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001555 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001556 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001557}
1558
1559Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1560{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001561 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001562 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001563}
1564
1565Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1566{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001567 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001568 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001569}
1570
1571Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1572{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001573 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001574 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001575}
1576
1577Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1578{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001579 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001580 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001581}
1582
1583Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1584{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001585 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001586 Ice::Variable *result = ::function->makeVariable(value->getType());
1587
1588 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1589 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1590 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1591 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1592 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1593 inst->addArg(ptr);
1594 inst->addArg(compare);
1595 inst->addArg(value);
1596 inst->addArg(orderEq);
1597 inst->addArg(orderNeq);
1598 ::basicBlock->appendInst(inst);
1599
1600 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001601}
1602
1603static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1604{
1605 if(v->getType() == T(destType))
1606 {
1607 return v;
1608 }
1609
1610 Ice::Variable *result = ::function->makeVariable(T(destType));
1611 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1612 ::basicBlock->appendInst(cast);
1613
1614 return V(result);
1615}
1616
1617Value *Nucleus::createTrunc(Value *v, Type *destType)
1618{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001619 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001620 return createCast(Ice::InstCast::Trunc, v, destType);
1621}
1622
1623Value *Nucleus::createZExt(Value *v, Type *destType)
1624{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001625 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001626 return createCast(Ice::InstCast::Zext, v, destType);
1627}
1628
1629Value *Nucleus::createSExt(Value *v, Type *destType)
1630{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001631 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001632 return createCast(Ice::InstCast::Sext, v, destType);
1633}
1634
1635Value *Nucleus::createFPToUI(Value *v, Type *destType)
1636{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001637 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001638 return createCast(Ice::InstCast::Fptoui, v, destType);
1639}
1640
1641Value *Nucleus::createFPToSI(Value *v, Type *destType)
1642{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001643 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001644 return createCast(Ice::InstCast::Fptosi, v, destType);
1645}
1646
1647Value *Nucleus::createSIToFP(Value *v, Type *destType)
1648{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001649 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001650 return createCast(Ice::InstCast::Sitofp, v, destType);
1651}
1652
1653Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1654{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001655 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001656 return createCast(Ice::InstCast::Fptrunc, v, destType);
1657}
1658
1659Value *Nucleus::createFPExt(Value *v, Type *destType)
1660{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001661 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001662 return createCast(Ice::InstCast::Fpext, v, destType);
1663}
1664
1665Value *Nucleus::createBitCast(Value *v, Type *destType)
1666{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001667 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001668 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1669 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1670 // emulate them by writing to the stack and reading back as the destination type.
1671 if(emulateMismatchedBitCast)
1672 {
1673 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1674 {
1675 Value *address = allocateStackVariable(destType);
1676 createStore(v, address, T(v->getType()));
1677 return createLoad(address, destType);
1678 }
1679 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1680 {
1681 Value *address = allocateStackVariable(T(v->getType()));
1682 createStore(v, address, T(v->getType()));
1683 return createLoad(address, destType);
1684 }
1685 }
1686
1687 return createCast(Ice::InstCast::Bitcast, v, destType);
1688}
1689
1690static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1691{
1692 ASSERT(lhs->getType() == rhs->getType());
1693
1694 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1695 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1696 ::basicBlock->appendInst(cmp);
1697
1698 return V(result);
1699}
1700
1701Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1702{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001703 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001704 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1705}
1706
1707Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1708{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001709 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001710 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1711}
1712
1713Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1714{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001715 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001716 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1717}
1718
1719Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1720{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001721 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001722 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1723}
1724
1725Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1726{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001727 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001728 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1729}
1730
1731Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1732{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001733 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001734 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1735}
1736
1737Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1738{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001739 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001740 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1741}
1742
1743Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1744{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001745 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001746 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1747}
1748
1749Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1750{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001751 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001752 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1753}
1754
1755Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1756{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001757 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001758 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1759}
1760
1761Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1762{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001763 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001764 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1765}
1766
1767static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1768{
1769 ASSERT(lhs->getType() == rhs->getType());
1770 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1771
1772 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1773 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1774 ::basicBlock->appendInst(cmp);
1775
1776 return V(result);
1777}
1778
1779Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1780{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001781 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001782 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1783}
1784
1785Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1786{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001787 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001788 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1789}
1790
1791Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1792{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001793 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001794 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1795}
1796
1797Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1798{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001799 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001800 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1801}
1802
1803Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1804{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001805 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001806 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1807}
1808
1809Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1810{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001811 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001812 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1813}
1814
1815Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1816{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001817 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001818 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1819}
1820
1821Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1822{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001823 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001824 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1825}
1826
1827Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1828{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001829 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001830 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1831}
1832
1833Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1834{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001835 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001836 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1837}
1838
1839Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1840{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001841 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001842 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1843}
1844
1845Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1846{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001847 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001848 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1849}
1850
1851Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001854 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1855}
1856
1857Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1858{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001859 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001860 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1861}
1862
1863Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001866 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001867 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001868 ::basicBlock->appendInst(extract);
1869
1870 return V(result);
1871}
1872
1873Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1874{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001875 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001876 auto result = ::function->makeVariable(vector->getType());
1877 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1878 ::basicBlock->appendInst(insert);
1879
1880 return V(result);
1881}
1882
1883Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1884{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001885 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001886 ASSERT(V1->getType() == V2->getType());
1887
1888 int size = Ice::typeNumElements(V1->getType());
1889 auto result = ::function->makeVariable(V1->getType());
1890 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1891
1892 for(int i = 0; i < size; i++)
1893 {
1894 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1895 }
1896
1897 ::basicBlock->appendInst(shuffle);
1898
1899 return V(result);
1900}
1901
1902Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1903{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001904 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001905 ASSERT(ifTrue->getType() == ifFalse->getType());
1906
1907 auto result = ::function->makeVariable(ifTrue->getType());
1908 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1909 ::basicBlock->appendInst(select);
1910
1911 return V(result);
1912}
1913
1914SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1915{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001916 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001917 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1918 ::basicBlock->appendInst(switchInst);
1919
Ben Clayton713b8d32019-12-17 20:37:56 +00001920 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001921}
1922
1923void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1924{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001925 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001926 switchCases->addBranch(label, label, branch);
1927}
1928
1929void Nucleus::createUnreachable()
1930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001932 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1933 ::basicBlock->appendInst(unreachable);
1934}
1935
Antonio Maiorano62427e02020-02-13 09:18:05 -05001936Type *Nucleus::getType(Value *value)
1937{
1938 return T(V(value)->getType());
1939}
1940
1941Type *Nucleus::getContainedType(Type *vectorType)
1942{
1943 Ice::Type vecTy = T(vectorType);
1944 switch(vecTy)
1945 {
1946 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1947 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1948 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1949 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1950 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1951 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1952 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1953 default:
1954 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1955 return {};
1956 }
1957}
1958
Nicolas Capens157ba262019-12-10 17:49:14 -05001959Type *Nucleus::getPointerType(Type *ElementType)
1960{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001961 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001962}
1963
Antonio Maiorano62427e02020-02-13 09:18:05 -05001964static constexpr Ice::Type getNaturalIntType()
1965{
1966 constexpr size_t intSize = sizeof(int);
1967 static_assert(intSize == 4 || intSize == 8, "");
1968 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1969}
1970
1971Type *Nucleus::getPrintfStorageType(Type *valueType)
1972{
1973 Ice::Type valueTy = T(valueType);
1974 switch(valueTy)
1975 {
1976 case Ice::IceType_i32:
1977 return T(getNaturalIntType());
1978
1979 case Ice::IceType_f32:
1980 return T(Ice::IceType_f64);
1981
1982 default:
1983 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1984 return {};
1985 }
1986}
1987
Nicolas Capens157ba262019-12-10 17:49:14 -05001988Value *Nucleus::createNullValue(Type *Ty)
1989{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001990 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001991 if(Ice::isVectorType(T(Ty)))
1992 {
1993 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001994 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001995 return createConstantVector(c, Ty);
1996 }
1997 else
1998 {
1999 return V(::context->getConstantZero(T(Ty)));
2000 }
2001}
2002
2003Value *Nucleus::createConstantLong(int64_t i)
2004{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002005 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002006 return V(::context->getConstantInt64(i));
2007}
2008
2009Value *Nucleus::createConstantInt(int i)
2010{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002011 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002012 return V(::context->getConstantInt32(i));
2013}
2014
2015Value *Nucleus::createConstantInt(unsigned int i)
2016{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002017 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002018 return V(::context->getConstantInt32(i));
2019}
2020
2021Value *Nucleus::createConstantBool(bool b)
2022{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002023 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002024 return V(::context->getConstantInt1(b));
2025}
2026
2027Value *Nucleus::createConstantByte(signed char i)
2028{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002029 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002030 return V(::context->getConstantInt8(i));
2031}
2032
2033Value *Nucleus::createConstantByte(unsigned char i)
2034{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002035 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002036 return V(::context->getConstantInt8(i));
2037}
2038
2039Value *Nucleus::createConstantShort(short i)
2040{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002041 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002042 return V(::context->getConstantInt16(i));
2043}
2044
2045Value *Nucleus::createConstantShort(unsigned short i)
2046{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002047 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002048 return V(::context->getConstantInt16(i));
2049}
2050
2051Value *Nucleus::createConstantFloat(float x)
2052{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002053 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002054 return V(::context->getConstantFloat(x));
2055}
2056
2057Value *Nucleus::createNullPointer(Type *Ty)
2058{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002059 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002060 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002061}
2062
Antonio Maiorano02a39532020-01-21 15:15:34 -05002063static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2064{
2065 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2066}
2067
Nicolas Capens157ba262019-12-10 17:49:14 -05002068Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2069{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002070 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002071 const int vectorSize = 16;
2072 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2073 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002074
2075 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002076 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002077
Antonio Maioranoa0957112020-03-04 15:06:19 -05002078 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002079 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002080
2081 switch((int)reinterpret_cast<intptr_t>(type))
2082 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002083 case Ice::IceType_v4i32:
2084 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002085 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002086 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002087 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002088 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002089 }
2090 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002091 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002092 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002093 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002094 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002095 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002096 }
2097 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002098 case Ice::IceType_v8i16:
2099 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002100 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002101 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002102 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002103 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002104 }
2105 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002106 case Ice::IceType_v16i8:
2107 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002108 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002109 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002110 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002111 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002112 }
2113 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002114 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002115 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002116 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002117 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002118 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002119 }
2120 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002121 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002122 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002123 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002124 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002125 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002126 }
2127 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002128 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002129 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002130 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002131 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002132 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002133 }
2134 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002135 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002136 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002137 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002138 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002139 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002140 }
2141 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002142 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002143 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002144 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002145 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002146 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002147 }
2148 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002149 default:
2150 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002151 }
2152
Antonio Maiorano02a39532020-01-21 15:15:34 -05002153 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002154
Antonio Maiorano02a39532020-01-21 15:15:34 -05002155 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002156 return V(result);
2157}
2158
2159Value *Nucleus::createConstantVector(const double *constants, Type *type)
2160{
Ben Clayton713b8d32019-12-17 20:37:56 +00002161 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002162}
2163
Antonio Maiorano62427e02020-02-13 09:18:05 -05002164Value *Nucleus::createConstantString(const char *v)
2165{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002166 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002167 return V(IceConstantData(v, strlen(v) + 1));
2168}
2169
Nicolas Capens519cf222020-05-08 15:27:19 -04002170Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002171{
2172 return T(Ice::IceType_void);
2173}
2174
Nicolas Capens519cf222020-05-08 15:27:19 -04002175Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002176{
2177 return T(Ice::IceType_i1);
2178}
2179
Nicolas Capens519cf222020-05-08 15:27:19 -04002180Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002181{
2182 return T(Ice::IceType_i8);
2183}
2184
Nicolas Capens519cf222020-05-08 15:27:19 -04002185Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002186{
2187 return T(Ice::IceType_i8);
2188}
2189
Nicolas Capens519cf222020-05-08 15:27:19 -04002190Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002191{
2192 return T(Ice::IceType_i16);
2193}
2194
Nicolas Capens519cf222020-05-08 15:27:19 -04002195Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002196{
2197 return T(Ice::IceType_i16);
2198}
2199
Nicolas Capens519cf222020-05-08 15:27:19 -04002200Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002201{
2202 return T(Type_v4i8);
2203}
2204
Nicolas Capens519cf222020-05-08 15:27:19 -04002205Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002206{
2207 return T(Type_v4i8);
2208}
2209
Ben Clayton713b8d32019-12-17 20:37:56 +00002210namespace {
2211RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002212{
Ben Clayton713b8d32019-12-17 20:37:56 +00002213 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002214}
2215
Ben Clayton713b8d32019-12-17 20:37:56 +00002216RValue<Byte> Extract(RValue<Byte8> val, int i)
2217{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002218 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002219}
2220
2221RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2222{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002223 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002224}
2225} // namespace
2226
Nicolas Capens157ba262019-12-10 17:49:14 -05002227RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2228{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002229 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002230 if(emulateIntrinsics)
2231 {
2232 Byte8 result;
2233 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2234 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2235 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2236 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2237 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2238 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2239 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2240 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2241
2242 return result;
2243 }
2244 else
2245 {
2246 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002247 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002248 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2249 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002250 paddusb->addArg(x.value());
2251 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002252 ::basicBlock->appendInst(paddusb);
2253
2254 return RValue<Byte8>(V(result));
2255 }
2256}
2257
2258RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2259{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002260 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002261 if(emulateIntrinsics)
2262 {
2263 Byte8 result;
2264 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2265 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2266 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2267 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2268 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2269 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2270 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2271 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2272
2273 return result;
2274 }
2275 else
2276 {
2277 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002278 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002279 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2280 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002281 psubusw->addArg(x.value());
2282 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002283 ::basicBlock->appendInst(psubusw);
2284
2285 return RValue<Byte8>(V(result));
2286 }
2287}
2288
2289RValue<SByte> Extract(RValue<SByte8> val, int i)
2290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002292 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002293}
2294
2295RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2296{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002297 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002298 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002299}
2300
2301RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2302{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002303 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002304 if(emulateIntrinsics)
2305 {
2306 SByte8 result;
2307 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2308 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2309 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2310 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2311 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2312 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2313 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2314 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2315
2316 return result;
2317 }
2318 else
2319 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002320#if defined(__i386__) || defined(__x86_64__)
2321 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2322 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2323 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002324
Ben Clayton713b8d32019-12-17 20:37:56 +00002325 return As<SByte8>(hi | lo);
2326#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002327 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002328#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002329 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002330}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002331
Nicolas Capens157ba262019-12-10 17:49:14 -05002332RValue<Int> SignMask(RValue<Byte8> x)
2333{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002334 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002335 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002336 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002337 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2338 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002339 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002340 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002341 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002342 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002343 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002344 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2345 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002346 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002347 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002348
Nicolas Capens157ba262019-12-10 17:49:14 -05002349 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002350 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002351}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002352
2353// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2354// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002355// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002356// }
2357
Nicolas Capens157ba262019-12-10 17:49:14 -05002358RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002360 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002361 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002362}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002363
Nicolas Capens519cf222020-05-08 15:27:19 -04002364Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002365{
2366 return T(Type_v8i8);
2367}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002368
Nicolas Capens598f8d82016-09-26 15:09:10 -04002369// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2370// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002371// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002372// }
2373
2374// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2375// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002376// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002377// }
2378
Nicolas Capens157ba262019-12-10 17:49:14 -05002379RValue<SByte> SaturateSigned(RValue<Short> x)
2380{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002381 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002382 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2383}
2384
2385RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2386{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002387 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002388 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002389 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002390 SByte8 result;
2391 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2392 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2393 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2394 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2395 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2396 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2397 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2398 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002399
Nicolas Capens157ba262019-12-10 17:49:14 -05002400 return result;
2401 }
2402 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002403 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002404 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002405 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002406 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2407 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002408 paddsb->addArg(x.value());
2409 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002410 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002411
Nicolas Capens157ba262019-12-10 17:49:14 -05002412 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002413 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002414}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002415
Nicolas Capens157ba262019-12-10 17:49:14 -05002416RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002418 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002419 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002420 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002421 SByte8 result;
2422 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2423 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2424 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2425 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2426 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2427 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2428 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2429 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002430
Nicolas Capens157ba262019-12-10 17:49:14 -05002431 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002432 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002433 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002434 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002435 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002436 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002437 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2438 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002439 psubsb->addArg(x.value());
2440 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002441 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002442
Nicolas Capens157ba262019-12-10 17:49:14 -05002443 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002444 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002445}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002446
Nicolas Capens157ba262019-12-10 17:49:14 -05002447RValue<Int> SignMask(RValue<SByte8> x)
2448{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002449 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002450 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002451 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002452 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2453 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002454 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002455 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002456 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002457 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002458 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002459 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2460 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002461 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002462 ::basicBlock->appendInst(movmsk);
2463
2464 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002465 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002466}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002467
Nicolas Capens157ba262019-12-10 17:49:14 -05002468RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2469{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002470 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002471 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002472}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002473
Nicolas Capens157ba262019-12-10 17:49:14 -05002474RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2475{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002476 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002477 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002478}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002479
Nicolas Capens519cf222020-05-08 15:27:19 -04002480Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002481{
2482 return T(Type_v8i8);
2483}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002484
Nicolas Capens519cf222020-05-08 15:27:19 -04002485Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002486{
2487 return T(Ice::IceType_v16i8);
2488}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002489
Nicolas Capens519cf222020-05-08 15:27:19 -04002490Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002491{
2492 return T(Ice::IceType_v16i8);
2493}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002494
Nicolas Capens519cf222020-05-08 15:27:19 -04002495Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002496{
2497 return T(Type_v2i16);
2498}
Nicolas Capensd4227962016-11-09 14:24:25 -05002499
Nicolas Capens519cf222020-05-08 15:27:19 -04002500Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002501{
2502 return T(Type_v2i16);
2503}
Nicolas Capensd4227962016-11-09 14:24:25 -05002504
Nicolas Capens157ba262019-12-10 17:49:14 -05002505Short4::Short4(RValue<Int4> cast)
2506{
Ben Clayton713b8d32019-12-17 20:37:56 +00002507 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002508 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002509 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2510
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002511 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002512 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002513
2514 storeValue(short4);
2515}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002516
2517// Short4::Short4(RValue<Float> cast)
2518// {
2519// }
2520
Nicolas Capens157ba262019-12-10 17:49:14 -05002521Short4::Short4(RValue<Float4> cast)
2522{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002523 // TODO(b/150791192): Generalize and optimize
2524 auto smin = std::numeric_limits<short>::min();
2525 auto smax = std::numeric_limits<short>::max();
2526 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002527}
2528
2529RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2530{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002531 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002532 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002533 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002534 Short4 result;
2535 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2536 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2537 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2538 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002539
2540 return result;
2541 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002542 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002543 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002544 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002545 }
2546}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002547
Nicolas Capens157ba262019-12-10 17:49:14 -05002548RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2549{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002550 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002551 if(emulateIntrinsics)
2552 {
2553 Short4 result;
2554 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2555 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2556 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2557 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2558
2559 return result;
2560 }
2561 else
2562 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002563 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002564 }
2565}
2566
2567RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2568{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002569 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002570 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002571 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002572 ::basicBlock->appendInst(cmp);
2573
2574 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002575 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002576 ::basicBlock->appendInst(select);
2577
2578 return RValue<Short4>(V(result));
2579}
2580
2581RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2582{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002583 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002584 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002585 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002586 ::basicBlock->appendInst(cmp);
2587
2588 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002589 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002590 ::basicBlock->appendInst(select);
2591
2592 return RValue<Short4>(V(result));
2593}
2594
2595RValue<Short> SaturateSigned(RValue<Int> x)
2596{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002597 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002598 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2599}
2600
2601RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2602{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002603 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002604 if(emulateIntrinsics)
2605 {
2606 Short4 result;
2607 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2608 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2609 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2610 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2611
2612 return result;
2613 }
2614 else
2615 {
2616 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002617 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002618 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2619 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002620 paddsw->addArg(x.value());
2621 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002622 ::basicBlock->appendInst(paddsw);
2623
2624 return RValue<Short4>(V(result));
2625 }
2626}
2627
2628RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2629{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002630 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002631 if(emulateIntrinsics)
2632 {
2633 Short4 result;
2634 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2635 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2636 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2637 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2638
2639 return result;
2640 }
2641 else
2642 {
2643 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002644 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002645 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2646 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002647 psubsw->addArg(x.value());
2648 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002649 ::basicBlock->appendInst(psubsw);
2650
2651 return RValue<Short4>(V(result));
2652 }
2653}
2654
2655RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2656{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002657 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002658 if(emulateIntrinsics)
2659 {
2660 Short4 result;
2661 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2662 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2663 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2664 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2665
2666 return result;
2667 }
2668 else
2669 {
2670 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002671 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002672 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2673 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002674 pmulhw->addArg(x.value());
2675 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002676 ::basicBlock->appendInst(pmulhw);
2677
2678 return RValue<Short4>(V(result));
2679 }
2680}
2681
2682RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2683{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002684 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002685 if(emulateIntrinsics)
2686 {
2687 Int2 result;
2688 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2689 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2690
2691 return result;
2692 }
2693 else
2694 {
2695 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002696 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002697 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2698 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002699 pmaddwd->addArg(x.value());
2700 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002701 ::basicBlock->appendInst(pmaddwd);
2702
2703 return As<Int2>(V(result));
2704 }
2705}
2706
2707RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2708{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002709 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002710 if(emulateIntrinsics)
2711 {
2712 SByte8 result;
2713 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2714 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2715 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2716 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2717 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2718 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2719 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2720 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2721
2722 return result;
2723 }
2724 else
2725 {
2726 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002727 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002728 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2729 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002730 pack->addArg(x.value());
2731 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002732 ::basicBlock->appendInst(pack);
2733
2734 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2735 }
2736}
2737
2738RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2739{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002740 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002741 if(emulateIntrinsics)
2742 {
2743 Byte8 result;
2744 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2745 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2746 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2747 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2748 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2749 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2750 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2751 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2752
2753 return result;
2754 }
2755 else
2756 {
2757 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002758 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002759 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2760 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002761 pack->addArg(x.value());
2762 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002763 ::basicBlock->appendInst(pack);
2764
2765 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2766 }
2767}
2768
2769RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2770{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002771 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002772 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002773}
2774
2775RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2776{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002777 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002778 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002779}
2780
Nicolas Capens519cf222020-05-08 15:27:19 -04002781Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002782{
2783 return T(Type_v4i16);
2784}
2785
2786UShort4::UShort4(RValue<Float4> cast, bool saturate)
2787{
2788 if(saturate)
2789 {
2790 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002791 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002792 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2793 // PackUnsigned takes care of 0x0000 saturation.
2794 Int4 int4(Min(cast, Float4(0xFFFF)));
2795 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002796 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002797 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002798 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002799 // ARM saturates the 32-bit integer result on overflow/undeflow.
2800 Int4 int4(cast);
2801 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002802 }
2803 else
2804 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002805 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002806 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002807 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002808 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002809 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002810 *this = Short4(Int4(cast));
2811 }
2812}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002813
Nicolas Capens157ba262019-12-10 17:49:14 -05002814RValue<UShort> Extract(RValue<UShort4> val, int i)
2815{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002816 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002817}
2818
2819RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2820{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002821 return RValue<UShort4>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002822}
2823
2824RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2825{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002826 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002827 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002828
Nicolas Capens157ba262019-12-10 17:49:14 -05002829 {
2830 UShort4 result;
2831 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2832 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2833 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2834 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2835
2836 return result;
2837 }
2838 else
2839 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002840 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002841 }
2842}
2843
2844RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2845{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002846 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002847 if(emulateIntrinsics)
2848 {
2849 UShort4 result;
2850 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2851 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2852 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2853 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2854
2855 return result;
2856 }
2857 else
2858 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002859 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002860 }
2861}
2862
2863RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002866 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002867 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002868 ::basicBlock->appendInst(cmp);
2869
2870 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002871 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002872 ::basicBlock->appendInst(select);
2873
2874 return RValue<UShort4>(V(result));
2875}
2876
2877RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2878{
2879 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002880 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002881 ::basicBlock->appendInst(cmp);
2882
2883 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002884 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002885 ::basicBlock->appendInst(select);
2886
2887 return RValue<UShort4>(V(result));
2888}
2889
2890RValue<UShort> SaturateUnsigned(RValue<Int> x)
2891{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002892 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002893 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2894}
2895
2896RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2897{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002898 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002899 if(emulateIntrinsics)
2900 {
2901 UShort4 result;
2902 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2903 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2904 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2905 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2906
2907 return result;
2908 }
2909 else
2910 {
2911 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002912 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002913 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2914 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002915 paddusw->addArg(x.value());
2916 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002917 ::basicBlock->appendInst(paddusw);
2918
2919 return RValue<UShort4>(V(result));
2920 }
2921}
2922
2923RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2924{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002925 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002926 if(emulateIntrinsics)
2927 {
2928 UShort4 result;
2929 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2930 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2931 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2932 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2933
2934 return result;
2935 }
2936 else
2937 {
2938 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002939 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002940 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2941 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002942 psubusw->addArg(x.value());
2943 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002944 ::basicBlock->appendInst(psubusw);
2945
2946 return RValue<UShort4>(V(result));
2947 }
2948}
2949
2950RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2951{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002952 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002953 if(emulateIntrinsics)
2954 {
2955 UShort4 result;
2956 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2957 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2958 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2959 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2960
2961 return result;
2962 }
2963 else
2964 {
2965 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002966 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002967 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2968 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002969 pmulhuw->addArg(x.value());
2970 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002971 ::basicBlock->appendInst(pmulhuw);
2972
2973 return RValue<UShort4>(V(result));
2974 }
2975}
2976
2977RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2978{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002979 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002980 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2981
2982 // Scalarized implementation.
2983 Int4 result;
2984 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2985 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2986 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2987 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2988
2989 return result;
2990}
2991
2992RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2993{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002994 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002995 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2996
2997 if(false) // Partial product based implementation.
2998 {
2999 auto xh = x >> 16;
3000 auto yh = y >> 16;
3001 auto xl = x & UInt4(0x0000FFFF);
3002 auto yl = y & UInt4(0x0000FFFF);
3003 auto xlyh = xl * yh;
3004 auto xhyl = xh * yl;
3005 auto xlyhh = xlyh >> 16;
3006 auto xhylh = xhyl >> 16;
3007 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3008 auto xhyll = xhyl & UInt4(0x0000FFFF);
3009 auto xlylh = (xl * yl) >> 16;
3010 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3011
3012 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003013 }
3014
Nicolas Capens157ba262019-12-10 17:49:14 -05003015 // Scalarized implementation.
3016 Int4 result;
3017 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3018 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3019 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3020 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3021
3022 return As<UInt4>(result);
3023}
3024
3025RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3026{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003027 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003028 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003029 return UShort4(0);
3030}
3031
Nicolas Capens519cf222020-05-08 15:27:19 -04003032Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003033{
3034 return T(Type_v4i16);
3035}
3036
3037RValue<Short> Extract(RValue<Short8> val, int i)
3038{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003039 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003040 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003041}
3042
3043RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3044{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003045 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003046 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003047}
3048
3049RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3050{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003051 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003052 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003053 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003054 Short8 result;
3055 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3056 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3057 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3058 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3059 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3060 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3061 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3062 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003063
Nicolas Capens157ba262019-12-10 17:49:14 -05003064 return result;
3065 }
3066 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003067 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003068 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003069 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003070}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003071
Nicolas Capens157ba262019-12-10 17:49:14 -05003072RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3073{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003074 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003075 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003076 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003077 Short8 result;
3078 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3079 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3080 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3081 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3082 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3083 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3084 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3085 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003086
Nicolas Capens157ba262019-12-10 17:49:14 -05003087 return result;
3088 }
3089 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003090 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003091 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003092 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003093}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003094
Nicolas Capens157ba262019-12-10 17:49:14 -05003095RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3096{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003097 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003098 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003099 return Int4(0);
3100}
3101
3102RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3103{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003104 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003105 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003106 return Short8(0);
3107}
3108
Nicolas Capens519cf222020-05-08 15:27:19 -04003109Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003110{
3111 return T(Ice::IceType_v8i16);
3112}
3113
3114RValue<UShort> Extract(RValue<UShort8> val, int i)
3115{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003116 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003117 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003118}
3119
3120RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3121{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003122 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003123 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003124}
3125
3126RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3127{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003128 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003129 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003130 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003131 UShort8 result;
3132 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3133 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3134 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3135 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3136 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3137 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3138 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3139 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003140
Nicolas Capens157ba262019-12-10 17:49:14 -05003141 return result;
3142 }
3143 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003144 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003145 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003146 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003147}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003148
Nicolas Capens157ba262019-12-10 17:49:14 -05003149RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3150{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003151 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003152 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003153 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003154 UShort8 result;
3155 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3156 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3157 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3158 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3159 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3160 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3161 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3162 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003163
Nicolas Capens157ba262019-12-10 17:49:14 -05003164 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003165 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003166 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003167 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003168 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003169 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003170}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003171
Nicolas Capens157ba262019-12-10 17:49:14 -05003172RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3173{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003174 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003175 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003176 return UShort8(0);
3177}
3178
Nicolas Capens519cf222020-05-08 15:27:19 -04003179Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003180{
3181 return T(Ice::IceType_v8i16);
3182}
3183
Ben Clayton713b8d32019-12-17 20:37:56 +00003184RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003185{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003186 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003187 RValue<Int> res = val;
3188 val += 1;
3189 return res;
3190}
3191
Ben Clayton713b8d32019-12-17 20:37:56 +00003192const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003193{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003194 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003195 val += 1;
3196 return val;
3197}
3198
Ben Clayton713b8d32019-12-17 20:37:56 +00003199RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003200{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003201 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003202 RValue<Int> res = val;
3203 val -= 1;
3204 return res;
3205}
3206
Ben Clayton713b8d32019-12-17 20:37:56 +00003207const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003208{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003209 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003210 val -= 1;
3211 return val;
3212}
3213
3214RValue<Int> RoundInt(RValue<Float> cast)
3215{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003216 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003217 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003218 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003219 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3220 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003221 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003222 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003223 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003224 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003225 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003226 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3227 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003228 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003229 ::basicBlock->appendInst(nearbyint);
3230
3231 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003232 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003233}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003234
Nicolas Capens519cf222020-05-08 15:27:19 -04003235Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003236{
3237 return T(Ice::IceType_i32);
3238}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003239
Nicolas Capens519cf222020-05-08 15:27:19 -04003240Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003241{
3242 return T(Ice::IceType_i64);
3243}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003244
Nicolas Capens157ba262019-12-10 17:49:14 -05003245UInt::UInt(RValue<Float> cast)
3246{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003247 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003248 // Smallest positive value representable in UInt, but not in Int
3249 const unsigned int ustart = 0x80000000u;
3250 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003251
Nicolas Capens157ba262019-12-10 17:49:14 -05003252 // If the value is negative, store 0, otherwise store the result of the conversion
3253 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003254 // Check if the value can be represented as an Int
3255 IfThenElse(cast >= ustartf,
3256 // If the value is too large, subtract ustart and re-add it after conversion.
3257 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3258 // Otherwise, just convert normally
3259 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003260 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003261}
Nicolas Capensa8086512016-11-07 17:32:17 -05003262
Ben Clayton713b8d32019-12-17 20:37:56 +00003263RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003264{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003265 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003266 RValue<UInt> res = val;
3267 val += 1;
3268 return res;
3269}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003270
Ben Clayton713b8d32019-12-17 20:37:56 +00003271const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003272{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003273 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003274 val += 1;
3275 return val;
3276}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003277
Ben Clayton713b8d32019-12-17 20:37:56 +00003278RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003279{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003280 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003281 RValue<UInt> res = val;
3282 val -= 1;
3283 return res;
3284}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003285
Ben Clayton713b8d32019-12-17 20:37:56 +00003286const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003289 val -= 1;
3290 return val;
3291}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003292
Nicolas Capens598f8d82016-09-26 15:09:10 -04003293// RValue<UInt> RoundUInt(RValue<Float> cast)
3294// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003295// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003296// }
3297
Nicolas Capens519cf222020-05-08 15:27:19 -04003298Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003299{
3300 return T(Ice::IceType_i32);
3301}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003302
3303// Int2::Int2(RValue<Int> cast)
3304// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003305// Value *extend = Nucleus::createZExt(cast.value(), Long::type());
Nicolas Capens519cf222020-05-08 15:27:19 -04003306// Value *vector = Nucleus::createBitCast(extend, Int2::type());
Nicolas Capens598f8d82016-09-26 15:09:10 -04003307//
3308// Constant *shuffle[2];
3309// shuffle[0] = Nucleus::createConstantInt(0);
3310// shuffle[1] = Nucleus::createConstantInt(0);
3311//
Nicolas Capens519cf222020-05-08 15:27:19 -04003312// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003313//
3314// storeValue(replicate);
3315// }
3316
Nicolas Capens157ba262019-12-10 17:49:14 -05003317RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3318{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003319 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003320 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003321 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003322 Int2 result;
3323 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3324 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003325
Nicolas Capens157ba262019-12-10 17:49:14 -05003326 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003327 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003328 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003329 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003330 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003331 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003332}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003333
Nicolas Capens157ba262019-12-10 17:49:14 -05003334RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003337 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003338 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003339 Int2 result;
3340 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3341 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3342
3343 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003344 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003345 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003346 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003347 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003348 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003349}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003350
Nicolas Capens519cf222020-05-08 15:27:19 -04003351Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003352{
3353 return T(Type_v2i32);
3354}
3355
3356RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3357{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003358 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003359 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003360 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003361 UInt2 result;
3362 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3363 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003364
Nicolas Capens157ba262019-12-10 17:49:14 -05003365 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003366 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003367 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003368 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003369 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003370 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003371}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003372
Nicolas Capens157ba262019-12-10 17:49:14 -05003373RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3374{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003375 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003376 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003377 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003378 UInt2 result;
3379 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3380 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003381
Nicolas Capens157ba262019-12-10 17:49:14 -05003382 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003383 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003384 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003385 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003386 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003387 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003388}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003389
Nicolas Capens519cf222020-05-08 15:27:19 -04003390Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003391{
3392 return T(Type_v2i32);
3393}
3394
Ben Clayton713b8d32019-12-17 20:37:56 +00003395Int4::Int4(RValue<Byte4> cast)
3396 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003397{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003398 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003399 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003400 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3401
3402 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003403 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003404 Value *b = Nucleus::createBitCast(a, Byte16::type());
3405 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003406
Ben Clayton713b8d32019-12-17 20:37:56 +00003407 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003408 Value *d = Nucleus::createBitCast(c, Short8::type());
3409 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003410
Nicolas Capens519cf222020-05-08 15:27:19 -04003411 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003412 storeValue(f);
3413}
3414
Ben Clayton713b8d32019-12-17 20:37:56 +00003415Int4::Int4(RValue<SByte4> cast)
3416 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003418 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003419 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003420 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3421
Ben Clayton713b8d32019-12-17 20:37:56 +00003422 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003423 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003424 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3425
Ben Clayton713b8d32019-12-17 20:37:56 +00003426 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003427 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003428 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3429
3430 *this = As<Int4>(e) >> 24;
3431}
3432
Ben Clayton713b8d32019-12-17 20:37:56 +00003433Int4::Int4(RValue<Short4> cast)
3434 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003435{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003436 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003437 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003438 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003439
3440 *this = As<Int4>(c) >> 16;
3441}
3442
Ben Clayton713b8d32019-12-17 20:37:56 +00003443Int4::Int4(RValue<UShort4> cast)
3444 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003445{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003446 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003447 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003448 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003449 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003450 storeValue(d);
3451}
3452
Ben Clayton713b8d32019-12-17 20:37:56 +00003453Int4::Int4(RValue<Int> rhs)
3454 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003455{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003456 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003457 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003458
Ben Clayton713b8d32019-12-17 20:37:56 +00003459 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003460 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3461
3462 storeValue(replicate);
3463}
3464
3465RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3466{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003467 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003468 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003469 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003470 Int4 result;
3471 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3472 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3473 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3474 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003475
Nicolas Capens157ba262019-12-10 17:49:14 -05003476 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003477 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003478 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003479 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003480 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003481 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003482}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003483
Nicolas Capens157ba262019-12-10 17:49:14 -05003484RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3485{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003486 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003487 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003488 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003489 Int4 result;
3490 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3491 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3492 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3493 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003494
Nicolas Capens157ba262019-12-10 17:49:14 -05003495 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003496 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003497 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003498 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003499 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003500 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003501}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003502
Nicolas Capens157ba262019-12-10 17:49:14 -05003503RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3504{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003505 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003506 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003507}
3508
3509RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3510{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003511 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003512 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003513}
3514
3515RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3516{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003517 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003518 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003519}
3520
3521RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3522{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003523 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003524 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003525}
3526
3527RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3528{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003529 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003530 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003531}
3532
3533RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3534{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003535 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003536 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003537}
3538
3539RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3540{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003541 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003542 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003543 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003544 ::basicBlock->appendInst(cmp);
3545
3546 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003547 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003548 ::basicBlock->appendInst(select);
3549
3550 return RValue<Int4>(V(result));
3551}
3552
3553RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3554{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003555 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003556 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003557 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003558 ::basicBlock->appendInst(cmp);
3559
3560 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003561 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003562 ::basicBlock->appendInst(select);
3563
3564 return RValue<Int4>(V(result));
3565}
3566
3567RValue<Int4> RoundInt(RValue<Float4> cast)
3568{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003569 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003570 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003571 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003572 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3573 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003574 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003575 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003576 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003577 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003578 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003579 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3580 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003581 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003582 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003583
3584 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003585 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003586}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003587
Nicolas Capens157ba262019-12-10 17:49:14 -05003588RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3589{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003590 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003591 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003592 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003593 Short8 result;
3594 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3595 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3596 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3597 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3598 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3599 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3600 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3601 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003602
Nicolas Capens157ba262019-12-10 17:49:14 -05003603 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003604 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003605 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003606 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003607 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003608 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003609 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3610 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003611 pack->addArg(x.value());
3612 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003613 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003614
Nicolas Capens157ba262019-12-10 17:49:14 -05003615 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003616 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003617}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003618
Nicolas Capens157ba262019-12-10 17:49:14 -05003619RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3620{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003621 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003622 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003623 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003624 RValue<Int4> sx = As<Int4>(x);
3625 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003626
Nicolas Capens157ba262019-12-10 17:49:14 -05003627 RValue<Int4> sy = As<Int4>(y);
3628 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003629
Nicolas Capens157ba262019-12-10 17:49:14 -05003630 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003631 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003632 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003633 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003634 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003635 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003636 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3637 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003638 pack->addArg(x.value());
3639 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003640 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003641
Nicolas Capens157ba262019-12-10 17:49:14 -05003642 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003643 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003644}
Nicolas Capens33438a62017-09-27 11:47:35 -04003645
Nicolas Capens157ba262019-12-10 17:49:14 -05003646RValue<Int> SignMask(RValue<Int4> x)
3647{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003648 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003649 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003650 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003651 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3652 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003653 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003654 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003655 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003656 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003657 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003658 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3659 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003660 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003661 ::basicBlock->appendInst(movmsk);
3662
3663 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003664 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003665}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003666
Nicolas Capens519cf222020-05-08 15:27:19 -04003667Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003668{
3669 return T(Ice::IceType_v4i32);
3670}
3671
Ben Clayton713b8d32019-12-17 20:37:56 +00003672UInt4::UInt4(RValue<Float4> cast)
3673 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003674{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003675 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003676 // Smallest positive value representable in UInt, but not in Int
3677 const unsigned int ustart = 0x80000000u;
3678 const float ustartf = float(ustart);
3679
3680 // Check if the value can be represented as an Int
3681 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3682 // If the value is too large, subtract ustart and re-add it after conversion.
3683 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003684 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003685 (~uiValue & Int4(cast));
3686 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003687 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003688}
3689
Ben Clayton713b8d32019-12-17 20:37:56 +00003690UInt4::UInt4(RValue<UInt> rhs)
3691 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003692{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003693 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003694 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003695
Ben Clayton713b8d32019-12-17 20:37:56 +00003696 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003697 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3698
3699 storeValue(replicate);
3700}
3701
3702RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3703{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003704 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003705 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003706 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003707 UInt4 result;
3708 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3709 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3710 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3711 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003712
Nicolas Capens157ba262019-12-10 17:49:14 -05003713 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003714 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003715 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003716 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003717 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003718 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003719}
Ben Clayton88816fa2019-05-15 17:08:14 +01003720
Nicolas Capens157ba262019-12-10 17:49:14 -05003721RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3722{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003723 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003724 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003725 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003726 UInt4 result;
3727 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3728 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3729 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3730 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003731
Nicolas Capens157ba262019-12-10 17:49:14 -05003732 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003733 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003734 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003735 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003736 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003737 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003738}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003739
Nicolas Capens157ba262019-12-10 17:49:14 -05003740RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3741{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003742 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003743 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003744}
3745
3746RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3747{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003748 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003749 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003750}
3751
3752RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3753{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003754 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003755 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003756}
3757
3758RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3759{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003760 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003761 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003762}
3763
3764RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3765{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003766 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003767 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003768}
3769
3770RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3771{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003772 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003773 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003774}
3775
3776RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3777{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003778 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003779 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003780 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003781 ::basicBlock->appendInst(cmp);
3782
3783 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003784 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003785 ::basicBlock->appendInst(select);
3786
3787 return RValue<UInt4>(V(result));
3788}
3789
3790RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3791{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003792 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003793 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003794 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003795 ::basicBlock->appendInst(cmp);
3796
3797 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003798 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003799 ::basicBlock->appendInst(select);
3800
3801 return RValue<UInt4>(V(result));
3802}
3803
Nicolas Capens519cf222020-05-08 15:27:19 -04003804Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003805{
3806 return T(Ice::IceType_v4i32);
3807}
3808
Nicolas Capens519cf222020-05-08 15:27:19 -04003809Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003810{
3811 return T(Ice::IceType_i16);
3812}
3813
3814RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3815{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003816 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003817 return 1.0f / x;
3818}
3819
3820RValue<Float> RcpSqrt_pp(RValue<Float> x)
3821{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003822 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003823 return Rcp_pp(Sqrt(x));
3824}
3825
3826RValue<Float> Sqrt(RValue<Float> x)
3827{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003828 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003829 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003830 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003831 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3832 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003833 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003834 ::basicBlock->appendInst(sqrt);
3835
3836 return RValue<Float>(V(result));
3837}
3838
3839RValue<Float> Round(RValue<Float> x)
3840{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003841 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003842 return Float4(Round(Float4(x))).x;
3843}
3844
3845RValue<Float> Trunc(RValue<Float> x)
3846{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003847 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003848 return Float4(Trunc(Float4(x))).x;
3849}
3850
3851RValue<Float> Frac(RValue<Float> x)
3852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003854 return Float4(Frac(Float4(x))).x;
3855}
3856
3857RValue<Float> Floor(RValue<Float> x)
3858{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003859 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003860 return Float4(Floor(Float4(x))).x;
3861}
3862
3863RValue<Float> Ceil(RValue<Float> x)
3864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003866 return Float4(Ceil(Float4(x))).x;
3867}
3868
Nicolas Capens519cf222020-05-08 15:27:19 -04003869Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003870{
3871 return T(Ice::IceType_f32);
3872}
3873
Nicolas Capens519cf222020-05-08 15:27:19 -04003874Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003875{
3876 return T(Type_v2f32);
3877}
3878
Ben Clayton713b8d32019-12-17 20:37:56 +00003879Float4::Float4(RValue<Float> rhs)
3880 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003881{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003882 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003883 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003884
Ben Clayton713b8d32019-12-17 20:37:56 +00003885 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003886 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3887
3888 storeValue(replicate);
3889}
3890
3891RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3892{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003893 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003894 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003895 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003896 ::basicBlock->appendInst(cmp);
3897
3898 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003899 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003900 ::basicBlock->appendInst(select);
3901
3902 return RValue<Float4>(V(result));
3903}
3904
3905RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3906{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003907 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003908 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003909 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003910 ::basicBlock->appendInst(cmp);
3911
3912 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003913 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003914 ::basicBlock->appendInst(select);
3915
3916 return RValue<Float4>(V(result));
3917}
3918
3919RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3920{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003921 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003922 return Float4(1.0f) / x;
3923}
3924
3925RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3926{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003927 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003928 return Rcp_pp(Sqrt(x));
3929}
3930
3931RValue<Float4> Sqrt(RValue<Float4> x)
3932{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003933 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003934 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003935 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003936 Float4 result;
3937 result.x = Sqrt(Float(Float4(x).x));
3938 result.y = Sqrt(Float(Float4(x).y));
3939 result.z = Sqrt(Float(Float4(x).z));
3940 result.w = Sqrt(Float(Float4(x).w));
3941
3942 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003943 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003944 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003945 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003946 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003947 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003948 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3949 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003950 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04003951 ::basicBlock->appendInst(sqrt);
3952
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003953 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003954 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003955}
Nicolas Capens157ba262019-12-10 17:49:14 -05003956
3957RValue<Int> SignMask(RValue<Float4> x)
3958{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003959 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003960 if(emulateIntrinsics || CPUID::ARM)
3961 {
3962 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3963 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3964 }
3965 else
3966 {
3967 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003968 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003969 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3970 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003971 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003972 ::basicBlock->appendInst(movmsk);
3973
3974 return RValue<Int>(V(result));
3975 }
3976}
3977
3978RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3979{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003980 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003981 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003982}
3983
3984RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3985{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003986 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003987 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003988}
3989
3990RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3991{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003992 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003993 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003994}
3995
3996RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3997{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003998 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003999 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004000}
4001
4002RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4003{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004004 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004005 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004006}
4007
4008RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4009{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004010 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004011 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004012}
4013
4014RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4015{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004016 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004017 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004018}
4019
4020RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4021{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004022 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004023 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004024}
4025
4026RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4027{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004028 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004029 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004030}
4031
4032RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4033{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004034 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004035 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004036}
4037
4038RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4039{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004040 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004041 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004042}
4043
4044RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4045{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004046 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004047 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004048}
4049
4050RValue<Float4> Round(RValue<Float4> x)
4051{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004052 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004053 if(emulateIntrinsics || CPUID::ARM)
4054 {
4055 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4056 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4057 }
4058 else if(CPUID::SSE4_1)
4059 {
4060 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004061 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004062 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4063 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004064 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004065 round->addArg(::context->getConstantInt32(0));
4066 ::basicBlock->appendInst(round);
4067
4068 return RValue<Float4>(V(result));
4069 }
4070 else
4071 {
4072 return Float4(RoundInt(x));
4073 }
4074}
4075
4076RValue<Float4> Trunc(RValue<Float4> x)
4077{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004078 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004079 if(CPUID::SSE4_1)
4080 {
4081 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004082 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004083 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4084 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004085 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004086 round->addArg(::context->getConstantInt32(3));
4087 ::basicBlock->appendInst(round);
4088
4089 return RValue<Float4>(V(result));
4090 }
4091 else
4092 {
4093 return Float4(Int4(x));
4094 }
4095}
4096
4097RValue<Float4> Frac(RValue<Float4> x)
4098{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004099 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004100 Float4 frc;
4101
4102 if(CPUID::SSE4_1)
4103 {
4104 frc = x - Floor(x);
4105 }
4106 else
4107 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004108 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004109
Ben Clayton713b8d32019-12-17 20:37:56 +00004110 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004111 }
4112
4113 // x - floor(x) can be 1.0 for very small negative x.
4114 // Clamp against the value just below 1.0.
4115 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4116}
4117
4118RValue<Float4> Floor(RValue<Float4> x)
4119{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004120 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004121 if(CPUID::SSE4_1)
4122 {
4123 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004124 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004125 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4126 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004127 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004128 round->addArg(::context->getConstantInt32(1));
4129 ::basicBlock->appendInst(round);
4130
4131 return RValue<Float4>(V(result));
4132 }
4133 else
4134 {
4135 return x - Frac(x);
4136 }
4137}
4138
4139RValue<Float4> Ceil(RValue<Float4> x)
4140{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004141 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004142 if(CPUID::SSE4_1)
4143 {
4144 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004145 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004146 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4147 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004148 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004149 round->addArg(::context->getConstantInt32(2));
4150 ::basicBlock->appendInst(round);
4151
4152 return RValue<Float4>(V(result));
4153 }
4154 else
4155 {
4156 return -Floor(-x);
4157 }
4158}
4159
Nicolas Capens519cf222020-05-08 15:27:19 -04004160Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004161{
4162 return T(Ice::IceType_v4f32);
4163}
4164
4165RValue<Long> Ticks()
4166{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004167 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004168 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004169 return Long(Int(0));
4170}
4171
Ben Clayton713b8d32019-12-17 20:37:56 +00004172RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004173{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004174 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004175 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004176}
4177
Ben Clayton713b8d32019-12-17 20:37:56 +00004178RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004179{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004180 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004181 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004182}
4183
Ben Clayton713b8d32019-12-17 20:37:56 +00004184Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004185{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004186 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004187 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004188}
4189
4190void Breakpoint()
4191{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004192 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004193 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004194 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4195 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4196 ::basicBlock->appendInst(trap);
4197}
4198
Ben Clayton713b8d32019-12-17 20:37:56 +00004199void Nucleus::createFence(std::memory_order memoryOrder)
4200{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004201 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004202 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4203 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4204 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4205 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4206 inst->addArg(order);
4207 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004208}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004209
Ben Clayton713b8d32019-12-17 20:37:56 +00004210Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4211{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004212 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004213 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004214 return nullptr;
4215}
4216void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4217{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004218 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004219 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004220}
Nicolas Capens157ba262019-12-10 17:49:14 -05004221
4222RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4223{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004224 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004225 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4226}
4227
4228RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4229{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004230 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004231 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4232}
4233
4234void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4235{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004236 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004237 return emulated::Scatter(base, val, offsets, mask, alignment);
4238}
4239
4240void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4241{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004242 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004243 return emulated::Scatter(base, val, offsets, mask, alignment);
4244}
4245
4246RValue<Float> Exp2(RValue<Float> x)
4247{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004248 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004249 return emulated::Exp2(x);
4250}
4251
4252RValue<Float> Log2(RValue<Float> x)
4253{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004254 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004255 return emulated::Log2(x);
4256}
4257
4258RValue<Float4> Sin(RValue<Float4> x)
4259{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004260 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004261 return emulated::Sin(x);
4262}
4263
4264RValue<Float4> Cos(RValue<Float4> x)
4265{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004266 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004267 return emulated::Cos(x);
4268}
4269
4270RValue<Float4> Tan(RValue<Float4> x)
4271{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004272 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004273 return emulated::Tan(x);
4274}
4275
4276RValue<Float4> Asin(RValue<Float4> x)
4277{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004278 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004279 return emulated::Asin(x);
4280}
4281
4282RValue<Float4> Acos(RValue<Float4> x)
4283{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004284 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004285 return emulated::Acos(x);
4286}
4287
4288RValue<Float4> Atan(RValue<Float4> x)
4289{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004290 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004291 return emulated::Atan(x);
4292}
4293
4294RValue<Float4> Sinh(RValue<Float4> x)
4295{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004296 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004297 return emulated::Sinh(x);
4298}
4299
4300RValue<Float4> Cosh(RValue<Float4> x)
4301{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004302 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004303 return emulated::Cosh(x);
4304}
4305
4306RValue<Float4> Tanh(RValue<Float4> x)
4307{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004308 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004309 return emulated::Tanh(x);
4310}
4311
4312RValue<Float4> Asinh(RValue<Float4> x)
4313{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004314 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004315 return emulated::Asinh(x);
4316}
4317
4318RValue<Float4> Acosh(RValue<Float4> x)
4319{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004320 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004321 return emulated::Acosh(x);
4322}
4323
4324RValue<Float4> Atanh(RValue<Float4> x)
4325{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004326 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004327 return emulated::Atanh(x);
4328}
4329
4330RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4331{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004332 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004333 return emulated::Atan2(x, y);
4334}
4335
4336RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4337{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004338 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004339 return emulated::Pow(x, y);
4340}
4341
4342RValue<Float4> Exp(RValue<Float4> x)
4343{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004344 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004345 return emulated::Exp(x);
4346}
4347
4348RValue<Float4> Log(RValue<Float4> x)
4349{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004350 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004351 return emulated::Log(x);
4352}
4353
4354RValue<Float4> Exp2(RValue<Float4> x)
4355{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004356 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004357 return emulated::Exp2(x);
4358}
4359
4360RValue<Float4> Log2(RValue<Float4> x)
4361{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004362 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004363 return emulated::Log2(x);
4364}
4365
4366RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4367{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004368 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004369 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004370 {
Ben Claytonce54c592020-02-07 11:30:51 +00004371 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004372 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004373 }
4374 else
4375 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004376 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004377 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4378 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4379 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004380 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004381 ::basicBlock->appendInst(ctlz);
4382
4383 return RValue<UInt>(V(result));
4384 }
4385}
4386
4387RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4388{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004389 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004390 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004391 {
Ben Claytonce54c592020-02-07 11:30:51 +00004392 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004393 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004394 }
4395 else
4396 {
4397 // TODO: implement vectorized version in Subzero
4398 UInt4 result;
4399 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4400 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4401 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4402 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4403 return result;
4404 }
4405}
4406
4407RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4408{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004409 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004410 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004411 {
Ben Claytonce54c592020-02-07 11:30:51 +00004412 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004413 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004414 }
4415 else
4416 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004417 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004418 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4419 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4420 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004421 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004422 ::basicBlock->appendInst(ctlz);
4423
4424 return RValue<UInt>(V(result));
4425 }
4426}
4427
4428RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4429{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004430 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004431 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004432 {
Ben Claytonce54c592020-02-07 11:30:51 +00004433 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004434 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004435 }
4436 else
4437 {
4438 // TODO: implement vectorized version in Subzero
4439 UInt4 result;
4440 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4441 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4442 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4443 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4444 return result;
4445 }
4446}
4447
Antonio Maiorano370cba52019-12-31 11:36:07 -05004448RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4449{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004450 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004451 return emulated::MinAtomic(x, y, memoryOrder);
4452}
4453
4454RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4455{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004456 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004457 return emulated::MinAtomic(x, y, memoryOrder);
4458}
4459
4460RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4461{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004462 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004463 return emulated::MaxAtomic(x, y, memoryOrder);
4464}
4465
4466RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4467{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004468 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004469 return emulated::MaxAtomic(x, y, memoryOrder);
4470}
4471
// Emits a print of the caller's source location into the generated code.
// Only active when both ENABLE_RR_DEBUG_INFO and
// ENABLE_RR_EMIT_PRINT_LOCATION are defined; otherwise this is a no-op.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO) && defined(ENABLE_RR_EMIT_PRINT_LOCATION)
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO && ENABLE_RR_EMIT_PRINT_LOCATION
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004480void EmitDebugVariable(Value *value) {}
// Intentionally empty here: there is nothing to flush.
void FlushDebug()
{
}
4482
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004483namespace {
4484namespace coro {
4485
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004486// Instance data per generated coroutine
4487// This is the "handle" type used for Coroutine functions
4488// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4489struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004490{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004491 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004492 bool done = false; // the coroutine should stop at the next yield()
4493 bool terminated = false; // the coroutine has finished.
4494 bool inRoutine = false; // is the coroutine currently executing?
4495 marl::Scheduler::Fiber *mainFiber = nullptr;
4496 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004497 void *promisePtr = nullptr;
4498};
4499
4500CoroutineData *createCoroutineData()
4501{
4502 return new CoroutineData{};
4503}
4504
4505void destroyCoroutineData(CoroutineData *coroData)
4506{
4507 delete coroData;
4508}
4509
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004510// suspend() pauses execution of the coroutine, and resumes execution from the
4511// caller's call to await().
4512// Returns true if await() is called again, or false if coroutine_destroy()
4513// is called.
4514bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004515{
Ben Claytonc3466532020-03-24 11:54:05 +00004516 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4517 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4518 ASSERT(coroData->inRoutine);
4519 coroData->inRoutine = false;
4520 coroData->mainFiber->notify();
4521 while(!coroData->inRoutine)
4522 {
4523 coroData->routineFiber->wait();
4524 }
4525 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004526}
4527
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004528// resume() is called by await(), blocking until the coroutine calls yield()
4529// or the coroutine terminates.
4530void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004531{
Ben Claytonc3466532020-03-24 11:54:05 +00004532 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4533 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4534 ASSERT(!coroData->inRoutine);
4535 coroData->inRoutine = true;
4536 coroData->routineFiber->notify();
4537 while(coroData->inRoutine)
4538 {
4539 coroData->mainFiber->wait();
4540 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004541}
4542
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004543// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4544// until the coroutine ends, and deletes the coroutine data.
4545void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004546{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004547 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004548 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4549 ASSERT(!coroData->inRoutine);
4550 if(!coroData->terminated)
4551 {
4552 coroData->done = true;
4553 coroData->inRoutine = true;
4554 coroData->routineFiber->notify();
4555 while(!coroData->terminated)
4556 {
4557 coroData->mainFiber->wait();
4558 }
4559 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004560 if(coroData->useInternalScheduler)
4561 {
4562 ::getOrCreateScheduler().unbind();
4563 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004564 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004565}
4566
4567namespace detail {
4568thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4569} // namespace detail
4570
4571void setHandleParam(Nucleus::CoroutineHandle handle)
4572{
4573 ASSERT(!detail::coroHandle);
4574 detail::coroHandle = handle;
4575}
4576
4577Nucleus::CoroutineHandle getHandleParam()
4578{
4579 ASSERT(detail::coroHandle);
4580 auto handle = detail::coroHandle;
4581 detail::coroHandle = {};
4582 return handle;
4583}
4584
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004585bool isDone(Nucleus::CoroutineHandle handle)
4586{
4587 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004588 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004589}
4590
4591void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4592{
4593 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4594 coroData->promisePtr = promisePtr;
4595}
4596
4597void *getPromisePtr(Nucleus::CoroutineHandle handle)
4598{
4599 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4600 return coroData->promisePtr;
4601}
4602
4603} // namespace coro
4604} // namespace
4605
4606// Used to generate coroutines.
4607// Lifetime: from yield to acquireCoroutine
4608class CoroutineGenerator
4609{
4610public:
4611 CoroutineGenerator()
4612 {
4613 }
4614
4615 // Inserts instructions at the top of the current function to make it a coroutine.
4616 void generateCoroutineBegin()
4617 {
4618 // Begin building the main coroutine_begin() function.
4619 // We insert these instructions at the top of the entry node,
4620 // before existing reactor-generated instructions.
4621
4622 // CoroutineHandle coroutine_begin(<Arguments>)
4623 // {
4624 // this->handle = coro::getHandleParam();
4625 //
4626 // YieldType promise;
4627 // coro::setPromisePtr(handle, &promise); // For await
4628 //
4629 // ... <REACTOR CODE> ...
4630 //
4631
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004632 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004633 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004634
4635 // YieldType promise;
4636 // coro::setPromisePtr(handle, &promise); // For await
4637 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004638 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004639 }
4640
4641 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4642 void generateYield(Value *val)
4643 {
4644 // ... <REACTOR CODE> ...
4645 //
4646 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004647 // if (!coro::suspend(handle)) {
4648 // return false; // coroutine has been stopped by the caller.
4649 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004650 //
4651 // ... <REACTOR CODE> ...
4652
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004653 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004654 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004655
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004656 // if (!coro::suspend(handle)) {
4657 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4658 auto doneBlock = Nucleus::createBasicBlock();
4659 auto resumeBlock = Nucleus::createBasicBlock();
4660 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4661
4662 // return false; // coroutine has been stopped by the caller.
4663 ::basicBlock = doneBlock;
4664 Nucleus::createRetVoid(); // coroutine return value is ignored.
4665
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004666 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004667 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004668 }
4669
4670 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4671
4672 // Generates the await function for the current coroutine.
4673 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4674 static FunctionUniquePtr generateAwaitFunction()
4675 {
4676 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4677 // {
4678 // if (coro::isDone())
4679 // {
4680 // return false;
4681 // }
4682 // else // resume
4683 // {
4684 // YieldType* promise = coro::getPromisePtr(handle);
4685 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004686 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004687 // return true;
4688 // }
4689 // }
4690
4691 // Subzero doesn't support bool types (IceType_i1) as return type
4692 const Ice::Type ReturnType = Ice::IceType_i32;
4693 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4694 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4695
4696 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4697 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4698
4699 Ice::Variable *handle = awaitFunc->getArgs()[0];
4700 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4701
4702 auto doneBlock = awaitFunc->makeNode();
4703 {
4704 // return false;
4705 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4706 doneBlock->appendInst(ret);
4707 }
4708
4709 auto resumeBlock = awaitFunc->makeNode();
4710 {
4711 // YieldType* promise = coro::getPromisePtr(handle);
4712 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4713
4714 // *out = *promise;
4715 // Load promise value
4716 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4717 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4718 resumeBlock->appendInst(load);
4719 // Then store it in output param
4720 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4721 resumeBlock->appendInst(store);
4722
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004723 // coro::resume(handle);
4724 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004725
4726 // return true;
4727 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4728 resumeBlock->appendInst(ret);
4729 }
4730
4731 // if (coro::isDone())
4732 // {
4733 // <doneBlock>
4734 // }
4735 // else // resume
4736 // {
4737 // <resumeBlock>
4738 // }
4739 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004740 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004741 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4742 bb->appendInst(br);
4743
4744 return FunctionUniquePtr{ awaitFunc };
4745 }
4746
4747 // Generates the destroy function for the current coroutine.
4748 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4749 static FunctionUniquePtr generateDestroyFunction()
4750 {
4751 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4752 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004753 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004754 // return;
4755 // }
4756
4757 const Ice::Type ReturnType = Ice::IceType_void;
4758 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4759
4760 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4761 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4762
4763 Ice::Variable *handle = destroyFunc->getArgs()[0];
4764
4765 auto *bb = destroyFunc->getEntryNode();
4766
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004767 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4768 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004769
4770 // return;
4771 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4772 bb->appendInst(ret);
4773
4774 return FunctionUniquePtr{ destroyFunc };
4775 }
4776
4777private:
4778 Ice::Variable *handle{};
4779 Ice::Variable *promise{};
4780};
4781
4782static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4783{
4784 // This doubles up as our coroutine handle
4785 auto coroData = coro::createCoroutineData();
4786
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004787 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4788 if(coroData->useInternalScheduler)
4789 {
4790 ::getOrCreateScheduler().bind();
4791 }
4792
Ben Clayton76e9e532020-03-16 20:35:04 +00004793 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004794 // Store handle in TLS so that the coroutine can grab it right away, before
4795 // any fiber switch occurs.
4796 coro::setHandleParam(coroData);
4797
Ben Claytonc3466532020-03-24 11:54:05 +00004798 ASSERT(!coroData->routineFiber);
4799 coroData->routineFiber = marl::Scheduler::Fiber::current();
4800
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004801 beginFunc();
4802
Ben Claytonc3466532020-03-24 11:54:05 +00004803 ASSERT(coroData->inRoutine);
4804 coroData->done = true; // coroutine is done.
4805 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4806 coroData->inRoutine = false;
4807 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004808 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004809
Ben Claytonc3466532020-03-24 11:54:05 +00004810 ASSERT(!coroData->mainFiber);
4811 coroData->mainFiber = marl::Scheduler::Fiber::current();
4812
4813 // block until the first yield or coroutine end
4814 ASSERT(!coroData->inRoutine);
4815 coroData->inRoutine = true;
4816 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4817 while(coroData->inRoutine)
4818 {
4819 coroData->mainFiber->wait();
4820 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004821
4822 return coroData;
4823}
4824
4825void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4826{
4827 // Start by creating a regular function
4828 createFunction(yieldType, params);
4829
4830 // Save in case yield() is called
4831 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4832 ::coroYieldType = yieldType;
4833}
4834
4835void Nucleus::yield(Value *val)
4836{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004837 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004838 Variable::materializeAll();
4839
4840 // On first yield, we start generating coroutine functions
4841 if(!::coroGen)
4842 {
4843 ::coroGen = std::make_shared<CoroutineGenerator>();
4844 ::coroGen->generateCoroutineBegin();
4845 }
4846
4847 ASSERT(::coroGen);
4848 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004849}
4850
Ben Clayton713b8d32019-12-17 20:37:56 +00004851static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4852{
4853 return false;
4854}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004855
4856static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4857{
4858}
Nicolas Capens157ba262019-12-10 17:49:14 -05004859
4860std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4861{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004862 if(::coroGen)
4863 {
4864 // Finish generating coroutine functions
4865 {
4866 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004867 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004868 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004869
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004870 auto awaitFunc = ::coroGen->generateAwaitFunction();
4871 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004872
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004873 // At this point, we no longer need the CoroutineGenerator.
4874 ::coroGen.reset();
4875 ::coroYieldType = nullptr;
4876
4877 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4878 { name, "await", "destroy" },
4879 cfgEdit);
4880
4881 return routine;
4882 }
4883 else
4884 {
4885 {
4886 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004887 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004888 }
4889
4890 ::coroYieldType = nullptr;
4891
4892 // Not an actual coroutine (no yields), so return stubs for await and destroy
4893 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4894
4895 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4896 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4897 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4898 return routine;
4899 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004900}
4901
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004902Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004903{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004904 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4905
4906 if(isCoroutine)
4907 {
4908 return rr::invokeCoroutineBegin(func);
4909 }
4910 else
4911 {
4912 // For regular routines, just invoke the begin func directly
4913 return func();
4914 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004915}
Nicolas Capens157ba262019-12-10 17:49:14 -05004916
4917} // namespace rr