blob: 522b5668772d769f0a3da352d697f4d5b426ed7a [file] [log] [blame]
Nicolas Capens598f8d82016-09-26 15:09:10 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050017#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000018#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050019#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040020
Nicolas Capens1a3ce872018-10-10 10:42:36 -040021#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000022#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040023
Nicolas Capens598f8d82016-09-26 15:09:10 -040024#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfgNode.h"
26#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000027#include "src/IceELFStreamer.h"
28#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040029#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031
Ben Clayton713b8d32019-12-17 20:37:56 +000032#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040033#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040035
Antonio Maiorano8bce0672020-02-28 13:13:45 -050036#include "marl/event.h"
37
Nicolas Capens6a990f82018-07-06 15:54:07 -040038#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040041
Nicolas Capensbd65da92017-01-05 16:31:06 -050042#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000043# ifndef WIN32_LEAN_AND_MEAN
44# define WIN32_LEAN_AND_MEAN
45# endif // !WIN32_LEAN_AND_MEAN
46# ifndef NOMINMAX
47# define NOMINMAX
48# endif // !NOMINMAX
49# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050050#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
Ben Clayton683bad82020-02-10 23:57:09 +000052#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040053#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000054#include <limits>
55#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040056
Antonio Maiorano02a39532020-01-21 15:15:34 -050057// Subzero utility functions
58// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050059namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050060namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061
62Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
63{
64 uint32_t sequenceNumber = 0;
65 auto function = Ice::Cfg::create(context, sequenceNumber).release();
66
67 Ice::CfgLocalAllocatorScope allocScope{ function };
68
69 for(auto type : paramTypes)
70 {
71 Ice::Variable *arg = function->makeVariable(type);
72 function->addArg(arg);
73 }
74
75 Ice::CfgNode *node = function->makeNode();
76 function->setEntryNode(node);
77
78 return function;
79}
80
81Ice::Type getPointerType(Ice::Type elementType)
82{
83 if(sizeof(void *) == 8)
84 {
85 return Ice::IceType_i64;
86 }
87 else
88 {
89 return Ice::IceType_i32;
90 }
91}
92
93Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
94{
95 int typeSize = Ice::typeWidthInBytes(type);
96 int totalSize = typeSize * (arraySize ? arraySize : 1);
97
98 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
99 auto address = function->makeVariable(getPointerType(type));
100 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
101 function->getEntryNode()->getInsts().push_front(alloca);
102
103 return address;
104}
105
106Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500107{
108 if(sizeof(void *) == 8)
109 {
110 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
111 }
112 else
113 {
114 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
115 }
116}
117
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400118// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
119Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500120
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400121// Wrapper for calls on C functions with Ice types
122Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
123{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500124 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400125
126 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
127 // then truncate result to bool.
128 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
129 const bool returningBool = (retTy == Ice::IceType_i1);
130 if(returningBool)
131 {
132 ret = function->makeVariable(Ice::IceType_i32);
133 }
134 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500135 {
136 ret = function->makeVariable(retTy);
137 }
138
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400139 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500140 for(auto arg : iceArgs)
141 {
142 call->addArg(arg);
143 }
144
145 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400146
147 if(returningBool)
148 {
149 // Truncate result to bool so that if any (lsb) bits were set, result will be true
150 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
151 }
152
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500153 return ret;
154}
155
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400156Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
157{
158 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
159 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
160}
161
Antonio Maiorano62427e02020-02-13 09:18:05 -0500162// Wrapper for calls on C functions with Ice types
163template<typename Return, typename... CArgs, typename... RArgs>
164Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
165{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400166 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
167
Antonio Maiorano62427e02020-02-13 09:18:05 -0500168 Ice::Type retTy = T(rr::CToReactorT<Return>::getType());
169 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500170 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500171}
172
Antonio Maiorano02a39532020-01-21 15:15:34 -0500173// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500174Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500175{
176 Ice::Variable *result = function->makeVariable(v->getType());
177 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
178 basicBlock->appendInst(cast);
179 return result;
180}
181
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400182Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
183{
184 Ice::Variable *to = function->makeVariable(toType);
185 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
186 basicBlock->appendInst(cast);
187 return to;
188}
189
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500190Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500191{
192 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
193 // absolute address. We circumvent this by casting to a non-const variable, and loading
194 // from that.
195 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
196 {
197 ptr = sz::createUnconstCast(function, basicBlock, cptr);
198 }
199
200 Ice::Variable *result = function->makeVariable(type);
201 auto load = Ice::InstLoad::create(function, result, ptr, align);
202 basicBlock->appendInst(load);
203
204 return result;
205}
206
207} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500208} // namespace
209
Ben Clayton713b8d32019-12-17 20:37:56 +0000210namespace rr {
211class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500212class CoroutineGenerator;
213} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500214
215namespace {
216
217// Default configuration settings. Must be accessed under mutex lock.
218std::mutex defaultConfigLock;
219rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000220{
Nicolas Capens157ba262019-12-10 17:49:14 -0500221 // This uses a static in a function to avoid the cost of a global static
222 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
223 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000224 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500225 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000226}
227
Nicolas Capens157ba262019-12-10 17:49:14 -0500228Ice::GlobalContext *context = nullptr;
229Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400230Ice::CfgNode *entryBlock = nullptr;
231Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500232Ice::CfgNode *basicBlock = nullptr;
233Ice::CfgLocalAllocatorScope *allocator = nullptr;
234rr::ELFMemoryStreamer *routine = nullptr;
235
236std::mutex codegenMutex;
237
238Ice::ELFFileStreamer *elfFile = nullptr;
239Ice::Fdstream *out = nullptr;
240
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500241// Coroutine globals
242rr::Type *coroYieldType = nullptr;
243std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500244marl::Scheduler &getOrCreateScheduler()
245{
246 static auto scheduler = [] {
247 auto s = std::make_unique<marl::Scheduler>();
248 s->setWorkerThreadCount(8);
249 return s;
250 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500251
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500252 return *scheduler;
253}
Nicolas Capens157ba262019-12-10 17:49:14 -0500254} // Anonymous namespace
255
256namespace {
257
258#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +0000259# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500260#endif
261
Ben Clayton713b8d32019-12-17 20:37:56 +0000262#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
263# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500264#endif
265
Antonio Maiorano370cba52019-12-31 11:36:07 -0500266Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400267{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500268 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100269 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500270 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000271 case rr::Optimization::Level::None: return Ice::Opt_m1;
272 case rr::Optimization::Level::Less: return Ice::Opt_m1;
273 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500274 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
275 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100276 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500277 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400278}
279
Antonio Maiorano370cba52019-12-31 11:36:07 -0500280Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
281{
282 switch(memoryOrder)
283 {
284 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
285 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
286 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
287 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
288 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
289 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
290 }
291 return Ice::Intrinsics::MemoryOrderInvalid;
292}
293
// CPU detection for the host this code was compiled for. Both flags are
// computed once, when the static members below are initialized.
class CPUID
{
public:
	// True when compiled for __arm__ or __aarch64__.
	static const bool ARM;
	// True on x86 builds when CPUID leaf 1 reports ECX bit 19 (SSE4.1).
	static const bool SSE4_1;

private:
	// Runs the x86 CPUID instruction for leaf |info| and stores EAX, EBX, ECX
	// and EDX in registers[0..3]. On non-x86 builds all four are set to 0.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Decided purely from the target architecture macros; unknown
	// architectures are a compile error.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__)
		return false;
#elif defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Queries CPUID leaf 1 on x86; always false elsewhere.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		const int sse41Mask = 0x00080000;  // ECX bit 19

		int registers[4];
		cpuid(registers, 1);

		return (registers[2] & sse41Mask) != 0;
#else
		return false;
#endif
	}
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
346const bool emulateIntrinsics = false;
347const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400348
Nicolas Capens157ba262019-12-10 17:49:14 -0500349constexpr bool subzeroDumpEnabled = false;
350constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500351
352#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500353static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
354static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500355#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500356
357} // anonymous namespace
358
359namespace rr {
360
// Returns the name of this Reactor backend.
std::string BackendName()
{
	const std::string name("Subzero");
	return name;
}
365
Ben Clayton713b8d32019-12-17 20:37:56 +0000366const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500367 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500368};
369
370enum EmulatedType
371{
372 EmulatedShift = 16,
373 EmulatedV2 = 2 << EmulatedShift,
374 EmulatedV4 = 4 << EmulatedShift,
375 EmulatedV8 = 8 << EmulatedShift,
376 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
377
378 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
379 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
380 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000381 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
382 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500383 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
384};
385
Ben Clayton713b8d32019-12-17 20:37:56 +0000386class Value : public Ice::Operand
387{};
388class SwitchCases : public Ice::InstSwitch
389{};
390class BasicBlock : public Ice::CfgNode
391{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500392
393Ice::Type T(Type *t)
394{
395 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
396 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500397}
398
Nicolas Capens157ba262019-12-10 17:49:14 -0500399Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400400{
Ben Clayton713b8d32019-12-17 20:37:56 +0000401 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500402}
403
404Type *T(EmulatedType t)
405{
Ben Clayton713b8d32019-12-17 20:37:56 +0000406 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500407}
408
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500409std::vector<Ice::Type> T(const std::vector<Type *> &types)
410{
411 std::vector<Ice::Type> result;
412 result.reserve(types.size());
413 for(auto &t : types)
414 {
415 result.push_back(T(t));
416 }
417 return result;
418}
419
Nicolas Capens157ba262019-12-10 17:49:14 -0500420Value *V(Ice::Operand *v)
421{
Ben Clayton713b8d32019-12-17 20:37:56 +0000422 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500423}
424
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500425Ice::Operand *V(Value *v)
426{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500427 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500428}
429
Antonio Maiorano62427e02020-02-13 09:18:05 -0500430std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
431{
432 std::vector<Ice::Operand *> result;
433 result.reserve(values.size());
434 for(auto &v : values)
435 {
436 result.push_back(V(v));
437 }
438 return result;
439}
440
Nicolas Capens157ba262019-12-10 17:49:14 -0500441BasicBlock *B(Ice::CfgNode *b)
442{
Ben Clayton713b8d32019-12-17 20:37:56 +0000443 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500444}
445
446static size_t typeSize(Type *type)
447{
448 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400449 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500450 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800451 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000452 case Type_v2i32: return 8;
453 case Type_v4i16: return 8;
454 case Type_v2i16: return 4;
455 case Type_v8i8: return 8;
456 case Type_v4i8: return 4;
457 case Type_v2f32: return 8;
458 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500459 }
460 }
461
462 return Ice::typeWidthInBytes(T(type));
463}
464
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400465static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500466{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400467 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500468 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
469 {
470 Nucleus::createRetVoid();
471 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400472
473 // Connect the entry block to the top of the initial basic block
474 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
475 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500476}
477
Ben Clayton713b8d32019-12-17 20:37:56 +0000478using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
479using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500480
481inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
482{
Ben Clayton713b8d32019-12-17 20:37:56 +0000483 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500484}
485
486inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
487{
488 return &sectionHeader(elfHeader)[index];
489}
490
491static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
492{
493 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
494
495 uint32_t index = relocation.getSymbol();
496 int table = relocationTable.sh_link;
497 void *symbolValue = nullptr;
498
499 if(index != SHN_UNDEF)
500 {
501 if(table == SHN_UNDEF) return nullptr;
502 const SectionHeader *symbolTable = elfSection(elfHeader, table);
503
504 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
505 if(index >= symtab_entries)
506 {
507 ASSERT(index < symtab_entries && "Symbol Index out of range");
508 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800509 }
510
Nicolas Capens157ba262019-12-10 17:49:14 -0500511 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000512 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500513 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800514
Nicolas Capens157ba262019-12-10 17:49:14 -0500515 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400516 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500517 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000518 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400519 }
520 else
521 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500522 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400523 }
Nicolas Capens66478362016-10-13 15:36:36 -0400524 }
525
Nicolas Capens157ba262019-12-10 17:49:14 -0500526 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000527 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500528
529 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400530 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400531 switch(relocation.getType())
532 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000533 case R_ARM_NONE:
534 // No relocation
535 break;
536 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500537 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000538 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500539 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
540 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
541 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400542 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000543 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500544 {
545 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
546 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
547 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400548 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000549 default:
550 ASSERT(false && "Unsupported relocation type");
551 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400552 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500553 }
554 else
555 {
556 switch(relocation.getType())
557 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000558 case R_386_NONE:
559 // No relocation
560 break;
561 case R_386_32:
562 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
563 break;
564 case R_386_PC32:
565 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
566 break;
567 default:
568 ASSERT(false && "Unsupported relocation type");
569 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500570 }
Nicolas Capens66478362016-10-13 15:36:36 -0400571 }
572
Nicolas Capens157ba262019-12-10 17:49:14 -0500573 return symbolValue;
574}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400575
Nicolas Capens157ba262019-12-10 17:49:14 -0500576static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
577{
578 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
579
580 uint32_t index = relocation.getSymbol();
581 int table = relocationTable.sh_link;
582 void *symbolValue = nullptr;
583
584 if(index != SHN_UNDEF)
585 {
586 if(table == SHN_UNDEF) return nullptr;
587 const SectionHeader *symbolTable = elfSection(elfHeader, table);
588
589 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
590 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400591 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500592 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400593 return nullptr;
594 }
595
Nicolas Capens157ba262019-12-10 17:49:14 -0500596 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000597 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500598 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400599
Nicolas Capens157ba262019-12-10 17:49:14 -0500600 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400601 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500602 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000603 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500604 }
605 else
606 {
607 return nullptr;
608 }
609 }
Nicolas Capens66478362016-10-13 15:36:36 -0400610
Nicolas Capens157ba262019-12-10 17:49:14 -0500611 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000612 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
613 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400614
Nicolas Capens157ba262019-12-10 17:49:14 -0500615 switch(relocation.getType())
616 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000617 case R_X86_64_NONE:
618 // No relocation
619 break;
620 case R_X86_64_64:
621 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
622 break;
623 case R_X86_64_PC32:
624 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
625 break;
626 case R_X86_64_32S:
627 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
628 break;
629 default:
630 ASSERT(false && "Unsupported relocation type");
631 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500632 }
633
634 return symbolValue;
635}
636
// Location of one generated function inside a loaded ELF image.
struct EntryPoint
{
	// Address of the first byte of the function's code. Defaults to null so
	// that a default-constructed EntryPoint never holds an indeterminate
	// pointer.
	const void *entry = nullptr;
	// Size of the function's code section, in bytes.
	size_t codeSize = 0;
};
642
643std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
644{
645 ASSERT(functionNames.size() > 0);
646 std::vector<EntryPoint> entryPoints(functionNames.size());
647
Ben Clayton713b8d32019-12-17 20:37:56 +0000648 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500649
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400650 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500651 if(!elfHeader->checkMagic())
652 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400653 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500654 }
655
656 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000657 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
658#if defined(__i386__)
659 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
660#elif defined(__x86_64__)
661 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
662#elif defined(__arm__)
663 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
664#elif defined(__aarch64__)
665 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
666#elif defined(__mips__)
667 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
668#else
669# error "Unsupported platform"
670#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500671
Ben Clayton713b8d32019-12-17 20:37:56 +0000672 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500673
674 for(int i = 0; i < elfHeader->e_shnum; i++)
675 {
676 if(sectionHeader[i].sh_type == SHT_PROGBITS)
677 {
678 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
679 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400680 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500681 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400682 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500683
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400684 for(size_t j = 0; j < functionNames.size(); ++j)
685 {
686 if(strstr(sectionName, functionNames[j]) != nullptr)
687 {
688 return j;
689 }
690 }
691
692 UNREACHABLE("Failed to find executable section that matches input function names");
693 return static_cast<size_t>(-1);
694 };
695
696 size_t index = findSectionNameEntryIndex();
697 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
698 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400699 }
700 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500701 else if(sectionHeader[i].sh_type == SHT_REL)
702 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000703 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400704
Nicolas Capens157ba262019-12-10 17:49:14 -0500705 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
706 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000707 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500708 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
709 }
710 }
711 else if(sectionHeader[i].sh_type == SHT_RELA)
712 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000713 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500714
715 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
716 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000717 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500718 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
719 }
720 }
721 }
722
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400723 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500724}
725
726template<typename T>
727struct ExecutableAllocator
728{
729 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000730 template<class U>
731 ExecutableAllocator(const ExecutableAllocator<U> &other)
732 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500733
734 using value_type = T;
735 using size_type = std::size_t;
736
737 T *allocate(size_type n)
738 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000739 return (T *)allocateMemoryPages(
740 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500741 }
742
743 void deallocate(T *p, size_type n)
744 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800745 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500746 }
747};
748
749class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
750{
751 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
752 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
753
754public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000755 ELFMemoryStreamer()
756 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500757 {
758 position = 0;
759 buffer.reserve(0x1000);
760 }
761
762 ~ELFMemoryStreamer() override
763 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500764 }
765
766 void write8(uint8_t Value) override
767 {
768 if(position == (uint64_t)buffer.size())
769 {
770 buffer.push_back(Value);
771 position++;
772 }
773 else if(position < (uint64_t)buffer.size())
774 {
775 buffer[position] = Value;
776 position++;
777 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000778 else
779 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500780 }
781
782 void writeBytes(llvm::StringRef Bytes) override
783 {
784 std::size_t oldSize = buffer.size();
785 buffer.resize(oldSize + Bytes.size());
786 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
787 position += Bytes.size();
788 }
789
790 uint64_t tell() const override { return position; }
791
792 void seek(uint64_t Off) override { position = Off; }
793
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400794 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500795 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400796 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500797
798#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500799 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
800#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400801 for(auto &entryPoint : entryPoints)
802 {
803 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
804 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500805#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500806
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400807 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400808 }
809
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500810 void finalize()
811 {
812 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
813
814 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
815 }
816
Ben Clayton713b8d32019-12-17 20:37:56 +0000817 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400818 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500819 ASSERT(func);
820 funcs[index] = func;
821 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400822
Nicolas Capens157ba262019-12-10 17:49:14 -0500823 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400824 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500825 ASSERT(funcs[index]);
826 return funcs[index];
827 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400828
Antonio Maiorano02a39532020-01-21 15:15:34 -0500829 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500830 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500831 // TODO(b/148086935): Replace with a buffer allocator.
832 size_t space = size + alignment;
833 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
834 void *ptr = buf.get();
835 void *alignedPtr = std::align(alignment, size, ptr, space);
836 ASSERT(alignedPtr);
837 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500838 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500839 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500840 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400841
Nicolas Capens157ba262019-12-10 17:49:14 -0500842private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000843 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500844 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
845 std::size_t position;
846 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500847};
848
#ifdef ENABLE_RR_PRINT
// Emits a call to the C library printf() into the current basic block, passing
// the given values as its arguments. The call is typed as returning i32.
void VPrintf(const std::vector<Value *> &vals)
{
	const void *printfAddress = reinterpret_cast<const void *>(::printf);
	sz::Call(::function, ::basicBlock, Ice::IceType_i32, printfAddress, V(vals), true);
}
#endif  // ENABLE_RR_PRINT
855
Nicolas Capens157ba262019-12-10 17:49:14 -0500856Nucleus::Nucleus()
857{
Ben Clayton713b8d32019-12-17 20:37:56 +0000858 ::codegenMutex.lock(); // Reactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500859
860 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
861 Ice::ClFlags::getParsedClFlags(Flags);
862
Ben Clayton713b8d32019-12-17 20:37:56 +0000863#if defined(__arm__)
864 Flags.setTargetArch(Ice::Target_ARM32);
865 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
866#elif defined(__mips__)
867 Flags.setTargetArch(Ice::Target_MIPS32);
868 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
869#else // x86
870 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
871 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
872#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500873 Flags.setOutFileType(Ice::FT_Elf);
874 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
875 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
876 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
877 Flags.setDisableHybridAssembly(true);
878
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500879 // Emit functions into separate sections in the ELF so we can find them by name
880 Flags.setFunctionSections(true);
881
Nicolas Capens157ba262019-12-10 17:49:14 -0500882 static llvm::raw_os_ostream cout(std::cout);
883 static llvm::raw_os_ostream cerr(std::cerr);
884
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500885 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500886 {
887 // Decorate text asm with liveness info
888 Flags.setDecorateAsm(true);
889 }
890
Ben Clayton713b8d32019-12-17 20:37:56 +0000891 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500892 {
893 std::error_code errorCode;
894 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
895 ::elfFile = new Ice::ELFFileStreamer(*out);
896 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
897 }
898 else
899 {
900 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
901 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
902 ::routine = elfMemory;
903 }
904}
905
906Nucleus::~Nucleus()
907{
908 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500909 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500910
911 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500912 ::allocator = nullptr;
913
Nicolas Capens157ba262019-12-10 17:49:14 -0500914 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500915 ::function = nullptr;
916
Nicolas Capens157ba262019-12-10 17:49:14 -0500917 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500918 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500919
920 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500921 ::elfFile = nullptr;
922
Nicolas Capens157ba262019-12-10 17:49:14 -0500923 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500924 ::out = nullptr;
925
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400926 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500927 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400928 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500929
930 ::codegenMutex.unlock();
931}
932
933void Nucleus::setDefaultConfig(const Config &cfg)
934{
935 std::unique_lock<std::mutex> lock(::defaultConfigLock);
936 ::defaultConfig() = cfg;
937}
938
939void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
940{
941 std::unique_lock<std::mutex> lock(::defaultConfigLock);
942 auto &config = ::defaultConfig();
943 config = cfgEdit.apply(config);
944}
945
946Config Nucleus::getDefaultConfig()
947{
948 std::unique_lock<std::mutex> lock(::defaultConfigLock);
949 return ::defaultConfig();
950}
951
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500952// This function lowers and produces executable binary code in memory for the input functions,
953// and returns a Routine with the entry points to these functions.
954template<size_t Count>
955static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500956{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500957 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
958 // and GlobalContext::emitItems.
959
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500960 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500961 {
962 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500963 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500964 }
965
966 ::context->emitFileHeader();
967
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500968 // Translate
969
970 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500971 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500972 Ice::Cfg *currFunc = functions[i];
973
974 // Install function allocator in TLS for Cfg-specific container allocators
975 Ice::CfgLocalAllocatorScope allocScope(currFunc);
976
977 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
978
979 rr::optimize(currFunc);
980
981 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500982 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500983
984 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500985 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500986
987 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
988
989 if(subzeroEmitTextAsm)
990 {
991 currFunc->emit();
992 }
993
994 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -0500995 }
996
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500997 // Emit items
998
999 ::context->lowerGlobals("");
1000
Nicolas Capens157ba262019-12-10 17:49:14 -05001001 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001002
1003 for(size_t i = 0; i < Count; ++i)
1004 {
1005 Ice::Cfg *currFunc = functions[i];
1006
1007 // Accumulate globals from functions to emit into the "last" section at the end
1008 auto globals = currFunc->getGlobalInits();
1009 if(globals && !globals->empty())
1010 {
1011 ::context->getGlobals()->merge(globals.get());
1012 }
1013
1014 auto assembler = currFunc->releaseAssembler();
1015 assembler->alignFunction();
1016 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1017 }
1018
Nicolas Capens157ba262019-12-10 17:49:14 -05001019 ::context->lowerGlobals("last");
1020 ::context->lowerConstants();
1021 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001022
Nicolas Capens157ba262019-12-10 17:49:14 -05001023 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001024 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001025 objectWriter->writeNonUserSections();
1026
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001027 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001028 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1029 ASSERT(entryPoints.size() == Count);
1030 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001031 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001032 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001033 }
1034
1035 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001036
1037 Routine *handoffRoutine = ::routine;
1038 ::routine = nullptr;
1039
1040 return std::shared_ptr<Routine>(handoffRoutine);
1041}
1042
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001043std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1044{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001045 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001046 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1047}
1048
Nicolas Capens157ba262019-12-10 17:49:14 -05001049Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1050{
1051 Ice::Type type = T(t);
1052 int typeSize = Ice::typeWidthInBytes(type);
1053 int totalSize = typeSize * (arraySize ? arraySize : 1);
1054
1055 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1056 auto address = ::function->makeVariable(T(getPointerType(t)));
1057 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1058 ::function->getEntryNode()->getInsts().push_front(alloca);
1059
1060 return V(address);
1061}
1062
1063BasicBlock *Nucleus::createBasicBlock()
1064{
1065 return B(::function->makeNode());
1066}
1067
1068BasicBlock *Nucleus::getInsertBlock()
1069{
1070 return B(::basicBlock);
1071}
1072
1073void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1074{
Ben Clayton713b8d32019-12-17 20:37:56 +00001075 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001076
1077 Variable::materializeAll();
1078
1079 ::basicBlock = basicBlock;
1080}
1081
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001082void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001083{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001084 ASSERT(::function == nullptr);
1085 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001086 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001087 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001088 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001089
1090 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1091
1092 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1093 // becomes invalid if another one is created; for example, when creating await and destroy functions
1094 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1095 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001096 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1097
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001098 ::entryBlock = ::function->getEntryNode();
1099 ::basicBlock = ::function->makeNode();
1100 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001101}
1102
1103Value *Nucleus::getArgument(unsigned int index)
1104{
1105 return V(::function->getArgs()[index]);
1106}
1107
1108void Nucleus::createRetVoid()
1109{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001110 RR_DEBUG_INFO_UPDATE_LOC();
1111
Nicolas Capens157ba262019-12-10 17:49:14 -05001112 // Code generated after this point is unreachable, so any variables
1113 // being read can safely return an undefined value. We have to avoid
1114 // materializing variables after the terminator ret instruction.
1115 Variable::killUnmaterialized();
1116
1117 Ice::InstRet *ret = Ice::InstRet::create(::function);
1118 ::basicBlock->appendInst(ret);
1119}
1120
1121void Nucleus::createRet(Value *v)
1122{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001123 RR_DEBUG_INFO_UPDATE_LOC();
1124
Nicolas Capens157ba262019-12-10 17:49:14 -05001125 // Code generated after this point is unreachable, so any variables
1126 // being read can safely return an undefined value. We have to avoid
1127 // materializing variables after the terminator ret instruction.
1128 Variable::killUnmaterialized();
1129
1130 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1131 ::basicBlock->appendInst(ret);
1132}
1133
1134void Nucleus::createBr(BasicBlock *dest)
1135{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001136 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001137 Variable::materializeAll();
1138
1139 auto br = Ice::InstBr::create(::function, dest);
1140 ::basicBlock->appendInst(br);
1141}
1142
1143void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1144{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001145 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001146 Variable::materializeAll();
1147
1148 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1149 ::basicBlock->appendInst(br);
1150}
1151
1152static bool isCommutative(Ice::InstArithmetic::OpKind op)
1153{
1154 switch(op)
1155 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001156 case Ice::InstArithmetic::Add:
1157 case Ice::InstArithmetic::Fadd:
1158 case Ice::InstArithmetic::Mul:
1159 case Ice::InstArithmetic::Fmul:
1160 case Ice::InstArithmetic::And:
1161 case Ice::InstArithmetic::Or:
1162 case Ice::InstArithmetic::Xor:
1163 return true;
1164 default:
1165 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001166 }
1167}
1168
1169static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1170{
1171 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1172
1173 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1174
1175 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1176 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1177 ::basicBlock->appendInst(arithmetic);
1178
1179 return V(result);
1180}
1181
1182Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1183{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001184 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001185 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1186}
1187
1188Value *Nucleus::createSub(Value *lhs, Value *rhs)
1189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001190 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001191 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1192}
1193
1194Value *Nucleus::createMul(Value *lhs, Value *rhs)
1195{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001196 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001197 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1198}
1199
1200Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1201{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001202 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001203 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1204}
1205
1206Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1207{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001208 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001209 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1210}
1211
1212Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1213{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001214 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001215 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1216}
1217
1218Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1219{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001220 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001221 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1222}
1223
1224Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1225{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001226 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001227 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1228}
1229
1230Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1231{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001232 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001233 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1234}
1235
1236Value *Nucleus::createURem(Value *lhs, Value *rhs)
1237{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001238 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001239 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1240}
1241
1242Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1243{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001244 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001245 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1246}
1247
1248Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1249{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001250 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001251 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1252 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001253 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001254 return nullptr;
1255}
1256
1257RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1258{
1259 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001260}
1261
1262Value *Nucleus::createShl(Value *lhs, Value *rhs)
1263{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001264 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001265 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1266}
1267
1268Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001271 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1272}
1273
1274Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001277 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1278}
1279
1280Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001282 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001283 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1284}
1285
1286Value *Nucleus::createOr(Value *lhs, Value *rhs)
1287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001289 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1290}
1291
1292Value *Nucleus::createXor(Value *lhs, Value *rhs)
1293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001295 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1296}
1297
1298Value *Nucleus::createNeg(Value *v)
1299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001301 return createSub(createNullValue(T(v->getType())), v);
1302}
1303
1304Value *Nucleus::createFNeg(Value *v)
1305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001306 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001307 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1308 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001309
1310 return createFSub(negativeZero, v);
1311}
1312
1313Value *Nucleus::createNot(Value *v)
1314{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001315 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001316 if(Ice::isScalarIntegerType(v->getType()))
1317 {
1318 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1319 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001320 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001321 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001322 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001323 return createXor(v, createConstantVector(c, T(v->getType())));
1324 }
1325}
1326
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001327static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1328{
1329#if defined(__i386__) || defined(__x86_64__)
1330 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1331 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1332 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1333#else
1334 if(atomic)
1335 {
1336 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1337 }
1338 if(memoryOrder != std::memory_order_relaxed)
1339 {
1340 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1341 }
1342#endif
1343
1344 // Vulkan doesn't allow sequential memory order
1345 ASSERT(memoryOrder != std::memory_order_seq_cst);
1346}
1347
Nicolas Capens157ba262019-12-10 17:49:14 -05001348Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1349{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001350 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001351 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001352
1353 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001354 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001355
Ben Clayton713b8d32019-12-17 20:37:56 +00001356 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001357 {
1358 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001359 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001360 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001361 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001362 auto pointer = RValue<Pointer<Byte>>(ptr);
1363 Int x = *Pointer<Int>(pointer);
1364
1365 Int4 vector;
1366 vector = Insert(vector, x, 0);
1367
Antonio Maiorano02a39532020-01-21 15:15:34 -05001368 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001369 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1370 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001371 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001372 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001373 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001374 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001375 auto pointer = RValue<Pointer<Byte>>(ptr);
1376 Int x = *Pointer<Int>(pointer);
1377 Int y = *Pointer<Int>(pointer + 4);
1378
1379 Int4 vector;
1380 vector = Insert(vector, x, 0);
1381 vector = Insert(vector, y, 1);
1382
Antonio Maiorano02a39532020-01-21 15:15:34 -05001383 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001384 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1385 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001386 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001387 else
1388 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001389 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001390 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001391 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001392 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001393 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001394 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001395 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1396 load->addArg(ptr);
1397 load->addArg(::context->getConstantInt32(typeSize(type)));
1398 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001399 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001400 }
1401 else
1402 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001403 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001404 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001405
Antonio Maiorano02a39532020-01-21 15:15:34 -05001406 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001407 return V(result);
1408}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001409
Nicolas Capens157ba262019-12-10 17:49:14 -05001410Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1411{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001412 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001413 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001414
Ben Clayton713b8d32019-12-17 20:37:56 +00001415#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001416 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001417 if(align != 0)
1418 {
1419 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1420 call->addArg(ptr);
1421 call->addArg(::context->getConstantInt64(typeSize(type)));
1422 ::basicBlock->appendInst(call);
1423 }
1424#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001425
Nicolas Capens157ba262019-12-10 17:49:14 -05001426 int valueType = (int)reinterpret_cast<intptr_t>(type);
1427
Ben Clayton713b8d32019-12-17 20:37:56 +00001428 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001429 {
1430 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001431 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001432 if(typeSize(type) == 4)
1433 {
1434 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1435 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1436 ::basicBlock->appendInst(bitcast);
1437
1438 RValue<Int4> v(V(vector));
1439
1440 auto pointer = RValue<Pointer<Byte>>(ptr);
1441 Int x = Extract(v, 0);
1442 *Pointer<Int>(pointer) = x;
1443 }
1444 else if(typeSize(type) == 8)
1445 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001446 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001447 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1448 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1449 ::basicBlock->appendInst(bitcast);
1450
1451 RValue<Int4> v(V(vector));
1452
1453 auto pointer = RValue<Pointer<Byte>>(ptr);
1454 Int x = Extract(v, 0);
1455 *Pointer<Int>(pointer) = x;
1456 Int y = Extract(v, 1);
1457 *Pointer<Int>(pointer + 4) = y;
1458 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001459 else
1460 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001461 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001462 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001463 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001464 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001465 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1466 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1467 store->addArg(value);
1468 store->addArg(ptr);
1469 store->addArg(::context->getConstantInt32(typeSize(type)));
1470 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001471 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001472 }
1473 else
1474 {
1475 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001476
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001477 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001478 ::basicBlock->appendInst(store);
1479 }
1480
1481 return value;
1482}
1483
1484Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1485{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001486 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001487 ASSERT(index->getType() == Ice::IceType_i32);
1488
1489 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1490 {
1491 int32_t offset = constant->getValue() * (int)typeSize(type);
1492
1493 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001494 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001495 return ptr;
1496 }
1497
Nicolas Capens157ba262019-12-10 17:49:14 -05001498 return createAdd(ptr, createConstantInt(offset));
1499 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001500
Nicolas Capens157ba262019-12-10 17:49:14 -05001501 if(!Ice::isByteSizedType(T(type)))
1502 {
1503 index = createMul(index, createConstantInt((int)typeSize(type)));
1504 }
1505
Ben Clayton713b8d32019-12-17 20:37:56 +00001506 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001507 {
1508 if(unsignedIndex)
1509 {
1510 index = createZExt(index, T(Ice::IceType_i64));
1511 }
1512 else
1513 {
1514 index = createSExt(index, T(Ice::IceType_i64));
1515 }
1516 }
1517
1518 return createAdd(ptr, index);
1519}
1520
Antonio Maiorano370cba52019-12-31 11:36:07 -05001521static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1522{
1523 Ice::Variable *result = ::function->makeVariable(value->getType());
1524
1525 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1526 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1527 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1528 auto op = ::context->getConstantInt32(rmwOp);
1529 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1530 inst->addArg(op);
1531 inst->addArg(ptr);
1532 inst->addArg(value);
1533 inst->addArg(order);
1534 ::basicBlock->appendInst(inst);
1535
1536 return V(result);
1537}
1538
Nicolas Capens157ba262019-12-10 17:49:14 -05001539Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1540{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001541 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001542 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001543}
1544
1545Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1546{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001547 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001548 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001549}
1550
1551Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1552{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001553 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001554 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001555}
1556
1557Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1558{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001559 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001560 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001561}
1562
1563Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1564{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001565 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001566 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001567}
1568
1569Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1570{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001571 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001572 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001573}
1574
1575Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1576{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001577 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001578 Ice::Variable *result = ::function->makeVariable(value->getType());
1579
1580 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1581 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1582 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1583 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1584 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1585 inst->addArg(ptr);
1586 inst->addArg(compare);
1587 inst->addArg(value);
1588 inst->addArg(orderEq);
1589 inst->addArg(orderNeq);
1590 ::basicBlock->appendInst(inst);
1591
1592 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001593}
1594
1595static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1596{
1597 if(v->getType() == T(destType))
1598 {
1599 return v;
1600 }
1601
1602 Ice::Variable *result = ::function->makeVariable(T(destType));
1603 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1604 ::basicBlock->appendInst(cast);
1605
1606 return V(result);
1607}
1608
1609Value *Nucleus::createTrunc(Value *v, Type *destType)
1610{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001611 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001612 return createCast(Ice::InstCast::Trunc, v, destType);
1613}
1614
1615Value *Nucleus::createZExt(Value *v, Type *destType)
1616{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001617 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001618 return createCast(Ice::InstCast::Zext, v, destType);
1619}
1620
1621Value *Nucleus::createSExt(Value *v, Type *destType)
1622{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001623 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001624 return createCast(Ice::InstCast::Sext, v, destType);
1625}
1626
1627Value *Nucleus::createFPToUI(Value *v, Type *destType)
1628{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001629 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001630 return createCast(Ice::InstCast::Fptoui, v, destType);
1631}
1632
1633Value *Nucleus::createFPToSI(Value *v, Type *destType)
1634{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001635 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001636 return createCast(Ice::InstCast::Fptosi, v, destType);
1637}
1638
1639Value *Nucleus::createSIToFP(Value *v, Type *destType)
1640{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001641 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001642 return createCast(Ice::InstCast::Sitofp, v, destType);
1643}
1644
1645Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1646{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001647 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001648 return createCast(Ice::InstCast::Fptrunc, v, destType);
1649}
1650
1651Value *Nucleus::createFPExt(Value *v, Type *destType)
1652{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001653 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001654 return createCast(Ice::InstCast::Fpext, v, destType);
1655}
1656
1657Value *Nucleus::createBitCast(Value *v, Type *destType)
1658{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001659 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001660 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1661 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1662 // emulate them by writing to the stack and reading back as the destination type.
1663 if(emulateMismatchedBitCast)
1664 {
1665 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1666 {
1667 Value *address = allocateStackVariable(destType);
1668 createStore(v, address, T(v->getType()));
1669 return createLoad(address, destType);
1670 }
1671 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1672 {
1673 Value *address = allocateStackVariable(T(v->getType()));
1674 createStore(v, address, T(v->getType()));
1675 return createLoad(address, destType);
1676 }
1677 }
1678
1679 return createCast(Ice::InstCast::Bitcast, v, destType);
1680}
1681
1682static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1683{
1684 ASSERT(lhs->getType() == rhs->getType());
1685
1686 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1687 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1688 ::basicBlock->appendInst(cmp);
1689
1690 return V(result);
1691}
1692
1693Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1694{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001695 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001696 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1697}
1698
1699Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1700{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001701 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001702 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1703}
1704
1705Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1706{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001707 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001708 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1709}
1710
1711Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1712{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001713 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001714 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1715}
1716
1717Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1718{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001719 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001720 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1721}
1722
1723Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1724{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001725 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001726 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1727}
1728
1729Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1730{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001731 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001732 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1733}
1734
1735Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1736{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001737 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001738 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1739}
1740
1741Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1742{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001743 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001744 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1745}
1746
1747Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1748{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001749 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001750 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1751}
1752
1753Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1754{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001755 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001756 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1757}
1758
1759static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1760{
1761 ASSERT(lhs->getType() == rhs->getType());
1762 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1763
1764 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1765 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1766 ::basicBlock->appendInst(cmp);
1767
1768 return V(result);
1769}
1770
1771Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1772{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001773 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001774 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1775}
1776
1777Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001780 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1781}
1782
1783Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1784{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001785 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001786 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1787}
1788
1789Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001792 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1793}
1794
1795Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001798 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1799}
1800
1801Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1802{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001803 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001804 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1805}
1806
1807Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1808{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001809 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001810 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1811}
1812
1813Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1814{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001815 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001816 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1817}
1818
1819Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1820{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001821 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001822 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1823}
1824
1825Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1826{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001827 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001828 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1829}
1830
1831Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1832{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001833 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001834 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1835}
1836
1837Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001840 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1841}
1842
1843Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001846 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1847}
1848
1849Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001852 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1853}
1854
1855Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001858 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001859 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001860 ::basicBlock->appendInst(extract);
1861
1862 return V(result);
1863}
1864
1865Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1866{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001867 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001868 auto result = ::function->makeVariable(vector->getType());
1869 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1870 ::basicBlock->appendInst(insert);
1871
1872 return V(result);
1873}
1874
1875Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1876{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001877 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001878 ASSERT(V1->getType() == V2->getType());
1879
1880 int size = Ice::typeNumElements(V1->getType());
1881 auto result = ::function->makeVariable(V1->getType());
1882 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1883
1884 for(int i = 0; i < size; i++)
1885 {
1886 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1887 }
1888
1889 ::basicBlock->appendInst(shuffle);
1890
1891 return V(result);
1892}
1893
1894Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1895{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001896 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001897 ASSERT(ifTrue->getType() == ifFalse->getType());
1898
1899 auto result = ::function->makeVariable(ifTrue->getType());
1900 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1901 ::basicBlock->appendInst(select);
1902
1903 return V(result);
1904}
1905
1906SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1907{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001908 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001909 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1910 ::basicBlock->appendInst(switchInst);
1911
Ben Clayton713b8d32019-12-17 20:37:56 +00001912 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001913}
1914
1915void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1916{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001917 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001918 switchCases->addBranch(label, label, branch);
1919}
1920
1921void Nucleus::createUnreachable()
1922{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001923 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001924 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1925 ::basicBlock->appendInst(unreachable);
1926}
1927
Antonio Maiorano62427e02020-02-13 09:18:05 -05001928Type *Nucleus::getType(Value *value)
1929{
1930 return T(V(value)->getType());
1931}
1932
1933Type *Nucleus::getContainedType(Type *vectorType)
1934{
1935 Ice::Type vecTy = T(vectorType);
1936 switch(vecTy)
1937 {
1938 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1939 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1940 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1941 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1942 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1943 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1944 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1945 default:
1946 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1947 return {};
1948 }
1949}
1950
Nicolas Capens157ba262019-12-10 17:49:14 -05001951Type *Nucleus::getPointerType(Type *ElementType)
1952{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001953 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001954}
1955
Antonio Maiorano62427e02020-02-13 09:18:05 -05001956static constexpr Ice::Type getNaturalIntType()
1957{
1958 constexpr size_t intSize = sizeof(int);
1959 static_assert(intSize == 4 || intSize == 8, "");
1960 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1961}
1962
1963Type *Nucleus::getPrintfStorageType(Type *valueType)
1964{
1965 Ice::Type valueTy = T(valueType);
1966 switch(valueTy)
1967 {
1968 case Ice::IceType_i32:
1969 return T(getNaturalIntType());
1970
1971 case Ice::IceType_f32:
1972 return T(Ice::IceType_f64);
1973
1974 default:
1975 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1976 return {};
1977 }
1978}
1979
Nicolas Capens157ba262019-12-10 17:49:14 -05001980Value *Nucleus::createNullValue(Type *Ty)
1981{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001982 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001983 if(Ice::isVectorType(T(Ty)))
1984 {
1985 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001986 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001987 return createConstantVector(c, Ty);
1988 }
1989 else
1990 {
1991 return V(::context->getConstantZero(T(Ty)));
1992 }
1993}
1994
1995Value *Nucleus::createConstantLong(int64_t i)
1996{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001997 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001998 return V(::context->getConstantInt64(i));
1999}
2000
2001Value *Nucleus::createConstantInt(int i)
2002{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002003 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002004 return V(::context->getConstantInt32(i));
2005}
2006
2007Value *Nucleus::createConstantInt(unsigned int i)
2008{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002009 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002010 return V(::context->getConstantInt32(i));
2011}
2012
2013Value *Nucleus::createConstantBool(bool b)
2014{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002015 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002016 return V(::context->getConstantInt1(b));
2017}
2018
2019Value *Nucleus::createConstantByte(signed char i)
2020{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002021 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002022 return V(::context->getConstantInt8(i));
2023}
2024
2025Value *Nucleus::createConstantByte(unsigned char i)
2026{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002027 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002028 return V(::context->getConstantInt8(i));
2029}
2030
2031Value *Nucleus::createConstantShort(short i)
2032{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002033 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002034 return V(::context->getConstantInt16(i));
2035}
2036
2037Value *Nucleus::createConstantShort(unsigned short i)
2038{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002039 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002040 return V(::context->getConstantInt16(i));
2041}
2042
2043Value *Nucleus::createConstantFloat(float x)
2044{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002045 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002046 return V(::context->getConstantFloat(x));
2047}
2048
2049Value *Nucleus::createNullPointer(Type *Ty)
2050{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002051 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002052 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002053}
2054
Antonio Maiorano02a39532020-01-21 15:15:34 -05002055static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2056{
2057 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2058}
2059
Nicolas Capens157ba262019-12-10 17:49:14 -05002060Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2061{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002062 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002063 const int vectorSize = 16;
2064 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2065 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002066
2067 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002068 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002069
Antonio Maioranoa0957112020-03-04 15:06:19 -05002070 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002071 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002072
2073 switch((int)reinterpret_cast<intptr_t>(type))
2074 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002075 case Ice::IceType_v4i32:
2076 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002077 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002078 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002079 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002080 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002081 }
2082 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002083 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002084 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002085 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002086 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002087 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002088 }
2089 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002090 case Ice::IceType_v8i16:
2091 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002092 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002093 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002094 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002095 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002096 }
2097 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002098 case Ice::IceType_v16i8:
2099 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002100 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002101 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002102 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002103 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002104 }
2105 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002106 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002107 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002108 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002109 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002110 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002111 }
2112 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002113 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002114 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002115 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002116 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002117 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002118 }
2119 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002120 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002121 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002122 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002123 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002124 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002125 }
2126 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002127 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002128 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002129 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002130 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002131 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002132 }
2133 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002134 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002135 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002136 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002137 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002138 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002139 }
2140 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002141 default:
2142 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002143 }
2144
Antonio Maiorano02a39532020-01-21 15:15:34 -05002145 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002146
Antonio Maiorano02a39532020-01-21 15:15:34 -05002147 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002148 return V(result);
2149}
2150
2151Value *Nucleus::createConstantVector(const double *constants, Type *type)
2152{
Ben Clayton713b8d32019-12-17 20:37:56 +00002153 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002154}
2155
Antonio Maiorano62427e02020-02-13 09:18:05 -05002156Value *Nucleus::createConstantString(const char *v)
2157{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002158 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002159 return V(IceConstantData(v, strlen(v) + 1));
2160}
2161
Nicolas Capens157ba262019-12-10 17:49:14 -05002162Type *Void::getType()
2163{
2164 return T(Ice::IceType_void);
2165}
2166
2167Type *Bool::getType()
2168{
2169 return T(Ice::IceType_i1);
2170}
2171
2172Type *Byte::getType()
2173{
2174 return T(Ice::IceType_i8);
2175}
2176
2177Type *SByte::getType()
2178{
2179 return T(Ice::IceType_i8);
2180}
2181
2182Type *Short::getType()
2183{
2184 return T(Ice::IceType_i16);
2185}
2186
2187Type *UShort::getType()
2188{
2189 return T(Ice::IceType_i16);
2190}
2191
2192Type *Byte4::getType()
2193{
2194 return T(Type_v4i8);
2195}
2196
2197Type *SByte4::getType()
2198{
2199 return T(Type_v4i8);
2200}
2201
Ben Clayton713b8d32019-12-17 20:37:56 +00002202namespace {
2203RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002204{
Ben Clayton713b8d32019-12-17 20:37:56 +00002205 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002206}
2207
Ben Clayton713b8d32019-12-17 20:37:56 +00002208RValue<Byte> Extract(RValue<Byte8> val, int i)
2209{
2210 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
2211}
2212
2213RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2214{
2215 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
2216}
2217} // namespace
2218
Nicolas Capens157ba262019-12-10 17:49:14 -05002219RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2220{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002221 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002222 if(emulateIntrinsics)
2223 {
2224 Byte8 result;
2225 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2226 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2227 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2228 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2229 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2230 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2231 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2232 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2233
2234 return result;
2235 }
2236 else
2237 {
2238 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002239 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002240 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2241 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2242 paddusb->addArg(x.value);
2243 paddusb->addArg(y.value);
2244 ::basicBlock->appendInst(paddusb);
2245
2246 return RValue<Byte8>(V(result));
2247 }
2248}
2249
2250RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2251{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002252 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002253 if(emulateIntrinsics)
2254 {
2255 Byte8 result;
2256 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2257 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2258 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2259 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2260 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2261 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2262 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2263 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2264
2265 return result;
2266 }
2267 else
2268 {
2269 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002270 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002271 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2272 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2273 psubusw->addArg(x.value);
2274 psubusw->addArg(y.value);
2275 ::basicBlock->appendInst(psubusw);
2276
2277 return RValue<Byte8>(V(result));
2278 }
2279}
2280
2281RValue<SByte> Extract(RValue<SByte8> val, int i)
2282{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002283 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002284 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
2285}
2286
2287RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2288{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002289 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002290 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2291}
2292
2293RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2294{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002295 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002296 if(emulateIntrinsics)
2297 {
2298 SByte8 result;
2299 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2300 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2301 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2302 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2303 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2304 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2305 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2306 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2307
2308 return result;
2309 }
2310 else
2311 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002312#if defined(__i386__) || defined(__x86_64__)
2313 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2314 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2315 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002316
Ben Clayton713b8d32019-12-17 20:37:56 +00002317 return As<SByte8>(hi | lo);
2318#else
2319 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2320#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002321 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002322}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002323
Nicolas Capens157ba262019-12-10 17:49:14 -05002324RValue<Int> SignMask(RValue<Byte8> x)
2325{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002326 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002327 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002328 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002329 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2330 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002331 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002332 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002333 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002334 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002335 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002336 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2337 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2338 movmsk->addArg(x.value);
2339 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002340
Nicolas Capens157ba262019-12-10 17:49:14 -05002341 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002342 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002343}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002344
2345// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2346// {
Nicolas Capens2f970b62016-11-08 14:28:59 -05002347// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002348// }
2349
Nicolas Capens157ba262019-12-10 17:49:14 -05002350RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2351{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002352 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002353 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2354}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002355
Nicolas Capens157ba262019-12-10 17:49:14 -05002356Type *Byte8::getType()
2357{
2358 return T(Type_v8i8);
2359}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002360
Nicolas Capens598f8d82016-09-26 15:09:10 -04002361// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2362// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002363// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002364// }
2365
2366// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2367// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002368// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002369// }
2370
Nicolas Capens157ba262019-12-10 17:49:14 -05002371RValue<SByte> SaturateSigned(RValue<Short> x)
2372{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002373 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002374 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2375}
2376
2377RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2378{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002379 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002380 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002381 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002382 SByte8 result;
2383 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2384 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2385 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2386 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2387 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2388 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2389 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2390 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002391
Nicolas Capens157ba262019-12-10 17:49:14 -05002392 return result;
2393 }
2394 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002395 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002396 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002397 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002398 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2399 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2400 paddsb->addArg(x.value);
2401 paddsb->addArg(y.value);
2402 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002403
Nicolas Capens157ba262019-12-10 17:49:14 -05002404 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002405 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002406}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002407
Nicolas Capens157ba262019-12-10 17:49:14 -05002408RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2409{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002410 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002411 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002412 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002413 SByte8 result;
2414 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2415 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2416 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2417 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2418 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2419 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2420 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2421 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002422
Nicolas Capens157ba262019-12-10 17:49:14 -05002423 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002424 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002425 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002426 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002427 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002428 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002429 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2430 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2431 psubsb->addArg(x.value);
2432 psubsb->addArg(y.value);
2433 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002434
Nicolas Capens157ba262019-12-10 17:49:14 -05002435 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002436 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002437}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002438
Nicolas Capens157ba262019-12-10 17:49:14 -05002439RValue<Int> SignMask(RValue<SByte8> x)
2440{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002441 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002442 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002443 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002444 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2445 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002446 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002447 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002448 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002449 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002450 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002451 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2452 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2453 movmsk->addArg(x.value);
2454 ::basicBlock->appendInst(movmsk);
2455
2456 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002457 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002458}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002459
Nicolas Capens157ba262019-12-10 17:49:14 -05002460RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2461{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002462 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002463 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2464}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002465
Nicolas Capens157ba262019-12-10 17:49:14 -05002466RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2467{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002468 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002469 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2470}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002471
Nicolas Capens157ba262019-12-10 17:49:14 -05002472Type *SByte8::getType()
2473{
2474 return T(Type_v8i8);
2475}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002476
Nicolas Capens157ba262019-12-10 17:49:14 -05002477Type *Byte16::getType()
2478{
2479 return T(Ice::IceType_v16i8);
2480}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002481
Nicolas Capens157ba262019-12-10 17:49:14 -05002482Type *SByte16::getType()
2483{
2484 return T(Ice::IceType_v16i8);
2485}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002486
Nicolas Capens157ba262019-12-10 17:49:14 -05002487Type *Short2::getType()
2488{
2489 return T(Type_v2i16);
2490}
Nicolas Capensd4227962016-11-09 14:24:25 -05002491
Nicolas Capens157ba262019-12-10 17:49:14 -05002492Type *UShort2::getType()
2493{
2494 return T(Type_v2i16);
2495}
Nicolas Capensd4227962016-11-09 14:24:25 -05002496
Nicolas Capens157ba262019-12-10 17:49:14 -05002497Short4::Short4(RValue<Int4> cast)
2498{
Ben Clayton713b8d32019-12-17 20:37:56 +00002499 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002500 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2501 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2502
2503 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
2504 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2505
2506 storeValue(short4);
2507}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002508
2509// Short4::Short4(RValue<Float> cast)
2510// {
2511// }
2512
Nicolas Capens157ba262019-12-10 17:49:14 -05002513Short4::Short4(RValue<Float4> cast)
2514{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002515 // TODO(b/150791192): Generalize and optimize
2516 auto smin = std::numeric_limits<short>::min();
2517 auto smax = std::numeric_limits<short>::max();
2518 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002519}
2520
2521RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2522{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002523 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002524 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002525 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002526 Short4 result;
2527 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2528 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2529 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2530 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002531
2532 return result;
2533 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002534 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002535 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002536 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2537 }
2538}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002539
Nicolas Capens157ba262019-12-10 17:49:14 -05002540RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2541{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002542 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002543 if(emulateIntrinsics)
2544 {
2545 Short4 result;
2546 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2547 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2548 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2549 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2550
2551 return result;
2552 }
2553 else
2554 {
2555 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2556 }
2557}
2558
2559RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2560{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002561 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002562 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2563 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2564 ::basicBlock->appendInst(cmp);
2565
2566 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2567 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2568 ::basicBlock->appendInst(select);
2569
2570 return RValue<Short4>(V(result));
2571}
2572
2573RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2574{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002575 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002576 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2577 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2578 ::basicBlock->appendInst(cmp);
2579
2580 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2581 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2582 ::basicBlock->appendInst(select);
2583
2584 return RValue<Short4>(V(result));
2585}
2586
2587RValue<Short> SaturateSigned(RValue<Int> x)
2588{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002589 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002590 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2591}
2592
2593RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2594{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002595 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002596 if(emulateIntrinsics)
2597 {
2598 Short4 result;
2599 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2600 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2601 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2602 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2603
2604 return result;
2605 }
2606 else
2607 {
2608 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002609 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002610 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2611 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2612 paddsw->addArg(x.value);
2613 paddsw->addArg(y.value);
2614 ::basicBlock->appendInst(paddsw);
2615
2616 return RValue<Short4>(V(result));
2617 }
2618}
2619
2620RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2621{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002622 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002623 if(emulateIntrinsics)
2624 {
2625 Short4 result;
2626 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2627 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2628 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2629 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2630
2631 return result;
2632 }
2633 else
2634 {
2635 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002636 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002637 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2638 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2639 psubsw->addArg(x.value);
2640 psubsw->addArg(y.value);
2641 ::basicBlock->appendInst(psubsw);
2642
2643 return RValue<Short4>(V(result));
2644 }
2645}
2646
2647RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2648{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002649 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002650 if(emulateIntrinsics)
2651 {
2652 Short4 result;
2653 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2654 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2655 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2656 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2657
2658 return result;
2659 }
2660 else
2661 {
2662 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002663 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002664 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2665 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2666 pmulhw->addArg(x.value);
2667 pmulhw->addArg(y.value);
2668 ::basicBlock->appendInst(pmulhw);
2669
2670 return RValue<Short4>(V(result));
2671 }
2672}
2673
2674RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2675{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002676 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002677 if(emulateIntrinsics)
2678 {
2679 Int2 result;
2680 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2681 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2682
2683 return result;
2684 }
2685 else
2686 {
2687 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002688 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002689 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2690 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2691 pmaddwd->addArg(x.value);
2692 pmaddwd->addArg(y.value);
2693 ::basicBlock->appendInst(pmaddwd);
2694
2695 return As<Int2>(V(result));
2696 }
2697}
2698
2699RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2700{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002701 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002702 if(emulateIntrinsics)
2703 {
2704 SByte8 result;
2705 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2706 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2707 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2708 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2709 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2710 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2711 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2712 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2713
2714 return result;
2715 }
2716 else
2717 {
2718 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002719 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002720 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2721 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2722 pack->addArg(x.value);
2723 pack->addArg(y.value);
2724 ::basicBlock->appendInst(pack);
2725
2726 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2727 }
2728}
2729
2730RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2731{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002732 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002733 if(emulateIntrinsics)
2734 {
2735 Byte8 result;
2736 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2737 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2738 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2739 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2740 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2741 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2742 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2743 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2744
2745 return result;
2746 }
2747 else
2748 {
2749 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002750 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002751 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2752 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2753 pack->addArg(x.value);
2754 pack->addArg(y.value);
2755 ::basicBlock->appendInst(pack);
2756
2757 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2758 }
2759}
2760
2761RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2762{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002763 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002764 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2765}
2766
2767RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2768{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002769 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002770 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2771}
2772
2773Type *Short4::getType()
2774{
2775 return T(Type_v4i16);
2776}
2777
2778UShort4::UShort4(RValue<Float4> cast, bool saturate)
2779{
2780 if(saturate)
2781 {
2782 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002783 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002784 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2785 // PackUnsigned takes care of 0x0000 saturation.
2786 Int4 int4(Min(cast, Float4(0xFFFF)));
2787 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002788 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002789 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002790 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002791 // ARM saturates the 32-bit integer result on overflow/undeflow.
2792 Int4 int4(cast);
2793 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002794 }
2795 else
2796 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002797 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002798 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002799 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002800 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002801 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002802 *this = Short4(Int4(cast));
2803 }
2804}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002805
Nicolas Capens157ba262019-12-10 17:49:14 -05002806RValue<UShort> Extract(RValue<UShort4> val, int i)
2807{
2808 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2809}
2810
2811RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2812{
2813 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2814}
2815
2816RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2817{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002818 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002819 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002820
Nicolas Capens157ba262019-12-10 17:49:14 -05002821 {
2822 UShort4 result;
2823 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2824 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2825 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2826 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2827
2828 return result;
2829 }
2830 else
2831 {
2832 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2833 }
2834}
2835
2836RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2837{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002838 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002839 if(emulateIntrinsics)
2840 {
2841 UShort4 result;
2842 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2843 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2844 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2845 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2846
2847 return result;
2848 }
2849 else
2850 {
2851 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2852 }
2853}
2854
2855RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002858 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2859 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2860 ::basicBlock->appendInst(cmp);
2861
2862 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2863 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2864 ::basicBlock->appendInst(select);
2865
2866 return RValue<UShort4>(V(result));
2867}
2868
2869RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2870{
2871 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2872 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2873 ::basicBlock->appendInst(cmp);
2874
2875 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2876 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2877 ::basicBlock->appendInst(select);
2878
2879 return RValue<UShort4>(V(result));
2880}
2881
2882RValue<UShort> SaturateUnsigned(RValue<Int> x)
2883{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002884 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002885 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2886}
2887
2888RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2889{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002890 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002891 if(emulateIntrinsics)
2892 {
2893 UShort4 result;
2894 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2895 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2896 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2897 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2898
2899 return result;
2900 }
2901 else
2902 {
2903 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002904 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002905 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2906 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2907 paddusw->addArg(x.value);
2908 paddusw->addArg(y.value);
2909 ::basicBlock->appendInst(paddusw);
2910
2911 return RValue<UShort4>(V(result));
2912 }
2913}
2914
2915RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2916{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002917 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002918 if(emulateIntrinsics)
2919 {
2920 UShort4 result;
2921 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2922 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2923 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2924 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2925
2926 return result;
2927 }
2928 else
2929 {
2930 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002931 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002932 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2933 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2934 psubusw->addArg(x.value);
2935 psubusw->addArg(y.value);
2936 ::basicBlock->appendInst(psubusw);
2937
2938 return RValue<UShort4>(V(result));
2939 }
2940}
2941
2942RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2943{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002944 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002945 if(emulateIntrinsics)
2946 {
2947 UShort4 result;
2948 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2949 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2950 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2951 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2952
2953 return result;
2954 }
2955 else
2956 {
2957 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002958 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002959 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2960 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2961 pmulhuw->addArg(x.value);
2962 pmulhuw->addArg(y.value);
2963 ::basicBlock->appendInst(pmulhuw);
2964
2965 return RValue<UShort4>(V(result));
2966 }
2967}
2968
2969RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2970{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002971 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002972 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2973
2974 // Scalarized implementation.
2975 Int4 result;
2976 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2977 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2978 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2979 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2980
2981 return result;
2982}
2983
2984RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2985{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002986 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002987 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2988
2989 if(false) // Partial product based implementation.
2990 {
2991 auto xh = x >> 16;
2992 auto yh = y >> 16;
2993 auto xl = x & UInt4(0x0000FFFF);
2994 auto yl = y & UInt4(0x0000FFFF);
2995 auto xlyh = xl * yh;
2996 auto xhyl = xh * yl;
2997 auto xlyhh = xlyh >> 16;
2998 auto xhylh = xhyl >> 16;
2999 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3000 auto xhyll = xhyl & UInt4(0x0000FFFF);
3001 auto xlylh = (xl * yl) >> 16;
3002 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3003
3004 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003005 }
3006
Nicolas Capens157ba262019-12-10 17:49:14 -05003007 // Scalarized implementation.
3008 Int4 result;
3009 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3010 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3011 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3012 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3013
3014 return As<UInt4>(result);
3015}
3016
3017RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3018{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003019 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003020 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003021 return UShort4(0);
3022}
3023
3024Type *UShort4::getType()
3025{
3026 return T(Type_v4i16);
3027}
3028
3029RValue<Short> Extract(RValue<Short8> val, int i)
3030{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003031 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003032 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3033}
3034
3035RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3036{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003037 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003038 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
3039}
3040
3041RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3042{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003043 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003044 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003045 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003046 Short8 result;
3047 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3048 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3049 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3050 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3051 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3052 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3053 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3054 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003055
Nicolas Capens157ba262019-12-10 17:49:14 -05003056 return result;
3057 }
3058 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003059 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003060 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003061 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003062}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003063
Nicolas Capens157ba262019-12-10 17:49:14 -05003064RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3065{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003066 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003067 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003068 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003069 Short8 result;
3070 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3071 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3072 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3073 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3074 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3075 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3076 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3077 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003078
Nicolas Capens157ba262019-12-10 17:49:14 -05003079 return result;
3080 }
3081 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003082 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003083 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003084 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003085}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003086
Nicolas Capens157ba262019-12-10 17:49:14 -05003087RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3088{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003089 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003090 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003091 return Int4(0);
3092}
3093
3094RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3095{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003096 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003097 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003098 return Short8(0);
3099}
3100
3101Type *Short8::getType()
3102{
3103 return T(Ice::IceType_v8i16);
3104}
3105
3106RValue<UShort> Extract(RValue<UShort8> val, int i)
3107{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003108 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003109 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
3110}
3111
3112RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3113{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003114 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003115 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
3116}
3117
3118RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3119{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003120 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003121 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003122 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003123 UShort8 result;
3124 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3125 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3126 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3127 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3128 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3129 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3130 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3131 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003132
Nicolas Capens157ba262019-12-10 17:49:14 -05003133 return result;
3134 }
3135 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003136 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003137 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003138 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003139}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003140
Nicolas Capens157ba262019-12-10 17:49:14 -05003141RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3142{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003143 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003144 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003145 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003146 UShort8 result;
3147 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3148 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3149 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3150 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3151 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3152 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3153 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3154 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003155
Nicolas Capens157ba262019-12-10 17:49:14 -05003156 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003157 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003158 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003159 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003160 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003161 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003162}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003163
Nicolas Capens157ba262019-12-10 17:49:14 -05003164RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3165{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003166 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003167 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003168 return UShort8(0);
3169}
3170
Nicolas Capens157ba262019-12-10 17:49:14 -05003171Type *UShort8::getType()
3172{
3173 return T(Ice::IceType_v8i16);
3174}
3175
Ben Clayton713b8d32019-12-17 20:37:56 +00003176RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003177{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003178 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003179 RValue<Int> res = val;
3180 val += 1;
3181 return res;
3182}
3183
Ben Clayton713b8d32019-12-17 20:37:56 +00003184const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003185{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003186 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003187 val += 1;
3188 return val;
3189}
3190
Ben Clayton713b8d32019-12-17 20:37:56 +00003191RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003192{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003193 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003194 RValue<Int> res = val;
3195 val -= 1;
3196 return res;
3197}
3198
Ben Clayton713b8d32019-12-17 20:37:56 +00003199const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003200{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003201 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003202 val -= 1;
3203 return val;
3204}
3205
3206RValue<Int> RoundInt(RValue<Float> cast)
3207{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003208 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003209 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003210 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003211 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3212 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003213 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003214 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003215 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003216 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003217 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003218 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3219 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3220 nearbyint->addArg(cast.value);
3221 ::basicBlock->appendInst(nearbyint);
3222
3223 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003224 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003225}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003226
Nicolas Capens157ba262019-12-10 17:49:14 -05003227Type *Int::getType()
3228{
3229 return T(Ice::IceType_i32);
3230}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003231
Nicolas Capens157ba262019-12-10 17:49:14 -05003232Type *Long::getType()
3233{
3234 return T(Ice::IceType_i64);
3235}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003236
Nicolas Capens157ba262019-12-10 17:49:14 -05003237UInt::UInt(RValue<Float> cast)
3238{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003239 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003240 // Smallest positive value representable in UInt, but not in Int
3241 const unsigned int ustart = 0x80000000u;
3242 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003243
Nicolas Capens157ba262019-12-10 17:49:14 -05003244 // If the value is negative, store 0, otherwise store the result of the conversion
3245 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003246 // Check if the value can be represented as an Int
3247 IfThenElse(cast >= ustartf,
3248 // If the value is too large, subtract ustart and re-add it after conversion.
3249 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3250 // Otherwise, just convert normally
3251 Int(cast)))
3252 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05003253}
Nicolas Capensa8086512016-11-07 17:32:17 -05003254
Ben Clayton713b8d32019-12-17 20:37:56 +00003255RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003256{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003257 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003258 RValue<UInt> res = val;
3259 val += 1;
3260 return res;
3261}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003262
Ben Clayton713b8d32019-12-17 20:37:56 +00003263const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003264{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003265 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003266 val += 1;
3267 return val;
3268}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003269
Ben Clayton713b8d32019-12-17 20:37:56 +00003270RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003271{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003272 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003273 RValue<UInt> res = val;
3274 val -= 1;
3275 return res;
3276}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003277
Ben Clayton713b8d32019-12-17 20:37:56 +00003278const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003279{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003280 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003281 val -= 1;
3282 return val;
3283}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003284
Nicolas Capens598f8d82016-09-26 15:09:10 -04003285// RValue<UInt> RoundUInt(RValue<Float> cast)
3286// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003287// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003288// }
3289
Nicolas Capens157ba262019-12-10 17:49:14 -05003290Type *UInt::getType()
3291{
3292 return T(Ice::IceType_i32);
3293}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003294
3295// Int2::Int2(RValue<Int> cast)
3296// {
3297// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
3298// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
3299//
3300// Constant *shuffle[2];
3301// shuffle[0] = Nucleus::createConstantInt(0);
3302// shuffle[1] = Nucleus::createConstantInt(0);
3303//
3304// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
3305//
3306// storeValue(replicate);
3307// }
3308
Nicolas Capens157ba262019-12-10 17:49:14 -05003309RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3310{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003311 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003312 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003313 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003314 Int2 result;
3315 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3316 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003317
Nicolas Capens157ba262019-12-10 17:49:14 -05003318 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003319 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003320 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003321 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003322 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003323 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003324}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003325
Nicolas Capens157ba262019-12-10 17:49:14 -05003326RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3327{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003328 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003329 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003330 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003331 Int2 result;
3332 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3333 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3334
3335 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003336 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003337 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003338 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003339 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003340 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003341}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003342
Nicolas Capens157ba262019-12-10 17:49:14 -05003343Type *Int2::getType()
3344{
3345 return T(Type_v2i32);
3346}
3347
3348RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3349{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003350 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003351 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003352 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003353 UInt2 result;
3354 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3355 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003356
Nicolas Capens157ba262019-12-10 17:49:14 -05003357 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003358 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003359 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003360 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003361 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003362 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003363}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003364
Nicolas Capens157ba262019-12-10 17:49:14 -05003365RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3366{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003367 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003368 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003369 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003370 UInt2 result;
3371 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3372 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003373
Nicolas Capens157ba262019-12-10 17:49:14 -05003374 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003375 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003376 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003377 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003378 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003379 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003380}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003381
Nicolas Capens157ba262019-12-10 17:49:14 -05003382Type *UInt2::getType()
3383{
3384 return T(Type_v2i32);
3385}
3386
Ben Clayton713b8d32019-12-17 20:37:56 +00003387Int4::Int4(RValue<Byte4> cast)
3388 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003389{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003390 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003391 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3392 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3393
3394 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003395 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003396 Value *b = Nucleus::createBitCast(a, Byte16::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003397 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003398
Ben Clayton713b8d32019-12-17 20:37:56 +00003399 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003400 Value *d = Nucleus::createBitCast(c, Short8::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003401 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003402
3403 Value *f = Nucleus::createBitCast(e, Int4::getType());
3404 storeValue(f);
3405}
3406
Ben Clayton713b8d32019-12-17 20:37:56 +00003407Int4::Int4(RValue<SByte4> cast)
3408 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003409{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003410 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003411 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3412 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3413
Ben Clayton713b8d32019-12-17 20:37:56 +00003414 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003415 Value *b = Nucleus::createBitCast(a, Byte16::getType());
3416 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3417
Ben Clayton713b8d32019-12-17 20:37:56 +00003418 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003419 Value *d = Nucleus::createBitCast(c, Short8::getType());
3420 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3421
3422 *this = As<Int4>(e) >> 24;
3423}
3424
Ben Clayton713b8d32019-12-17 20:37:56 +00003425Int4::Int4(RValue<Short4> cast)
3426 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003427{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003428 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003429 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003430 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3431
3432 *this = As<Int4>(c) >> 16;
3433}
3434
Ben Clayton713b8d32019-12-17 20:37:56 +00003435Int4::Int4(RValue<UShort4> cast)
3436 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003437{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003438 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003439 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003440 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
3441 Value *d = Nucleus::createBitCast(c, Int4::getType());
3442 storeValue(d);
3443}
3444
Ben Clayton713b8d32019-12-17 20:37:56 +00003445Int4::Int4(RValue<Int> rhs)
3446 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003447{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003448 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003449 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
3450
Ben Clayton713b8d32019-12-17 20:37:56 +00003451 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003452 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3453
3454 storeValue(replicate);
3455}
3456
3457RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3458{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003459 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003460 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003461 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003462 Int4 result;
3463 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3464 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3465 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3466 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003467
Nicolas Capens157ba262019-12-10 17:49:14 -05003468 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003469 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003470 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003471 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003472 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003473 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003474}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003475
Nicolas Capens157ba262019-12-10 17:49:14 -05003476RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3477{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003478 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003479 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003480 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003481 Int4 result;
3482 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3483 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3484 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3485 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003486
Nicolas Capens157ba262019-12-10 17:49:14 -05003487 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003488 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003489 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003490 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003491 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003492 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003493}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003494
Nicolas Capens157ba262019-12-10 17:49:14 -05003495RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3496{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003497 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003498 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
3499}
3500
3501RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3502{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003503 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003504 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
3505}
3506
3507RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3508{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003509 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003510 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
3511}
3512
3513RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3514{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003515 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003516 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
3517}
3518
3519RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3520{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003521 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003522 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
3523}
3524
3525RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3526{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003527 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003528 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
3529}
3530
3531RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3532{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003533 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003534 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3535 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3536 ::basicBlock->appendInst(cmp);
3537
3538 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3539 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3540 ::basicBlock->appendInst(select);
3541
3542 return RValue<Int4>(V(result));
3543}
3544
3545RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3546{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003547 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003548 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3549 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3550 ::basicBlock->appendInst(cmp);
3551
3552 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3553 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3554 ::basicBlock->appendInst(select);
3555
3556 return RValue<Int4>(V(result));
3557}
3558
3559RValue<Int4> RoundInt(RValue<Float4> cast)
3560{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003561 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003562 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003563 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003564 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3565 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003566 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003567 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003568 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003569 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003570 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003571 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3572 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3573 nearbyint->addArg(cast.value);
3574 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003575
3576 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003577 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003578}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003579
Nicolas Capens157ba262019-12-10 17:49:14 -05003580RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3581{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003582 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003583 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003584 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003585 Short8 result;
3586 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3587 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3588 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3589 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3590 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3591 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3592 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3593 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003594
Nicolas Capens157ba262019-12-10 17:49:14 -05003595 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003596 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003597 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003598 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003599 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003600 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003601 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3602 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3603 pack->addArg(x.value);
3604 pack->addArg(y.value);
3605 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003606
Nicolas Capens157ba262019-12-10 17:49:14 -05003607 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003608 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003609}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003610
Nicolas Capens157ba262019-12-10 17:49:14 -05003611RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3612{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003613 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003614 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003615 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003616 RValue<Int4> sx = As<Int4>(x);
3617 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003618
Nicolas Capens157ba262019-12-10 17:49:14 -05003619 RValue<Int4> sy = As<Int4>(y);
3620 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003621
Nicolas Capens157ba262019-12-10 17:49:14 -05003622 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003623 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003624 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003625 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003626 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003627 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003628 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3629 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3630 pack->addArg(x.value);
3631 pack->addArg(y.value);
3632 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003633
Nicolas Capens157ba262019-12-10 17:49:14 -05003634 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003635 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003636}
Nicolas Capens33438a62017-09-27 11:47:35 -04003637
Nicolas Capens157ba262019-12-10 17:49:14 -05003638RValue<Int> SignMask(RValue<Int4> x)
3639{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003640 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003641 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003642 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003643 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3644 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003645 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003646 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003647 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003648 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003649 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003650 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3651 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3652 movmsk->addArg(x.value);
3653 ::basicBlock->appendInst(movmsk);
3654
3655 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003656 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003657}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003658
Nicolas Capens157ba262019-12-10 17:49:14 -05003659Type *Int4::getType()
3660{
3661 return T(Ice::IceType_v4i32);
3662}
3663
Ben Clayton713b8d32019-12-17 20:37:56 +00003664UInt4::UInt4(RValue<Float4> cast)
3665 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003666{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003667 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003668 // Smallest positive value representable in UInt, but not in Int
3669 const unsigned int ustart = 0x80000000u;
3670 const float ustartf = float(ustart);
3671
3672 // Check if the value can be represented as an Int
3673 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3674 // If the value is too large, subtract ustart and re-add it after conversion.
3675 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003676 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003677 (~uiValue & Int4(cast));
3678 // If the value is negative, store 0, otherwise store the result of the conversion
3679 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3680}
3681
Ben Clayton713b8d32019-12-17 20:37:56 +00003682UInt4::UInt4(RValue<UInt> rhs)
3683 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003684{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003685 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003686 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3687
Ben Clayton713b8d32019-12-17 20:37:56 +00003688 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003689 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3690
3691 storeValue(replicate);
3692}
3693
3694RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3695{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003696 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003697 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003698 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003699 UInt4 result;
3700 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3701 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3702 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3703 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003704
Nicolas Capens157ba262019-12-10 17:49:14 -05003705 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003706 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003707 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003708 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003709 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003710 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003711}
Ben Clayton88816fa2019-05-15 17:08:14 +01003712
Nicolas Capens157ba262019-12-10 17:49:14 -05003713RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3714{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003715 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003716 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003717 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003718 UInt4 result;
3719 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3720 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3721 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3722 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003723
Nicolas Capens157ba262019-12-10 17:49:14 -05003724 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003725 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003726 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003727 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003728 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003729 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003730}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003731
Nicolas Capens157ba262019-12-10 17:49:14 -05003732RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3733{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003734 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003735 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3736}
3737
3738RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3739{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003740 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003741 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3742}
3743
3744RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3745{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003746 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003747 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3748}
3749
3750RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3751{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003752 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003753 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3754}
3755
3756RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3757{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003758 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003759 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3760}
3761
3762RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3763{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003764 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003765 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3766}
3767
3768RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3769{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003770 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003771 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3772 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3773 ::basicBlock->appendInst(cmp);
3774
3775 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3776 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3777 ::basicBlock->appendInst(select);
3778
3779 return RValue<UInt4>(V(result));
3780}
3781
3782RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3783{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003784 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003785 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3786 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3787 ::basicBlock->appendInst(cmp);
3788
3789 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3790 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3791 ::basicBlock->appendInst(select);
3792
3793 return RValue<UInt4>(V(result));
3794}
3795
3796Type *UInt4::getType()
3797{
3798 return T(Ice::IceType_v4i32);
3799}
3800
3801Type *Half::getType()
3802{
3803 return T(Ice::IceType_i16);
3804}
3805
3806RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3807{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003808 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003809 return 1.0f / x;
3810}
3811
3812RValue<Float> RcpSqrt_pp(RValue<Float> x)
3813{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003814 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003815 return Rcp_pp(Sqrt(x));
3816}
3817
3818RValue<Float> Sqrt(RValue<Float> x)
3819{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003820 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003821 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003822 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003823 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3824 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3825 sqrt->addArg(x.value);
3826 ::basicBlock->appendInst(sqrt);
3827
3828 return RValue<Float>(V(result));
3829}
3830
3831RValue<Float> Round(RValue<Float> x)
3832{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003833 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003834 return Float4(Round(Float4(x))).x;
3835}
3836
3837RValue<Float> Trunc(RValue<Float> x)
3838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003840 return Float4(Trunc(Float4(x))).x;
3841}
3842
3843RValue<Float> Frac(RValue<Float> x)
3844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003846 return Float4(Frac(Float4(x))).x;
3847}
3848
3849RValue<Float> Floor(RValue<Float> x)
3850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003852 return Float4(Floor(Float4(x))).x;
3853}
3854
3855RValue<Float> Ceil(RValue<Float> x)
3856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003858 return Float4(Ceil(Float4(x))).x;
3859}
3860
3861Type *Float::getType()
3862{
3863 return T(Ice::IceType_f32);
3864}
3865
3866Type *Float2::getType()
3867{
3868 return T(Type_v2f32);
3869}
3870
Ben Clayton713b8d32019-12-17 20:37:56 +00003871Float4::Float4(RValue<Float> rhs)
3872 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003873{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003874 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003875 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3876
Ben Clayton713b8d32019-12-17 20:37:56 +00003877 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003878 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3879
3880 storeValue(replicate);
3881}
3882
3883RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3884{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003885 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003886 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3887 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3888 ::basicBlock->appendInst(cmp);
3889
3890 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3891 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3892 ::basicBlock->appendInst(select);
3893
3894 return RValue<Float4>(V(result));
3895}
3896
3897RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3898{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003899 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003900 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3901 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3902 ::basicBlock->appendInst(cmp);
3903
3904 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3905 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3906 ::basicBlock->appendInst(select);
3907
3908 return RValue<Float4>(V(result));
3909}
3910
3911RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3912{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003913 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003914 return Float4(1.0f) / x;
3915}
3916
3917RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3918{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003919 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003920 return Rcp_pp(Sqrt(x));
3921}
3922
3923RValue<Float4> Sqrt(RValue<Float4> x)
3924{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003925 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003926 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003927 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003928 Float4 result;
3929 result.x = Sqrt(Float(Float4(x).x));
3930 result.y = Sqrt(Float(Float4(x).y));
3931 result.z = Sqrt(Float(Float4(x).z));
3932 result.w = Sqrt(Float(Float4(x).w));
3933
3934 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003935 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003936 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003937 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003938 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003939 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003940 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3941 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3942 sqrt->addArg(x.value);
3943 ::basicBlock->appendInst(sqrt);
3944
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003945 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003946 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003947}
Nicolas Capens157ba262019-12-10 17:49:14 -05003948
3949RValue<Int> SignMask(RValue<Float4> x)
3950{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003951 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003952 if(emulateIntrinsics || CPUID::ARM)
3953 {
3954 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3955 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3956 }
3957 else
3958 {
3959 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003960 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003961 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3962 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3963 movmsk->addArg(x.value);
3964 ::basicBlock->appendInst(movmsk);
3965
3966 return RValue<Int>(V(result));
3967 }
3968}
3969
3970RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3971{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003972 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003973 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3974}
3975
3976RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3977{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003978 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003979 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3980}
3981
3982RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3983{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003984 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003985 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3986}
3987
3988RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3989{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003990 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003991 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3992}
3993
3994RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3995{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003996 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003997 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3998}
3999
4000RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4001{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004002 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004003 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
4004}
4005
4006RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4007{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004008 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004009 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
4010}
4011
4012RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4013{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004014 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004015 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
4016}
4017
4018RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4019{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004020 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004021 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
4022}
4023
4024RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4025{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004026 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004027 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
4028}
4029
4030RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4031{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004032 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004033 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
4034}
4035
4036RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4037{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004038 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004039 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
4040}
4041
4042RValue<Float4> Round(RValue<Float4> x)
4043{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004044 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004045 if(emulateIntrinsics || CPUID::ARM)
4046 {
4047 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4048 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4049 }
4050 else if(CPUID::SSE4_1)
4051 {
4052 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004053 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004054 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4055 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4056 round->addArg(x.value);
4057 round->addArg(::context->getConstantInt32(0));
4058 ::basicBlock->appendInst(round);
4059
4060 return RValue<Float4>(V(result));
4061 }
4062 else
4063 {
4064 return Float4(RoundInt(x));
4065 }
4066}
4067
4068RValue<Float4> Trunc(RValue<Float4> x)
4069{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004070 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004071 if(CPUID::SSE4_1)
4072 {
4073 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004074 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004075 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4076 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4077 round->addArg(x.value);
4078 round->addArg(::context->getConstantInt32(3));
4079 ::basicBlock->appendInst(round);
4080
4081 return RValue<Float4>(V(result));
4082 }
4083 else
4084 {
4085 return Float4(Int4(x));
4086 }
4087}
4088
4089RValue<Float4> Frac(RValue<Float4> x)
4090{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004091 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004092 Float4 frc;
4093
4094 if(CPUID::SSE4_1)
4095 {
4096 frc = x - Floor(x);
4097 }
4098 else
4099 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004100 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004101
Ben Clayton713b8d32019-12-17 20:37:56 +00004102 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004103 }
4104
4105 // x - floor(x) can be 1.0 for very small negative x.
4106 // Clamp against the value just below 1.0.
4107 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4108}
4109
4110RValue<Float4> Floor(RValue<Float4> x)
4111{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004112 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004113 if(CPUID::SSE4_1)
4114 {
4115 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004116 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004117 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4118 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4119 round->addArg(x.value);
4120 round->addArg(::context->getConstantInt32(1));
4121 ::basicBlock->appendInst(round);
4122
4123 return RValue<Float4>(V(result));
4124 }
4125 else
4126 {
4127 return x - Frac(x);
4128 }
4129}
4130
4131RValue<Float4> Ceil(RValue<Float4> x)
4132{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004133 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004134 if(CPUID::SSE4_1)
4135 {
4136 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004137 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004138 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4139 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4140 round->addArg(x.value);
4141 round->addArg(::context->getConstantInt32(2));
4142 ::basicBlock->appendInst(round);
4143
4144 return RValue<Float4>(V(result));
4145 }
4146 else
4147 {
4148 return -Floor(-x);
4149 }
4150}
4151
4152Type *Float4::getType()
4153{
4154 return T(Ice::IceType_v4f32);
4155}
4156
4157RValue<Long> Ticks()
4158{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004159 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004160 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004161 return Long(Int(0));
4162}
4163
Ben Clayton713b8d32019-12-17 20:37:56 +00004164RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004165{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004166 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004167 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004168}
4169
Ben Clayton713b8d32019-12-17 20:37:56 +00004170RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004171{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004172 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004173 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004174}
4175
Ben Clayton713b8d32019-12-17 20:37:56 +00004176Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004177{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004178 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano16ae92a2020-03-10 10:53:24 -04004179 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004180}
4181
4182void Breakpoint()
4183{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004184 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004185 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004186 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4187 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4188 ::basicBlock->appendInst(trap);
4189}
4190
Ben Clayton713b8d32019-12-17 20:37:56 +00004191void Nucleus::createFence(std::memory_order memoryOrder)
4192{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004193 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004194 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4195 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4196 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4197 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4198 inst->addArg(order);
4199 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004200}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004201
Ben Clayton713b8d32019-12-17 20:37:56 +00004202Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4203{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004204 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004205 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004206 return nullptr;
4207}
4208void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4209{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004210 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004211 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004212}
Nicolas Capens157ba262019-12-10 17:49:14 -05004213
4214RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4215{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004216 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004217 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4218}
4219
4220RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4221{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004222 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004223 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4224}
4225
4226void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4227{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004228 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004229 return emulated::Scatter(base, val, offsets, mask, alignment);
4230}
4231
4232void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4233{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004234 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004235 return emulated::Scatter(base, val, offsets, mask, alignment);
4236}
4237
4238RValue<Float> Exp2(RValue<Float> x)
4239{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004240 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004241 return emulated::Exp2(x);
4242}
4243
4244RValue<Float> Log2(RValue<Float> x)
4245{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004246 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004247 return emulated::Log2(x);
4248}
4249
4250RValue<Float4> Sin(RValue<Float4> x)
4251{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004252 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004253 return emulated::Sin(x);
4254}
4255
4256RValue<Float4> Cos(RValue<Float4> x)
4257{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004258 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004259 return emulated::Cos(x);
4260}
4261
4262RValue<Float4> Tan(RValue<Float4> x)
4263{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004264 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004265 return emulated::Tan(x);
4266}
4267
4268RValue<Float4> Asin(RValue<Float4> x)
4269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004271 return emulated::Asin(x);
4272}
4273
4274RValue<Float4> Acos(RValue<Float4> x)
4275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004277 return emulated::Acos(x);
4278}
4279
4280RValue<Float4> Atan(RValue<Float4> x)
4281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004282 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004283 return emulated::Atan(x);
4284}
4285
4286RValue<Float4> Sinh(RValue<Float4> x)
4287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004289 return emulated::Sinh(x);
4290}
4291
4292RValue<Float4> Cosh(RValue<Float4> x)
4293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004295 return emulated::Cosh(x);
4296}
4297
4298RValue<Float4> Tanh(RValue<Float4> x)
4299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004301 return emulated::Tanh(x);
4302}
4303
4304RValue<Float4> Asinh(RValue<Float4> x)
4305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004307 return emulated::Asinh(x);
4308}
4309
4310RValue<Float4> Acosh(RValue<Float4> x)
4311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004313 return emulated::Acosh(x);
4314}
4315
4316RValue<Float4> Atanh(RValue<Float4> x)
4317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004319 return emulated::Atanh(x);
4320}
4321
4322RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004325 return emulated::Atan2(x, y);
4326}
4327
4328RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004331 return emulated::Pow(x, y);
4332}
4333
4334RValue<Float4> Exp(RValue<Float4> x)
4335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004337 return emulated::Exp(x);
4338}
4339
4340RValue<Float4> Log(RValue<Float4> x)
4341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004343 return emulated::Log(x);
4344}
4345
4346RValue<Float4> Exp2(RValue<Float4> x)
4347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004348 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004349 return emulated::Exp2(x);
4350}
4351
4352RValue<Float4> Log2(RValue<Float4> x)
4353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004354 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004355 return emulated::Log2(x);
4356}
4357
4358RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004360 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004361 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004362 {
Ben Claytonce54c592020-02-07 11:30:51 +00004363 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004364 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004365 }
4366 else
4367 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004368 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004369 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4370 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4371 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4372 ctlz->addArg(x.value);
4373 ::basicBlock->appendInst(ctlz);
4374
4375 return RValue<UInt>(V(result));
4376 }
4377}
4378
4379RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4380{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004381 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004382 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004383 {
Ben Claytonce54c592020-02-07 11:30:51 +00004384 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004385 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004386 }
4387 else
4388 {
4389 // TODO: implement vectorized version in Subzero
4390 UInt4 result;
4391 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4392 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4393 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4394 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4395 return result;
4396 }
4397}
4398
4399RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4400{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004401 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004402 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004403 {
Ben Claytonce54c592020-02-07 11:30:51 +00004404 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004405 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004406 }
4407 else
4408 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004409 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004410 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4411 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4412 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4413 ctlz->addArg(x.value);
4414 ::basicBlock->appendInst(ctlz);
4415
4416 return RValue<UInt>(V(result));
4417 }
4418}
4419
4420RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4421{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004422 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004423 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004424 {
Ben Claytonce54c592020-02-07 11:30:51 +00004425 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004426 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004427 }
4428 else
4429 {
4430 // TODO: implement vectorized version in Subzero
4431 UInt4 result;
4432 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4433 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4434 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4435 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4436 return result;
4437 }
4438}
4439
Antonio Maiorano370cba52019-12-31 11:36:07 -05004440RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4441{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004442 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004443 return emulated::MinAtomic(x, y, memoryOrder);
4444}
4445
4446RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4447{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004448 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004449 return emulated::MinAtomic(x, y, memoryOrder);
4450}
4451
4452RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4453{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004454 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004455 return emulated::MaxAtomic(x, y, memoryOrder);
4456}
4457
4458RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4459{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004460 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004461 return emulated::MaxAtomic(x, y, memoryOrder);
4462}
4463
// Emits a print of the caller's location, but only when both
// ENABLE_RR_DEBUG_INFO and ENABLE_RR_EMIT_PRINT_LOCATION are defined.
// Otherwise this is a no-op.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO) && defined(ENABLE_RR_EMIT_PRINT_LOCATION)
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO && ENABLE_RR_EMIT_PRINT_LOCATION
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004472void EmitDebugVariable(Value *value) {}

// Empty: this implementation has nothing to flush.
void FlushDebug()
{
}
4474
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004475namespace {
4476namespace coro {
4477
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004478// Instance data per generated coroutine
4479// This is the "handle" type used for Coroutine functions
4480// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4481struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004482{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004483 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004484 bool done = false; // the coroutine should stop at the next yield()
4485 bool terminated = false; // the coroutine has finished.
4486 bool inRoutine = false; // is the coroutine currently executing?
4487 marl::Scheduler::Fiber *mainFiber = nullptr;
4488 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004489 void *promisePtr = nullptr;
4490};
4491
4492CoroutineData *createCoroutineData()
4493{
4494 return new CoroutineData{};
4495}
4496
4497void destroyCoroutineData(CoroutineData *coroData)
4498{
4499 delete coroData;
4500}
4501
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004502// suspend() pauses execution of the coroutine, and resumes execution from the
4503// caller's call to await().
4504// Returns true if await() is called again, or false if coroutine_destroy()
4505// is called.
4506bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004507{
Ben Claytonc3466532020-03-24 11:54:05 +00004508 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4509 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4510 ASSERT(coroData->inRoutine);
4511 coroData->inRoutine = false;
4512 coroData->mainFiber->notify();
4513 while(!coroData->inRoutine)
4514 {
4515 coroData->routineFiber->wait();
4516 }
4517 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004518}
4519
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004520// resume() is called by await(), blocking until the coroutine calls yield()
4521// or the coroutine terminates.
4522void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004523{
Ben Claytonc3466532020-03-24 11:54:05 +00004524 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4525 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4526 ASSERT(!coroData->inRoutine);
4527 coroData->inRoutine = true;
4528 coroData->routineFiber->notify();
4529 while(coroData->inRoutine)
4530 {
4531 coroData->mainFiber->wait();
4532 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004533}
4534
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004535// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4536// until the coroutine ends, and deletes the coroutine data.
4537void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004538{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004539 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004540 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4541 ASSERT(!coroData->inRoutine);
4542 if(!coroData->terminated)
4543 {
4544 coroData->done = true;
4545 coroData->inRoutine = true;
4546 coroData->routineFiber->notify();
4547 while(!coroData->terminated)
4548 {
4549 coroData->mainFiber->wait();
4550 }
4551 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004552 if(coroData->useInternalScheduler)
4553 {
4554 ::getOrCreateScheduler().unbind();
4555 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004556 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004557}
4558
4559namespace detail {
4560thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4561} // namespace detail
4562
4563void setHandleParam(Nucleus::CoroutineHandle handle)
4564{
4565 ASSERT(!detail::coroHandle);
4566 detail::coroHandle = handle;
4567}
4568
4569Nucleus::CoroutineHandle getHandleParam()
4570{
4571 ASSERT(detail::coroHandle);
4572 auto handle = detail::coroHandle;
4573 detail::coroHandle = {};
4574 return handle;
4575}
4576
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004577bool isDone(Nucleus::CoroutineHandle handle)
4578{
4579 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004580 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004581}
4582
4583void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4584{
4585 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4586 coroData->promisePtr = promisePtr;
4587}
4588
4589void *getPromisePtr(Nucleus::CoroutineHandle handle)
4590{
4591 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4592 return coroData->promisePtr;
4593}
4594
4595} // namespace coro
4596} // namespace
4597
4598// Used to generate coroutines.
4599// Lifetime: from yield to acquireCoroutine
4600class CoroutineGenerator
4601{
4602public:
4603 CoroutineGenerator()
4604 {
4605 }
4606
4607 // Inserts instructions at the top of the current function to make it a coroutine.
4608 void generateCoroutineBegin()
4609 {
4610 // Begin building the main coroutine_begin() function.
4611 // We insert these instructions at the top of the entry node,
4612 // before existing reactor-generated instructions.
4613
4614 // CoroutineHandle coroutine_begin(<Arguments>)
4615 // {
4616 // this->handle = coro::getHandleParam();
4617 //
4618 // YieldType promise;
4619 // coro::setPromisePtr(handle, &promise); // For await
4620 //
4621 // ... <REACTOR CODE> ...
4622 //
4623
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004624 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004625 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004626
4627 // YieldType promise;
4628 // coro::setPromisePtr(handle, &promise); // For await
4629 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004630 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004631 }
4632
4633 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4634 void generateYield(Value *val)
4635 {
4636 // ... <REACTOR CODE> ...
4637 //
4638 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004639 // if (!coro::suspend(handle)) {
4640 // return false; // coroutine has been stopped by the caller.
4641 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004642 //
4643 // ... <REACTOR CODE> ...
4644
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004645 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004646 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004647
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004648 // if (!coro::suspend(handle)) {
4649 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4650 auto doneBlock = Nucleus::createBasicBlock();
4651 auto resumeBlock = Nucleus::createBasicBlock();
4652 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4653
4654 // return false; // coroutine has been stopped by the caller.
4655 ::basicBlock = doneBlock;
4656 Nucleus::createRetVoid(); // coroutine return value is ignored.
4657
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004658 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004659 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004660 }
4661
4662 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4663
4664 // Generates the await function for the current coroutine.
4665 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4666 static FunctionUniquePtr generateAwaitFunction()
4667 {
4668 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4669 // {
4670 // if (coro::isDone())
4671 // {
4672 // return false;
4673 // }
4674 // else // resume
4675 // {
4676 // YieldType* promise = coro::getPromisePtr(handle);
4677 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004678 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004679 // return true;
4680 // }
4681 // }
4682
4683 // Subzero doesn't support bool types (IceType_i1) as return type
4684 const Ice::Type ReturnType = Ice::IceType_i32;
4685 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4686 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4687
4688 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4689 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4690
4691 Ice::Variable *handle = awaitFunc->getArgs()[0];
4692 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4693
4694 auto doneBlock = awaitFunc->makeNode();
4695 {
4696 // return false;
4697 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4698 doneBlock->appendInst(ret);
4699 }
4700
4701 auto resumeBlock = awaitFunc->makeNode();
4702 {
4703 // YieldType* promise = coro::getPromisePtr(handle);
4704 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4705
4706 // *out = *promise;
4707 // Load promise value
4708 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4709 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4710 resumeBlock->appendInst(load);
4711 // Then store it in output param
4712 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4713 resumeBlock->appendInst(store);
4714
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004715 // coro::resume(handle);
4716 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004717
4718 // return true;
4719 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4720 resumeBlock->appendInst(ret);
4721 }
4722
4723 // if (coro::isDone())
4724 // {
4725 // <doneBlock>
4726 // }
4727 // else // resume
4728 // {
4729 // <resumeBlock>
4730 // }
4731 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004732 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004733 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4734 bb->appendInst(br);
4735
4736 return FunctionUniquePtr{ awaitFunc };
4737 }
4738
4739 // Generates the destroy function for the current coroutine.
4740 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4741 static FunctionUniquePtr generateDestroyFunction()
4742 {
4743 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4744 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004745 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004746 // return;
4747 // }
4748
4749 const Ice::Type ReturnType = Ice::IceType_void;
4750 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4751
4752 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4753 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4754
4755 Ice::Variable *handle = destroyFunc->getArgs()[0];
4756
4757 auto *bb = destroyFunc->getEntryNode();
4758
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004759 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4760 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004761
4762 // return;
4763 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4764 bb->appendInst(ret);
4765
4766 return FunctionUniquePtr{ destroyFunc };
4767 }
4768
4769private:
4770 Ice::Variable *handle{};
4771 Ice::Variable *promise{};
4772};
4773
4774static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4775{
4776 // This doubles up as our coroutine handle
4777 auto coroData = coro::createCoroutineData();
4778
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004779 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4780 if(coroData->useInternalScheduler)
4781 {
4782 ::getOrCreateScheduler().bind();
4783 }
4784
Ben Clayton76e9e532020-03-16 20:35:04 +00004785 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004786 // Store handle in TLS so that the coroutine can grab it right away, before
4787 // any fiber switch occurs.
4788 coro::setHandleParam(coroData);
4789
Ben Claytonc3466532020-03-24 11:54:05 +00004790 ASSERT(!coroData->routineFiber);
4791 coroData->routineFiber = marl::Scheduler::Fiber::current();
4792
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004793 beginFunc();
4794
Ben Claytonc3466532020-03-24 11:54:05 +00004795 ASSERT(coroData->inRoutine);
4796 coroData->done = true; // coroutine is done.
4797 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4798 coroData->inRoutine = false;
4799 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004800 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004801
Ben Claytonc3466532020-03-24 11:54:05 +00004802 ASSERT(!coroData->mainFiber);
4803 coroData->mainFiber = marl::Scheduler::Fiber::current();
4804
4805 // block until the first yield or coroutine end
4806 ASSERT(!coroData->inRoutine);
4807 coroData->inRoutine = true;
4808 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4809 while(coroData->inRoutine)
4810 {
4811 coroData->mainFiber->wait();
4812 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004813
4814 return coroData;
4815}
4816
4817void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4818{
4819 // Start by creating a regular function
4820 createFunction(yieldType, params);
4821
4822 // Save in case yield() is called
4823 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4824 ::coroYieldType = yieldType;
4825}
4826
4827void Nucleus::yield(Value *val)
4828{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004829 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004830 Variable::materializeAll();
4831
4832 // On first yield, we start generating coroutine functions
4833 if(!::coroGen)
4834 {
4835 ::coroGen = std::make_shared<CoroutineGenerator>();
4836 ::coroGen->generateCoroutineBegin();
4837 }
4838
4839 ASSERT(::coroGen);
4840 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004841}
4842
Ben Clayton713b8d32019-12-17 20:37:56 +00004843static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4844{
4845 return false;
4846}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004847
4848static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4849{
4850}
Nicolas Capens157ba262019-12-10 17:49:14 -05004851
4852std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4853{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004854 if(::coroGen)
4855 {
4856 // Finish generating coroutine functions
4857 {
4858 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004859 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004860 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004861
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004862 auto awaitFunc = ::coroGen->generateAwaitFunction();
4863 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004864
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004865 // At this point, we no longer need the CoroutineGenerator.
4866 ::coroGen.reset();
4867 ::coroYieldType = nullptr;
4868
4869 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4870 { name, "await", "destroy" },
4871 cfgEdit);
4872
4873 return routine;
4874 }
4875 else
4876 {
4877 {
4878 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004879 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004880 }
4881
4882 ::coroYieldType = nullptr;
4883
4884 // Not an actual coroutine (no yields), so return stubs for await and destroy
4885 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4886
4887 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4888 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4889 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4890 return routine;
4891 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004892}
4893
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004894Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004895{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004896 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4897
4898 if(isCoroutine)
4899 {
4900 return rr::invokeCoroutineBegin(func);
4901 }
4902 else
4903 {
4904 // For regular routines, just invoke the begin func directly
4905 return func();
4906 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004907}
Nicolas Capens157ba262019-12-10 17:49:14 -05004908
4909} // namespace rr