blob: 16ff015a7aad33745d477a1cc66448a0dd85f489 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050017#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000018#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050019#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040020
Nicolas Capens1a3ce872018-10-10 10:42:36 -040021#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000022#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040023
Nicolas Capens598f8d82016-09-26 15:09:10 -040024#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfgNode.h"
26#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000027#include "src/IceELFStreamer.h"
28#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040029#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031
Ben Clayton713b8d32019-12-17 20:37:56 +000032#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040033#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040035
Antonio Maiorano8bce0672020-02-28 13:13:45 -050036#include "marl/event.h"
37
Nicolas Capens6a990f82018-07-06 15:54:07 -040038#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040041
Nicolas Capensbd65da92017-01-05 16:31:06 -050042#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000043# ifndef WIN32_LEAN_AND_MEAN
44# define WIN32_LEAN_AND_MEAN
45# endif // !WIN32_LEAN_AND_MEAN
46# ifndef NOMINMAX
47# define NOMINMAX
48# endif // !NOMINMAX
49# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050050#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
Ben Clayton683bad82020-02-10 23:57:09 +000052#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040053#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000054#include <limits>
55#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040056
// Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050059namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050060namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061
62Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
63{
64 uint32_t sequenceNumber = 0;
65 auto function = Ice::Cfg::create(context, sequenceNumber).release();
66
67 Ice::CfgLocalAllocatorScope allocScope{ function };
68
69 for(auto type : paramTypes)
70 {
71 Ice::Variable *arg = function->makeVariable(type);
72 function->addArg(arg);
73 }
74
75 Ice::CfgNode *node = function->makeNode();
76 function->setEntryNode(node);
77
78 return function;
79}
80
81Ice::Type getPointerType(Ice::Type elementType)
82{
83 if(sizeof(void *) == 8)
84 {
85 return Ice::IceType_i64;
86 }
87 else
88 {
89 return Ice::IceType_i32;
90 }
91}
92
93Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
94{
95 int typeSize = Ice::typeWidthInBytes(type);
96 int totalSize = typeSize * (arraySize ? arraySize : 1);
97
98 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
99 auto address = function->makeVariable(getPointerType(type));
100 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
101 function->getEntryNode()->getInsts().push_front(alloca);
102
103 return address;
104}
105
106Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500107{
108 if(sizeof(void *) == 8)
109 {
110 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
111 }
112 else
113 {
114 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
115 }
116}
117
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400118// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
119Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500120
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400121// Wrapper for calls on C functions with Ice types
122Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
123{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500124 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400125
126 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
127 // then truncate result to bool.
128 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
129 const bool returningBool = (retTy == Ice::IceType_i1);
130 if(returningBool)
131 {
132 ret = function->makeVariable(Ice::IceType_i32);
133 }
134 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500135 {
136 ret = function->makeVariable(retTy);
137 }
138
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400139 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500140 for(auto arg : iceArgs)
141 {
142 call->addArg(arg);
143 }
144
145 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400146
147 if(returningBool)
148 {
149 // Truncate result to bool so that if any (lsb) bits were set, result will be true
150 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
151 }
152
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500153 return ret;
154}
155
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400156Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
157{
158 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
159 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
160}
161
Antonio Maiorano62427e02020-02-13 09:18:05 -0500162// Wrapper for calls on C functions with Ice types
163template<typename Return, typename... CArgs, typename... RArgs>
164Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
165{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400166 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
167
Nicolas Capens519cf222020-05-08 15:27:19 -0400168 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500169 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500170 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500171}
172
Antonio Maiorano02a39532020-01-21 15:15:34 -0500173// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500174Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500175{
176 Ice::Variable *result = function->makeVariable(v->getType());
177 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
178 basicBlock->appendInst(cast);
179 return result;
180}
181
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400182Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
183{
184 Ice::Variable *to = function->makeVariable(toType);
185 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
186 basicBlock->appendInst(cast);
187 return to;
188}
189
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500190Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500191{
192 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
193 // absolute address. We circumvent this by casting to a non-const variable, and loading
194 // from that.
195 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
196 {
197 ptr = sz::createUnconstCast(function, basicBlock, cptr);
198 }
199
200 Ice::Variable *result = function->makeVariable(type);
201 auto load = Ice::InstLoad::create(function, result, ptr, align);
202 basicBlock->appendInst(load);
203
204 return result;
205}
206
207} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500208} // namespace
209
namespace rr {

// Forward declarations of types that the file-scope globals below point to.
class CoroutineGenerator;
class ELFMemoryStreamer;

}  // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500214
215namespace {
216
217// Default configuration settings. Must be accessed under mutex lock.
218std::mutex defaultConfigLock;
219rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000220{
Nicolas Capens157ba262019-12-10 17:49:14 -0500221 // This uses a static in a function to avoid the cost of a global static
222 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
223 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000224 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500225 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000226}
227
Nicolas Capens157ba262019-12-10 17:49:14 -0500228Ice::GlobalContext *context = nullptr;
229Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400230Ice::CfgNode *entryBlock = nullptr;
231Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500232Ice::CfgNode *basicBlock = nullptr;
233Ice::CfgLocalAllocatorScope *allocator = nullptr;
234rr::ELFMemoryStreamer *routine = nullptr;
235
236std::mutex codegenMutex;
237
238Ice::ELFFileStreamer *elfFile = nullptr;
239Ice::Fdstream *out = nullptr;
240
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500241// Coroutine globals
242rr::Type *coroYieldType = nullptr;
243std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500244marl::Scheduler &getOrCreateScheduler()
245{
246 static auto scheduler = [] {
247 auto s = std::make_unique<marl::Scheduler>();
248 s->setWorkerThreadCount(8);
249 return s;
250 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500251
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500252 return *scheduler;
253}
Nicolas Capens157ba262019-12-10 17:49:14 -0500254} // Anonymous namespace
255
256namespace {
257
// Define __i386__ / __x86_64__ when only the corresponding _M_* macros are present,
// so the rest of this file can test a single spelling per architecture.
#if defined(_M_IX86) && !defined(__i386__)
#	define __i386__ 1
#endif

#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
#	define __x86_64__ 1
#endif
265
Antonio Maiorano370cba52019-12-31 11:36:07 -0500266Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400267{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500268 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100269 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500270 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000271 case rr::Optimization::Level::None: return Ice::Opt_m1;
272 case rr::Optimization::Level::Less: return Ice::Opt_m1;
273 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500274 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
275 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100276 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500277 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400278}
279
Antonio Maiorano370cba52019-12-31 11:36:07 -0500280Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
281{
282 switch(memoryOrder)
283 {
284 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
285 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
286 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
287 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
288 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
289 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
290 }
291 return Ice::Intrinsics::MemoryOrderInvalid;
292}
293
// CPU queries used by this backend. The public constants are defined outside the
// class from the private detect*() helpers.
class CPUID
{
public:
	const static bool ARM;
	const static bool SSE4_1;

private:
	// On x86 builds, executes CPUID with |info| in EAX and stores EAX, EBX, ECX, EDX
	// into |registers|[0..3]. On every other target all four entries are set to 0.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Compile-time architecture check: true for ARM/AArch64 builds, false for x86 and MIPS.
	// Any other architecture fails to compile.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// On x86 builds, tests bit 19 (mask 0x00080000) of ECX from CPUID leaf 1.
	// Always false on other targets.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		const int sse4_1Mask = 0x00080000;

		int registers[4];
		cpuid(registers, 1);

		return (registers[2] & sse4_1Mask) != 0;
#else
		return false;
#endif
	}
};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500343
Nicolas Capens157ba262019-12-10 17:49:14 -0500344const bool CPUID::ARM = CPUID::detectARM();
345const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
346const bool emulateIntrinsics = false;
347const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400348
Nicolas Capens157ba262019-12-10 17:49:14 -0500349constexpr bool subzeroDumpEnabled = false;
350constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500351
352#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500353static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
354static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500355#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500356
357} // anonymous namespace
358
359namespace rr {
360
// Returns the name of this Reactor backend.
std::string BackendName()
{
	return std::string("Subzero");
}
365
Ben Clayton713b8d32019-12-17 20:37:56 +0000366const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500367 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500368};
369
370enum EmulatedType
371{
372 EmulatedShift = 16,
373 EmulatedV2 = 2 << EmulatedShift,
374 EmulatedV4 = 4 << EmulatedShift,
375 EmulatedV8 = 8 << EmulatedShift,
376 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
377
378 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
379 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
380 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000381 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
382 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500383 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
384};
385
Ben Clayton713b8d32019-12-17 20:37:56 +0000386class Value : public Ice::Operand
387{};
388class SwitchCases : public Ice::InstSwitch
389{};
390class BasicBlock : public Ice::CfgNode
391{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500392
393Ice::Type T(Type *t)
394{
395 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
396 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500397}
398
Nicolas Capens157ba262019-12-10 17:49:14 -0500399Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400400{
Ben Clayton713b8d32019-12-17 20:37:56 +0000401 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500402}
403
404Type *T(EmulatedType t)
405{
Ben Clayton713b8d32019-12-17 20:37:56 +0000406 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500407}
408
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500409std::vector<Ice::Type> T(const std::vector<Type *> &types)
410{
411 std::vector<Ice::Type> result;
412 result.reserve(types.size());
413 for(auto &t : types)
414 {
415 result.push_back(T(t));
416 }
417 return result;
418}
419
Nicolas Capens157ba262019-12-10 17:49:14 -0500420Value *V(Ice::Operand *v)
421{
Ben Clayton713b8d32019-12-17 20:37:56 +0000422 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500423}
424
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500425Ice::Operand *V(Value *v)
426{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500427 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500428}
429
Antonio Maiorano62427e02020-02-13 09:18:05 -0500430std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
431{
432 std::vector<Ice::Operand *> result;
433 result.reserve(values.size());
434 for(auto &v : values)
435 {
436 result.push_back(V(v));
437 }
438 return result;
439}
440
Nicolas Capens157ba262019-12-10 17:49:14 -0500441BasicBlock *B(Ice::CfgNode *b)
442{
Ben Clayton713b8d32019-12-17 20:37:56 +0000443 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500444}
445
446static size_t typeSize(Type *type)
447{
448 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400449 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500450 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800451 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000452 case Type_v2i32: return 8;
453 case Type_v4i16: return 8;
454 case Type_v2i16: return 4;
455 case Type_v8i8: return 8;
456 case Type_v4i8: return 4;
457 case Type_v2f32: return 8;
458 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500459 }
460 }
461
462 return Ice::typeWidthInBytes(T(type));
463}
464
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400465static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500466{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400467 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500468 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
469 {
470 Nucleus::createRetVoid();
471 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400472
473 // Connect the entry block to the top of the initial basic block
474 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
475 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500476}
477
Ben Clayton713b8d32019-12-17 20:37:56 +0000478using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
479using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500480
481inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
482{
Ben Clayton713b8d32019-12-17 20:37:56 +0000483 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500484}
485
486inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
487{
488 return &sectionHeader(elfHeader)[index];
489}
490
491static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
492{
493 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
494
495 uint32_t index = relocation.getSymbol();
496 int table = relocationTable.sh_link;
497 void *symbolValue = nullptr;
498
499 if(index != SHN_UNDEF)
500 {
501 if(table == SHN_UNDEF) return nullptr;
502 const SectionHeader *symbolTable = elfSection(elfHeader, table);
503
504 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
505 if(index >= symtab_entries)
506 {
507 ASSERT(index < symtab_entries && "Symbol Index out of range");
508 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800509 }
510
Nicolas Capens157ba262019-12-10 17:49:14 -0500511 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000512 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500513 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800514
Nicolas Capens157ba262019-12-10 17:49:14 -0500515 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400516 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500517 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000518 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400519 }
520 else
521 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500522 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400523 }
Nicolas Capens66478362016-10-13 15:36:36 -0400524 }
525
Nicolas Capens157ba262019-12-10 17:49:14 -0500526 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000527 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500528
529 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400530 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400531 switch(relocation.getType())
532 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000533 case R_ARM_NONE:
534 // No relocation
535 break;
536 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500537 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000538 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500539 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
540 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
541 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400542 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000543 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500544 {
545 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
546 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
547 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400548 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000549 default:
550 ASSERT(false && "Unsupported relocation type");
551 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400552 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500553 }
554 else
555 {
556 switch(relocation.getType())
557 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000558 case R_386_NONE:
559 // No relocation
560 break;
561 case R_386_32:
562 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
563 break;
564 case R_386_PC32:
565 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
566 break;
567 default:
568 ASSERT(false && "Unsupported relocation type");
569 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500570 }
Nicolas Capens66478362016-10-13 15:36:36 -0400571 }
572
Nicolas Capens157ba262019-12-10 17:49:14 -0500573 return symbolValue;
574}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400575
Nicolas Capens157ba262019-12-10 17:49:14 -0500576static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
577{
578 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
579
580 uint32_t index = relocation.getSymbol();
581 int table = relocationTable.sh_link;
582 void *symbolValue = nullptr;
583
584 if(index != SHN_UNDEF)
585 {
586 if(table == SHN_UNDEF) return nullptr;
587 const SectionHeader *symbolTable = elfSection(elfHeader, table);
588
589 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
590 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400591 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500592 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400593 return nullptr;
594 }
595
Nicolas Capens157ba262019-12-10 17:49:14 -0500596 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000597 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500598 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400599
Nicolas Capens157ba262019-12-10 17:49:14 -0500600 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400601 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500602 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000603 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500604 }
605 else
606 {
607 return nullptr;
608 }
609 }
Nicolas Capens66478362016-10-13 15:36:36 -0400610
Nicolas Capens157ba262019-12-10 17:49:14 -0500611 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000612 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
613 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400614
Nicolas Capens157ba262019-12-10 17:49:14 -0500615 switch(relocation.getType())
616 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000617 case R_X86_64_NONE:
618 // No relocation
619 break;
620 case R_X86_64_64:
621 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
622 break;
623 case R_X86_64_PC32:
624 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
625 break;
626 case R_X86_64_32S:
627 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
628 break;
629 default:
630 ASSERT(false && "Unsupported relocation type");
631 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500632 }
633
634 return symbolValue;
635}
636
// Location and size of one function's machine code inside a loaded ELF image.
// Both members have default initializers so that a default-constructed EntryPoint is
// always in a well-defined "not found" state (null entry, zero size).
struct EntryPoint
{
	const void *entry = nullptr;  // Start of the code; null until assigned.
	size_t codeSize = 0;          // Size of the code in bytes.
};
642
643std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
644{
645 ASSERT(functionNames.size() > 0);
646 std::vector<EntryPoint> entryPoints(functionNames.size());
647
Ben Clayton713b8d32019-12-17 20:37:56 +0000648 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500649
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400650 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500651 if(!elfHeader->checkMagic())
652 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400653 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500654 }
655
656 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000657 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
658#if defined(__i386__)
659 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
660#elif defined(__x86_64__)
661 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
662#elif defined(__arm__)
663 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
664#elif defined(__aarch64__)
665 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
666#elif defined(__mips__)
667 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
668#else
669# error "Unsupported platform"
670#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500671
Ben Clayton713b8d32019-12-17 20:37:56 +0000672 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500673
674 for(int i = 0; i < elfHeader->e_shnum; i++)
675 {
676 if(sectionHeader[i].sh_type == SHT_PROGBITS)
677 {
678 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
679 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400680 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500681 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400682 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500683
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400684 for(size_t j = 0; j < functionNames.size(); ++j)
685 {
686 if(strstr(sectionName, functionNames[j]) != nullptr)
687 {
688 return j;
689 }
690 }
691
692 UNREACHABLE("Failed to find executable section that matches input function names");
693 return static_cast<size_t>(-1);
694 };
695
696 size_t index = findSectionNameEntryIndex();
697 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
698 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400699 }
700 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500701 else if(sectionHeader[i].sh_type == SHT_REL)
702 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000703 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400704
Nicolas Capens157ba262019-12-10 17:49:14 -0500705 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
706 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000707 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500708 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
709 }
710 }
711 else if(sectionHeader[i].sh_type == SHT_RELA)
712 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000713 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500714
715 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
716 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000717 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500718 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
719 }
720 }
721 }
722
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400723 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500724}
725
726template<typename T>
727struct ExecutableAllocator
728{
729 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000730 template<class U>
731 ExecutableAllocator(const ExecutableAllocator<U> &other)
732 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500733
734 using value_type = T;
735 using size_type = std::size_t;
736
737 T *allocate(size_type n)
738 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000739 return (T *)allocateMemoryPages(
740 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500741 }
742
743 void deallocate(T *p, size_type n)
744 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800745 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500746 }
747};
748
749class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
750{
751 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
752 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
753
754public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000755 ELFMemoryStreamer()
756 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500757 {
758 position = 0;
759 buffer.reserve(0x1000);
760 }
761
762 ~ELFMemoryStreamer() override
763 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500764 }
765
766 void write8(uint8_t Value) override
767 {
768 if(position == (uint64_t)buffer.size())
769 {
770 buffer.push_back(Value);
771 position++;
772 }
773 else if(position < (uint64_t)buffer.size())
774 {
775 buffer[position] = Value;
776 position++;
777 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000778 else
779 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500780 }
781
782 void writeBytes(llvm::StringRef Bytes) override
783 {
784 std::size_t oldSize = buffer.size();
785 buffer.resize(oldSize + Bytes.size());
786 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
787 position += Bytes.size();
788 }
789
790 uint64_t tell() const override { return position; }
791
792 void seek(uint64_t Off) override { position = Off; }
793
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400794 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500795 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400796 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500797
798#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500799 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
800#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400801 for(auto &entryPoint : entryPoints)
802 {
803 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
804 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500805#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500806
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400807 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400808 }
809
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500810 void finalize()
811 {
812 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
813
814 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
815 }
816
Ben Clayton713b8d32019-12-17 20:37:56 +0000817 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400818 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500819 ASSERT(func);
820 funcs[index] = func;
821 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400822
Nicolas Capens157ba262019-12-10 17:49:14 -0500823 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400824 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500825 ASSERT(funcs[index]);
826 return funcs[index];
827 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400828
Antonio Maiorano02a39532020-01-21 15:15:34 -0500829 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500830 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500831 // TODO(b/148086935): Replace with a buffer allocator.
832 size_t space = size + alignment;
833 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
834 void *ptr = buf.get();
835 void *alignedPtr = std::align(alignment, size, ptr, space);
836 ASSERT(alignedPtr);
837 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500838 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500839 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500840 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400841
Nicolas Capens157ba262019-12-10 17:49:14 -0500842private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000843 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500844 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
845 std::size_t position;
846 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500847};
848
Antonio Maiorano62427e02020-02-13 09:18:05 -0500849#ifdef ENABLE_RR_PRINT
850void VPrintf(const std::vector<Value *> &vals)
851{
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500852 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(::printf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500853}
854#endif // ENABLE_RR_PRINT
855
Nicolas Capens157ba262019-12-10 17:49:14 -0500856Nucleus::Nucleus()
857{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400858 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500859
860 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
861 Ice::ClFlags::getParsedClFlags(Flags);
862
Ben Clayton713b8d32019-12-17 20:37:56 +0000863#if defined(__arm__)
864 Flags.setTargetArch(Ice::Target_ARM32);
865 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
866#elif defined(__mips__)
867 Flags.setTargetArch(Ice::Target_MIPS32);
868 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
869#else // x86
870 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
871 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
872#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500873 Flags.setOutFileType(Ice::FT_Elf);
874 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
875 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
876 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
877 Flags.setDisableHybridAssembly(true);
878
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500879 // Emit functions into separate sections in the ELF so we can find them by name
880 Flags.setFunctionSections(true);
881
Nicolas Capens157ba262019-12-10 17:49:14 -0500882 static llvm::raw_os_ostream cout(std::cout);
883 static llvm::raw_os_ostream cerr(std::cerr);
884
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500885 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500886 {
887 // Decorate text asm with liveness info
888 Flags.setDecorateAsm(true);
889 }
890
Ben Clayton713b8d32019-12-17 20:37:56 +0000891 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500892 {
893 std::error_code errorCode;
894 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
895 ::elfFile = new Ice::ELFFileStreamer(*out);
896 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
897 }
898 else
899 {
900 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
901 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
902 ::routine = elfMemory;
903 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400904
905 ASSERT(Variable::unmaterializedVariables == nullptr);
906 Variable::unmaterializedVariables = new std::unordered_set<Variable *>();
Nicolas Capens157ba262019-12-10 17:49:14 -0500907}
908
909Nucleus::~Nucleus()
910{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400911 delete Variable::unmaterializedVariables;
912 Variable::unmaterializedVariables = nullptr;
913
Nicolas Capens157ba262019-12-10 17:49:14 -0500914 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500915 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500916
917 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500918 ::allocator = nullptr;
919
Nicolas Capens157ba262019-12-10 17:49:14 -0500920 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500921 ::function = nullptr;
922
Nicolas Capens157ba262019-12-10 17:49:14 -0500923 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500924 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500925
926 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500927 ::elfFile = nullptr;
928
Nicolas Capens157ba262019-12-10 17:49:14 -0500929 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500930 ::out = nullptr;
931
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400932 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500933 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400934 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500935
936 ::codegenMutex.unlock();
937}
938
939void Nucleus::setDefaultConfig(const Config &cfg)
940{
941 std::unique_lock<std::mutex> lock(::defaultConfigLock);
942 ::defaultConfig() = cfg;
943}
944
945void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
946{
947 std::unique_lock<std::mutex> lock(::defaultConfigLock);
948 auto &config = ::defaultConfig();
949 config = cfgEdit.apply(config);
950}
951
952Config Nucleus::getDefaultConfig()
953{
954 std::unique_lock<std::mutex> lock(::defaultConfigLock);
955 return ::defaultConfig();
956}
957
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500958// This function lowers and produces executable binary code in memory for the input functions,
959// and returns a Routine with the entry points to these functions.
960template<size_t Count>
961static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500962{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500963 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
964 // and GlobalContext::emitItems.
965
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500966 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500967 {
968 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500969 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500970 }
971
972 ::context->emitFileHeader();
973
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500974 // Translate
975
976 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500977 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500978 Ice::Cfg *currFunc = functions[i];
979
980 // Install function allocator in TLS for Cfg-specific container allocators
981 Ice::CfgLocalAllocatorScope allocScope(currFunc);
982
983 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
984
985 rr::optimize(currFunc);
986
987 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500988 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500989
990 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500991 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500992
993 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
994
995 if(subzeroEmitTextAsm)
996 {
997 currFunc->emit();
998 }
999
1000 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -05001001 }
1002
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001003 // Emit items
1004
1005 ::context->lowerGlobals("");
1006
Nicolas Capens157ba262019-12-10 17:49:14 -05001007 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001008
1009 for(size_t i = 0; i < Count; ++i)
1010 {
1011 Ice::Cfg *currFunc = functions[i];
1012
1013 // Accumulate globals from functions to emit into the "last" section at the end
1014 auto globals = currFunc->getGlobalInits();
1015 if(globals && !globals->empty())
1016 {
1017 ::context->getGlobals()->merge(globals.get());
1018 }
1019
1020 auto assembler = currFunc->releaseAssembler();
1021 assembler->alignFunction();
1022 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1023 }
1024
Nicolas Capens157ba262019-12-10 17:49:14 -05001025 ::context->lowerGlobals("last");
1026 ::context->lowerConstants();
1027 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001028
Nicolas Capens157ba262019-12-10 17:49:14 -05001029 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001030 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001031 objectWriter->writeNonUserSections();
1032
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001033 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001034 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1035 ASSERT(entryPoints.size() == Count);
1036 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001037 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001038 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001039 }
1040
1041 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001042
1043 Routine *handoffRoutine = ::routine;
1044 ::routine = nullptr;
1045
1046 return std::shared_ptr<Routine>(handoffRoutine);
1047}
1048
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001049std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1050{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001051 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001052 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1053}
1054
Nicolas Capens157ba262019-12-10 17:49:14 -05001055Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1056{
1057 Ice::Type type = T(t);
1058 int typeSize = Ice::typeWidthInBytes(type);
1059 int totalSize = typeSize * (arraySize ? arraySize : 1);
1060
1061 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1062 auto address = ::function->makeVariable(T(getPointerType(t)));
1063 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1064 ::function->getEntryNode()->getInsts().push_front(alloca);
1065
1066 return V(address);
1067}
1068
1069BasicBlock *Nucleus::createBasicBlock()
1070{
1071 return B(::function->makeNode());
1072}
1073
1074BasicBlock *Nucleus::getInsertBlock()
1075{
1076 return B(::basicBlock);
1077}
1078
1079void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1080{
Ben Clayton713b8d32019-12-17 20:37:56 +00001081 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001082
1083 Variable::materializeAll();
1084
1085 ::basicBlock = basicBlock;
1086}
1087
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001088void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001089{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001090 ASSERT(::function == nullptr);
1091 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001092 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001093 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001094 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001095
1096 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1097
1098 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1099 // becomes invalid if another one is created; for example, when creating await and destroy functions
1100 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1101 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001102 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1103
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001104 ::entryBlock = ::function->getEntryNode();
1105 ::basicBlock = ::function->makeNode();
1106 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001107}
1108
1109Value *Nucleus::getArgument(unsigned int index)
1110{
1111 return V(::function->getArgs()[index]);
1112}
1113
1114void Nucleus::createRetVoid()
1115{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001116 RR_DEBUG_INFO_UPDATE_LOC();
1117
Nicolas Capens157ba262019-12-10 17:49:14 -05001118 // Code generated after this point is unreachable, so any variables
1119 // being read can safely return an undefined value. We have to avoid
1120 // materializing variables after the terminator ret instruction.
1121 Variable::killUnmaterialized();
1122
1123 Ice::InstRet *ret = Ice::InstRet::create(::function);
1124 ::basicBlock->appendInst(ret);
1125}
1126
1127void Nucleus::createRet(Value *v)
1128{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001129 RR_DEBUG_INFO_UPDATE_LOC();
1130
Nicolas Capens157ba262019-12-10 17:49:14 -05001131 // Code generated after this point is unreachable, so any variables
1132 // being read can safely return an undefined value. We have to avoid
1133 // materializing variables after the terminator ret instruction.
1134 Variable::killUnmaterialized();
1135
1136 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1137 ::basicBlock->appendInst(ret);
1138}
1139
1140void Nucleus::createBr(BasicBlock *dest)
1141{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001142 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001143 Variable::materializeAll();
1144
1145 auto br = Ice::InstBr::create(::function, dest);
1146 ::basicBlock->appendInst(br);
1147}
1148
1149void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1150{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001151 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001152 Variable::materializeAll();
1153
1154 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1155 ::basicBlock->appendInst(br);
1156}
1157
1158static bool isCommutative(Ice::InstArithmetic::OpKind op)
1159{
1160 switch(op)
1161 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001162 case Ice::InstArithmetic::Add:
1163 case Ice::InstArithmetic::Fadd:
1164 case Ice::InstArithmetic::Mul:
1165 case Ice::InstArithmetic::Fmul:
1166 case Ice::InstArithmetic::And:
1167 case Ice::InstArithmetic::Or:
1168 case Ice::InstArithmetic::Xor:
1169 return true;
1170 default:
1171 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001172 }
1173}
1174
1175static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1176{
1177 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1178
1179 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1180
1181 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1182 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1183 ::basicBlock->appendInst(arithmetic);
1184
1185 return V(result);
1186}
1187
1188Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001190 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001191 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1192}
1193
1194Value *Nucleus::createSub(Value *lhs, Value *rhs)
1195{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001196 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001197 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1198}
1199
1200Value *Nucleus::createMul(Value *lhs, Value *rhs)
1201{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001202 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001203 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1204}
1205
1206Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1207{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001208 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001209 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1210}
1211
1212Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1213{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001214 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001215 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1216}
1217
1218Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1219{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001220 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001221 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1222}
1223
1224Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1225{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001226 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001227 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1228}
1229
1230Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1231{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001232 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001233 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1234}
1235
1236Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1237{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001238 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001239 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1240}
1241
1242Value *Nucleus::createURem(Value *lhs, Value *rhs)
1243{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001244 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001245 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1246}
1247
1248Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1249{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001250 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001251 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1252}
1253
1254Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1255{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001256 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001257 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1258 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001259 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001260 return nullptr;
1261}
1262
1263RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1264{
1265 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001266}
1267
1268Value *Nucleus::createShl(Value *lhs, Value *rhs)
1269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001271 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1272}
1273
1274Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001277 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1278}
1279
1280Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001282 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001283 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1284}
1285
1286Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001289 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1290}
1291
1292Value *Nucleus::createOr(Value *lhs, Value *rhs)
1293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001295 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1296}
1297
1298Value *Nucleus::createXor(Value *lhs, Value *rhs)
1299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001301 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1302}
1303
1304Value *Nucleus::createNeg(Value *v)
1305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001307 return createSub(createNullValue(T(v->getType())), v);
1308}
1309
1310Value *Nucleus::createFNeg(Value *v)
1311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001312 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001313 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1314 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001315
1316 return createFSub(negativeZero, v);
1317}
1318
1319Value *Nucleus::createNot(Value *v)
1320{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001321 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001322 if(Ice::isScalarIntegerType(v->getType()))
1323 {
1324 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1325 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001326 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001327 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001328 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001329 return createXor(v, createConstantVector(c, T(v->getType())));
1330 }
1331}
1332
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001333static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1334{
1335#if defined(__i386__) || defined(__x86_64__)
1336 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1337 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1338 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1339#else
1340 if(atomic)
1341 {
1342 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1343 }
1344 if(memoryOrder != std::memory_order_relaxed)
1345 {
1346 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1347 }
1348#endif
1349
1350 // Vulkan doesn't allow sequential memory order
1351 ASSERT(memoryOrder != std::memory_order_seq_cst);
1352}
1353
Nicolas Capens157ba262019-12-10 17:49:14 -05001354Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1355{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001356 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001357 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001358
1359 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001360 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001361
Ben Clayton713b8d32019-12-17 20:37:56 +00001362 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001363 {
1364 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001365 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001366 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001367 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001368 auto pointer = RValue<Pointer<Byte>>(ptr);
1369 Int x = *Pointer<Int>(pointer);
1370
1371 Int4 vector;
1372 vector = Insert(vector, x, 0);
1373
Antonio Maiorano02a39532020-01-21 15:15:34 -05001374 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001375 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1376 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001377 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001378 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001379 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001380 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001381 auto pointer = RValue<Pointer<Byte>>(ptr);
1382 Int x = *Pointer<Int>(pointer);
1383 Int y = *Pointer<Int>(pointer + 4);
1384
1385 Int4 vector;
1386 vector = Insert(vector, x, 0);
1387 vector = Insert(vector, y, 1);
1388
Antonio Maiorano02a39532020-01-21 15:15:34 -05001389 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001390 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1391 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001392 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001393 else
1394 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001395 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001396 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001397 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001398 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001399 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001400 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001401 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1402 load->addArg(ptr);
1403 load->addArg(::context->getConstantInt32(typeSize(type)));
1404 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001405 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001406 }
1407 else
1408 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001409 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001410 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001411
Antonio Maiorano02a39532020-01-21 15:15:34 -05001412 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001413 return V(result);
1414}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001415
Nicolas Capens157ba262019-12-10 17:49:14 -05001416Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001418 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001419 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001420
Ben Clayton713b8d32019-12-17 20:37:56 +00001421#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001422 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001423 if(align != 0)
1424 {
1425 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1426 call->addArg(ptr);
1427 call->addArg(::context->getConstantInt64(typeSize(type)));
1428 ::basicBlock->appendInst(call);
1429 }
1430#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001431
Nicolas Capens157ba262019-12-10 17:49:14 -05001432 int valueType = (int)reinterpret_cast<intptr_t>(type);
1433
Ben Clayton713b8d32019-12-17 20:37:56 +00001434 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001435 {
1436 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001437 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001438 if(typeSize(type) == 4)
1439 {
1440 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1441 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1442 ::basicBlock->appendInst(bitcast);
1443
1444 RValue<Int4> v(V(vector));
1445
1446 auto pointer = RValue<Pointer<Byte>>(ptr);
1447 Int x = Extract(v, 0);
1448 *Pointer<Int>(pointer) = x;
1449 }
1450 else if(typeSize(type) == 8)
1451 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001452 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001453 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1454 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1455 ::basicBlock->appendInst(bitcast);
1456
1457 RValue<Int4> v(V(vector));
1458
1459 auto pointer = RValue<Pointer<Byte>>(ptr);
1460 Int x = Extract(v, 0);
1461 *Pointer<Int>(pointer) = x;
1462 Int y = Extract(v, 1);
1463 *Pointer<Int>(pointer + 4) = y;
1464 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001465 else
1466 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001467 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001468 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001469 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001470 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001471 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1472 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1473 store->addArg(value);
1474 store->addArg(ptr);
1475 store->addArg(::context->getConstantInt32(typeSize(type)));
1476 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001477 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001478 }
1479 else
1480 {
1481 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001482
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001483 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001484 ::basicBlock->appendInst(store);
1485 }
1486
1487 return value;
1488}
1489
1490Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1491{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001492 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001493 ASSERT(index->getType() == Ice::IceType_i32);
1494
1495 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1496 {
1497 int32_t offset = constant->getValue() * (int)typeSize(type);
1498
1499 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001500 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001501 return ptr;
1502 }
1503
Nicolas Capens157ba262019-12-10 17:49:14 -05001504 return createAdd(ptr, createConstantInt(offset));
1505 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001506
Nicolas Capens157ba262019-12-10 17:49:14 -05001507 if(!Ice::isByteSizedType(T(type)))
1508 {
1509 index = createMul(index, createConstantInt((int)typeSize(type)));
1510 }
1511
Ben Clayton713b8d32019-12-17 20:37:56 +00001512 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001513 {
1514 if(unsignedIndex)
1515 {
1516 index = createZExt(index, T(Ice::IceType_i64));
1517 }
1518 else
1519 {
1520 index = createSExt(index, T(Ice::IceType_i64));
1521 }
1522 }
1523
1524 return createAdd(ptr, index);
1525}
1526
Antonio Maiorano370cba52019-12-31 11:36:07 -05001527static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1528{
1529 Ice::Variable *result = ::function->makeVariable(value->getType());
1530
1531 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1532 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1533 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1534 auto op = ::context->getConstantInt32(rmwOp);
1535 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1536 inst->addArg(op);
1537 inst->addArg(ptr);
1538 inst->addArg(value);
1539 inst->addArg(order);
1540 ::basicBlock->appendInst(inst);
1541
1542 return V(result);
1543}
1544
Nicolas Capens157ba262019-12-10 17:49:14 -05001545Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1546{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001547 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001548 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001549}
1550
1551Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1552{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001553 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001554 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001555}
1556
1557Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1558{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001559 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001560 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001561}
1562
1563Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1564{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001565 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001566 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001567}
1568
1569Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1570{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001571 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001572 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001573}
1574
1575Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1576{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001577 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001578 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001579}
1580
1581Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1582{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001583 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001584 Ice::Variable *result = ::function->makeVariable(value->getType());
1585
1586 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1587 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1588 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1589 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1590 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1591 inst->addArg(ptr);
1592 inst->addArg(compare);
1593 inst->addArg(value);
1594 inst->addArg(orderEq);
1595 inst->addArg(orderNeq);
1596 ::basicBlock->appendInst(inst);
1597
1598 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001599}
1600
1601static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1602{
1603 if(v->getType() == T(destType))
1604 {
1605 return v;
1606 }
1607
1608 Ice::Variable *result = ::function->makeVariable(T(destType));
1609 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1610 ::basicBlock->appendInst(cast);
1611
1612 return V(result);
1613}
1614
1615Value *Nucleus::createTrunc(Value *v, Type *destType)
1616{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001617 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001618 return createCast(Ice::InstCast::Trunc, v, destType);
1619}
1620
1621Value *Nucleus::createZExt(Value *v, Type *destType)
1622{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001623 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001624 return createCast(Ice::InstCast::Zext, v, destType);
1625}
1626
1627Value *Nucleus::createSExt(Value *v, Type *destType)
1628{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001629 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001630 return createCast(Ice::InstCast::Sext, v, destType);
1631}
1632
1633Value *Nucleus::createFPToUI(Value *v, Type *destType)
1634{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001635 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001636 return createCast(Ice::InstCast::Fptoui, v, destType);
1637}
1638
1639Value *Nucleus::createFPToSI(Value *v, Type *destType)
1640{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001641 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001642 return createCast(Ice::InstCast::Fptosi, v, destType);
1643}
1644
1645Value *Nucleus::createSIToFP(Value *v, Type *destType)
1646{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001647 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001648 return createCast(Ice::InstCast::Sitofp, v, destType);
1649}
1650
1651Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1652{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001653 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001654 return createCast(Ice::InstCast::Fptrunc, v, destType);
1655}
1656
1657Value *Nucleus::createFPExt(Value *v, Type *destType)
1658{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001659 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001660 return createCast(Ice::InstCast::Fpext, v, destType);
1661}
1662
1663Value *Nucleus::createBitCast(Value *v, Type *destType)
1664{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001665 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001666 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1667 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1668 // emulate them by writing to the stack and reading back as the destination type.
1669 if(emulateMismatchedBitCast)
1670 {
1671 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1672 {
1673 Value *address = allocateStackVariable(destType);
1674 createStore(v, address, T(v->getType()));
1675 return createLoad(address, destType);
1676 }
1677 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1678 {
1679 Value *address = allocateStackVariable(T(v->getType()));
1680 createStore(v, address, T(v->getType()));
1681 return createLoad(address, destType);
1682 }
1683 }
1684
1685 return createCast(Ice::InstCast::Bitcast, v, destType);
1686}
1687
1688static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1689{
1690 ASSERT(lhs->getType() == rhs->getType());
1691
1692 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1693 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1694 ::basicBlock->appendInst(cmp);
1695
1696 return V(result);
1697}
1698
1699Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1700{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001701 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001702 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1703}
1704
1705Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1706{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001707 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001708 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1709}
1710
1711Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1712{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001713 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001714 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1715}
1716
1717Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1718{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001719 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001720 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1721}
1722
1723Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1724{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001725 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001726 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1727}
1728
1729Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1730{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001731 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001732 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1733}
1734
1735Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1736{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001737 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001738 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1739}
1740
1741Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1742{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001743 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001744 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1745}
1746
1747Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1748{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001749 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001750 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1751}
1752
1753Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1754{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001755 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001756 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1757}
1758
1759Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1760{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001761 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001762 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1763}
1764
1765static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1766{
1767 ASSERT(lhs->getType() == rhs->getType());
1768 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1769
1770 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1771 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1772 ::basicBlock->appendInst(cmp);
1773
1774 return V(result);
1775}
1776
1777Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001780 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1781}
1782
1783Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1784{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001785 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001786 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1787}
1788
1789Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001792 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1793}
1794
1795Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001798 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1799}
1800
1801Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1802{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001803 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001804 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1805}
1806
1807Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1808{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001809 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001810 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1811}
1812
1813Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1814{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001815 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001816 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1817}
1818
1819Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1820{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001821 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001822 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1823}
1824
1825Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1826{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001827 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001828 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1829}
1830
1831Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1832{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001833 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001834 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1835}
1836
1837Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001840 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1841}
1842
1843Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001846 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1847}
1848
1849Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001852 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1853}
1854
1855Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001858 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1859}
1860
1861Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001864 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001865 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001866 ::basicBlock->appendInst(extract);
1867
1868 return V(result);
1869}
1870
1871Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1872{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001873 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001874 auto result = ::function->makeVariable(vector->getType());
1875 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1876 ::basicBlock->appendInst(insert);
1877
1878 return V(result);
1879}
1880
1881Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1882{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001883 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001884 ASSERT(V1->getType() == V2->getType());
1885
1886 int size = Ice::typeNumElements(V1->getType());
1887 auto result = ::function->makeVariable(V1->getType());
1888 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1889
1890 for(int i = 0; i < size; i++)
1891 {
1892 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1893 }
1894
1895 ::basicBlock->appendInst(shuffle);
1896
1897 return V(result);
1898}
1899
1900Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1901{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001902 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001903 ASSERT(ifTrue->getType() == ifFalse->getType());
1904
1905 auto result = ::function->makeVariable(ifTrue->getType());
1906 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1907 ::basicBlock->appendInst(select);
1908
1909 return V(result);
1910}
1911
1912SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1913{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001914 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001915 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1916 ::basicBlock->appendInst(switchInst);
1917
Ben Clayton713b8d32019-12-17 20:37:56 +00001918 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001919}
1920
1921void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1922{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001923 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001924 switchCases->addBranch(label, label, branch);
1925}
1926
1927void Nucleus::createUnreachable()
1928{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001929 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001930 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1931 ::basicBlock->appendInst(unreachable);
1932}
1933
Antonio Maiorano62427e02020-02-13 09:18:05 -05001934Type *Nucleus::getType(Value *value)
1935{
1936 return T(V(value)->getType());
1937}
1938
1939Type *Nucleus::getContainedType(Type *vectorType)
1940{
1941 Ice::Type vecTy = T(vectorType);
1942 switch(vecTy)
1943 {
1944 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1945 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1946 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1947 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1948 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1949 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1950 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1951 default:
1952 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1953 return {};
1954 }
1955}
1956
Nicolas Capens157ba262019-12-10 17:49:14 -05001957Type *Nucleus::getPointerType(Type *ElementType)
1958{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001959 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001960}
1961
Antonio Maiorano62427e02020-02-13 09:18:05 -05001962static constexpr Ice::Type getNaturalIntType()
1963{
1964 constexpr size_t intSize = sizeof(int);
1965 static_assert(intSize == 4 || intSize == 8, "");
1966 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1967}
1968
1969Type *Nucleus::getPrintfStorageType(Type *valueType)
1970{
1971 Ice::Type valueTy = T(valueType);
1972 switch(valueTy)
1973 {
1974 case Ice::IceType_i32:
1975 return T(getNaturalIntType());
1976
1977 case Ice::IceType_f32:
1978 return T(Ice::IceType_f64);
1979
1980 default:
1981 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1982 return {};
1983 }
1984}
1985
Nicolas Capens157ba262019-12-10 17:49:14 -05001986Value *Nucleus::createNullValue(Type *Ty)
1987{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001988 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001989 if(Ice::isVectorType(T(Ty)))
1990 {
1991 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001992 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001993 return createConstantVector(c, Ty);
1994 }
1995 else
1996 {
1997 return V(::context->getConstantZero(T(Ty)));
1998 }
1999}
2000
2001Value *Nucleus::createConstantLong(int64_t i)
2002{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002003 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002004 return V(::context->getConstantInt64(i));
2005}
2006
2007Value *Nucleus::createConstantInt(int i)
2008{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002009 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002010 return V(::context->getConstantInt32(i));
2011}
2012
2013Value *Nucleus::createConstantInt(unsigned int i)
2014{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002015 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002016 return V(::context->getConstantInt32(i));
2017}
2018
2019Value *Nucleus::createConstantBool(bool b)
2020{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002021 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002022 return V(::context->getConstantInt1(b));
2023}
2024
2025Value *Nucleus::createConstantByte(signed char i)
2026{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002027 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002028 return V(::context->getConstantInt8(i));
2029}
2030
2031Value *Nucleus::createConstantByte(unsigned char i)
2032{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002033 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002034 return V(::context->getConstantInt8(i));
2035}
2036
2037Value *Nucleus::createConstantShort(short i)
2038{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002039 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002040 return V(::context->getConstantInt16(i));
2041}
2042
2043Value *Nucleus::createConstantShort(unsigned short i)
2044{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002045 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002046 return V(::context->getConstantInt16(i));
2047}
2048
2049Value *Nucleus::createConstantFloat(float x)
2050{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002051 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002052 return V(::context->getConstantFloat(x));
2053}
2054
2055Value *Nucleus::createNullPointer(Type *Ty)
2056{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002057 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002058 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002059}
2060
Antonio Maiorano02a39532020-01-21 15:15:34 -05002061static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2062{
2063 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2064}
2065
Nicolas Capens157ba262019-12-10 17:49:14 -05002066Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2067{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002068 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002069 const int vectorSize = 16;
2070 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2071 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002072
2073 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002074 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002075
Antonio Maioranoa0957112020-03-04 15:06:19 -05002076 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002077 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002078
2079 switch((int)reinterpret_cast<intptr_t>(type))
2080 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002081 case Ice::IceType_v4i32:
2082 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002083 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002084 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002085 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002086 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002087 }
2088 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002089 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002090 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002091 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002092 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002093 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002094 }
2095 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002096 case Ice::IceType_v8i16:
2097 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002098 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002099 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002100 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002101 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002102 }
2103 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002104 case Ice::IceType_v16i8:
2105 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002106 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002107 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002108 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002109 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002110 }
2111 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002112 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002113 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002114 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002115 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002116 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002117 }
2118 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002119 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002120 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002121 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002122 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002123 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002124 }
2125 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002126 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002127 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002128 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002129 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002130 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002131 }
2132 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002133 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002134 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002135 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002136 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002137 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002138 }
2139 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002140 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002141 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002142 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002143 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002144 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002145 }
2146 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002147 default:
2148 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002149 }
2150
Antonio Maiorano02a39532020-01-21 15:15:34 -05002151 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002152
Antonio Maiorano02a39532020-01-21 15:15:34 -05002153 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002154 return V(result);
2155}
2156
2157Value *Nucleus::createConstantVector(const double *constants, Type *type)
2158{
Ben Clayton713b8d32019-12-17 20:37:56 +00002159 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002160}
2161
Antonio Maiorano62427e02020-02-13 09:18:05 -05002162Value *Nucleus::createConstantString(const char *v)
2163{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002164 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002165 return V(IceConstantData(v, strlen(v) + 1));
2166}
2167
Nicolas Capens519cf222020-05-08 15:27:19 -04002168Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002169{
2170 return T(Ice::IceType_void);
2171}
2172
Nicolas Capens519cf222020-05-08 15:27:19 -04002173Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002174{
2175 return T(Ice::IceType_i1);
2176}
2177
Nicolas Capens519cf222020-05-08 15:27:19 -04002178Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002179{
2180 return T(Ice::IceType_i8);
2181}
2182
Nicolas Capens519cf222020-05-08 15:27:19 -04002183Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002184{
2185 return T(Ice::IceType_i8);
2186}
2187
Nicolas Capens519cf222020-05-08 15:27:19 -04002188Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002189{
2190 return T(Ice::IceType_i16);
2191}
2192
Nicolas Capens519cf222020-05-08 15:27:19 -04002193Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002194{
2195 return T(Ice::IceType_i16);
2196}
2197
Nicolas Capens519cf222020-05-08 15:27:19 -04002198Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002199{
2200 return T(Type_v4i8);
2201}
2202
Nicolas Capens519cf222020-05-08 15:27:19 -04002203Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002204{
2205 return T(Type_v4i8);
2206}
2207
Ben Clayton713b8d32019-12-17 20:37:56 +00002208namespace {
2209RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002210{
Ben Clayton713b8d32019-12-17 20:37:56 +00002211 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002212}
2213
Ben Clayton713b8d32019-12-17 20:37:56 +00002214RValue<Byte> Extract(RValue<Byte8> val, int i)
2215{
Nicolas Capens519cf222020-05-08 15:27:19 -04002216 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002217}
2218
2219RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2220{
2221 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
2222}
2223} // namespace
2224
Nicolas Capens157ba262019-12-10 17:49:14 -05002225RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2226{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002227 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002228 if(emulateIntrinsics)
2229 {
2230 Byte8 result;
2231 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2232 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2233 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2234 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2235 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2236 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2237 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2238 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2239
2240 return result;
2241 }
2242 else
2243 {
2244 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002245 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002246 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2247 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2248 paddusb->addArg(x.value);
2249 paddusb->addArg(y.value);
2250 ::basicBlock->appendInst(paddusb);
2251
2252 return RValue<Byte8>(V(result));
2253 }
2254}
2255
2256RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2257{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002258 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002259 if(emulateIntrinsics)
2260 {
2261 Byte8 result;
2262 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2263 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2264 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2265 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2266 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2267 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2268 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2269 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2270
2271 return result;
2272 }
2273 else
2274 {
2275 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002276 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002277 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2278 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2279 psubusw->addArg(x.value);
2280 psubusw->addArg(y.value);
2281 ::basicBlock->appendInst(psubusw);
2282
2283 return RValue<Byte8>(V(result));
2284 }
2285}
2286
2287RValue<SByte> Extract(RValue<SByte8> val, int i)
2288{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002289 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens519cf222020-05-08 15:27:19 -04002290 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002291}
2292
2293RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2294{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002295 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002296 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2297}
2298
2299RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2300{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002301 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002302 if(emulateIntrinsics)
2303 {
2304 SByte8 result;
2305 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2306 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2307 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2308 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2309 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2310 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2311 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2312 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2313
2314 return result;
2315 }
2316 else
2317 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002318#if defined(__i386__) || defined(__x86_64__)
2319 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2320 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2321 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002322
Ben Clayton713b8d32019-12-17 20:37:56 +00002323 return As<SByte8>(hi | lo);
2324#else
2325 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2326#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002327 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002328}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002329
Nicolas Capens157ba262019-12-10 17:49:14 -05002330RValue<Int> SignMask(RValue<Byte8> x)
2331{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002332 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002333 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002334 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002335 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2336 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002337 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002338 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002339 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002340 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002341 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002342 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2343 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2344 movmsk->addArg(x.value);
2345 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002346
Nicolas Capens157ba262019-12-10 17:49:14 -05002347 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002348 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002349}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002350
2351// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2352// {
Nicolas Capens2f970b62016-11-08 14:28:59 -05002353// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002354// }
2355
Nicolas Capens157ba262019-12-10 17:49:14 -05002356RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2357{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002358 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002359 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2360}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002361
Nicolas Capens519cf222020-05-08 15:27:19 -04002362Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002363{
2364 return T(Type_v8i8);
2365}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002366
Nicolas Capens598f8d82016-09-26 15:09:10 -04002367// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2368// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002369// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002370// }
2371
2372// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2373// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002374// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002375// }
2376
Nicolas Capens157ba262019-12-10 17:49:14 -05002377RValue<SByte> SaturateSigned(RValue<Short> x)
2378{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002379 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002380 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2381}
2382
2383RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2384{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002385 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002386 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002387 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002388 SByte8 result;
2389 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2390 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2391 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2392 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2393 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2394 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2395 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2396 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002397
Nicolas Capens157ba262019-12-10 17:49:14 -05002398 return result;
2399 }
2400 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002401 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002402 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002403 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002404 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2405 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2406 paddsb->addArg(x.value);
2407 paddsb->addArg(y.value);
2408 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002409
Nicolas Capens157ba262019-12-10 17:49:14 -05002410 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002411 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002412}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002413
Nicolas Capens157ba262019-12-10 17:49:14 -05002414RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2415{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002416 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002417 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002418 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002419 SByte8 result;
2420 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2421 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2422 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2423 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2424 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2425 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2426 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2427 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002428
Nicolas Capens157ba262019-12-10 17:49:14 -05002429 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002430 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002431 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002432 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002433 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002434 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002435 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2436 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2437 psubsb->addArg(x.value);
2438 psubsb->addArg(y.value);
2439 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002440
Nicolas Capens157ba262019-12-10 17:49:14 -05002441 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002442 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002443}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002444
Nicolas Capens157ba262019-12-10 17:49:14 -05002445RValue<Int> SignMask(RValue<SByte8> x)
2446{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002447 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002448 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002449 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002450 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2451 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002452 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002453 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002454 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002455 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002456 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002457 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2458 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2459 movmsk->addArg(x.value);
2460 ::basicBlock->appendInst(movmsk);
2461
2462 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002463 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002464}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002465
Nicolas Capens157ba262019-12-10 17:49:14 -05002466RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2467{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002468 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002469 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2470}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002471
Nicolas Capens157ba262019-12-10 17:49:14 -05002472RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2473{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002474 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002475 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2476}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002477
Nicolas Capens519cf222020-05-08 15:27:19 -04002478Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002479{
2480 return T(Type_v8i8);
2481}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002482
Nicolas Capens519cf222020-05-08 15:27:19 -04002483Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002484{
2485 return T(Ice::IceType_v16i8);
2486}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002487
Nicolas Capens519cf222020-05-08 15:27:19 -04002488Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002489{
2490 return T(Ice::IceType_v16i8);
2491}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002492
Nicolas Capens519cf222020-05-08 15:27:19 -04002493Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002494{
2495 return T(Type_v2i16);
2496}
Nicolas Capensd4227962016-11-09 14:24:25 -05002497
Nicolas Capens519cf222020-05-08 15:27:19 -04002498Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002499{
2500 return T(Type_v2i16);
2501}
Nicolas Capensd4227962016-11-09 14:24:25 -05002502
Nicolas Capens157ba262019-12-10 17:49:14 -05002503Short4::Short4(RValue<Int4> cast)
2504{
Ben Clayton713b8d32019-12-17 20:37:56 +00002505 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens519cf222020-05-08 15:27:19 -04002506 Value *short8 = Nucleus::createBitCast(cast.value, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002507 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2508
2509 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
Nicolas Capens519cf222020-05-08 15:27:19 -04002510 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002511
2512 storeValue(short4);
2513}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002514
2515// Short4::Short4(RValue<Float> cast)
2516// {
2517// }
2518
Nicolas Capens157ba262019-12-10 17:49:14 -05002519Short4::Short4(RValue<Float4> cast)
2520{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002521 // TODO(b/150791192): Generalize and optimize
2522 auto smin = std::numeric_limits<short>::min();
2523 auto smax = std::numeric_limits<short>::max();
2524 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002525}
2526
2527RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2528{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002529 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002530 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002531 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002532 Short4 result;
2533 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2534 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2535 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2536 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002537
2538 return result;
2539 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002540 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002541 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002542 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2543 }
2544}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002545
Nicolas Capens157ba262019-12-10 17:49:14 -05002546RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2547{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002548 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002549 if(emulateIntrinsics)
2550 {
2551 Short4 result;
2552 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2553 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2554 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2555 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2556
2557 return result;
2558 }
2559 else
2560 {
2561 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2562 }
2563}
2564
2565RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2566{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002567 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002568 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2569 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2570 ::basicBlock->appendInst(cmp);
2571
2572 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2573 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2574 ::basicBlock->appendInst(select);
2575
2576 return RValue<Short4>(V(result));
2577}
2578
2579RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2580{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002581 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002582 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2583 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2584 ::basicBlock->appendInst(cmp);
2585
2586 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2587 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2588 ::basicBlock->appendInst(select);
2589
2590 return RValue<Short4>(V(result));
2591}
2592
2593RValue<Short> SaturateSigned(RValue<Int> x)
2594{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002595 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002596 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2597}
2598
2599RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2600{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002601 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002602 if(emulateIntrinsics)
2603 {
2604 Short4 result;
2605 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2606 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2607 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2608 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2609
2610 return result;
2611 }
2612 else
2613 {
2614 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002615 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002616 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2617 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2618 paddsw->addArg(x.value);
2619 paddsw->addArg(y.value);
2620 ::basicBlock->appendInst(paddsw);
2621
2622 return RValue<Short4>(V(result));
2623 }
2624}
2625
2626RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2627{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002628 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002629 if(emulateIntrinsics)
2630 {
2631 Short4 result;
2632 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2633 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2634 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2635 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2636
2637 return result;
2638 }
2639 else
2640 {
2641 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002642 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002643 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2644 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2645 psubsw->addArg(x.value);
2646 psubsw->addArg(y.value);
2647 ::basicBlock->appendInst(psubsw);
2648
2649 return RValue<Short4>(V(result));
2650 }
2651}
2652
2653RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2654{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002655 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002656 if(emulateIntrinsics)
2657 {
2658 Short4 result;
2659 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2660 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2661 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2662 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2663
2664 return result;
2665 }
2666 else
2667 {
2668 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002669 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002670 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2671 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2672 pmulhw->addArg(x.value);
2673 pmulhw->addArg(y.value);
2674 ::basicBlock->appendInst(pmulhw);
2675
2676 return RValue<Short4>(V(result));
2677 }
2678}
2679
2680RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2681{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002682 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002683 if(emulateIntrinsics)
2684 {
2685 Int2 result;
2686 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2687 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2688
2689 return result;
2690 }
2691 else
2692 {
2693 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002694 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002695 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2696 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2697 pmaddwd->addArg(x.value);
2698 pmaddwd->addArg(y.value);
2699 ::basicBlock->appendInst(pmaddwd);
2700
2701 return As<Int2>(V(result));
2702 }
2703}
2704
2705RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2706{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002707 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002708 if(emulateIntrinsics)
2709 {
2710 SByte8 result;
2711 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2712 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2713 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2714 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2715 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2716 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2717 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2718 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2719
2720 return result;
2721 }
2722 else
2723 {
2724 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002725 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002726 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2727 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2728 pack->addArg(x.value);
2729 pack->addArg(y.value);
2730 ::basicBlock->appendInst(pack);
2731
2732 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2733 }
2734}
2735
2736RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2737{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002738 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002739 if(emulateIntrinsics)
2740 {
2741 Byte8 result;
2742 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2743 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2744 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2745 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2746 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2747 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2748 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2749 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2750
2751 return result;
2752 }
2753 else
2754 {
2755 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002756 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002757 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2758 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2759 pack->addArg(x.value);
2760 pack->addArg(y.value);
2761 ::basicBlock->appendInst(pack);
2762
2763 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2764 }
2765}
2766
2767RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2768{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002769 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002770 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2771}
2772
2773RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2774{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002775 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002776 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2777}
2778
Nicolas Capens519cf222020-05-08 15:27:19 -04002779Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002780{
2781 return T(Type_v4i16);
2782}
2783
2784UShort4::UShort4(RValue<Float4> cast, bool saturate)
2785{
2786 if(saturate)
2787 {
2788 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002789 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002790 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2791 // PackUnsigned takes care of 0x0000 saturation.
2792 Int4 int4(Min(cast, Float4(0xFFFF)));
2793 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002794 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002795 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002796 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002797 // ARM saturates the 32-bit integer result on overflow/undeflow.
2798 Int4 int4(cast);
2799 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002800 }
2801 else
2802 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002803 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002804 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002805 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002806 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002807 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002808 *this = Short4(Int4(cast));
2809 }
2810}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002811
Nicolas Capens157ba262019-12-10 17:49:14 -05002812RValue<UShort> Extract(RValue<UShort4> val, int i)
2813{
Nicolas Capens519cf222020-05-08 15:27:19 -04002814 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002815}
2816
2817RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2818{
2819 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2820}
2821
2822RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2823{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002824 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002825 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002826
Nicolas Capens157ba262019-12-10 17:49:14 -05002827 {
2828 UShort4 result;
2829 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2830 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2831 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2832 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2833
2834 return result;
2835 }
2836 else
2837 {
2838 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2839 }
2840}
2841
2842RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2843{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002844 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002845 if(emulateIntrinsics)
2846 {
2847 UShort4 result;
2848 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2849 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2850 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2851 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2852
2853 return result;
2854 }
2855 else
2856 {
2857 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2858 }
2859}
2860
2861RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002864 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2865 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2866 ::basicBlock->appendInst(cmp);
2867
2868 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2869 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2870 ::basicBlock->appendInst(select);
2871
2872 return RValue<UShort4>(V(result));
2873}
2874
2875RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2876{
2877 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2878 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2879 ::basicBlock->appendInst(cmp);
2880
2881 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2882 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2883 ::basicBlock->appendInst(select);
2884
2885 return RValue<UShort4>(V(result));
2886}
2887
2888RValue<UShort> SaturateUnsigned(RValue<Int> x)
2889{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002890 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002891 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2892}
2893
2894RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2895{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002896 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002897 if(emulateIntrinsics)
2898 {
2899 UShort4 result;
2900 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2901 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2902 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2903 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2904
2905 return result;
2906 }
2907 else
2908 {
2909 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002910 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002911 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2912 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2913 paddusw->addArg(x.value);
2914 paddusw->addArg(y.value);
2915 ::basicBlock->appendInst(paddusw);
2916
2917 return RValue<UShort4>(V(result));
2918 }
2919}
2920
2921RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2922{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002923 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002924 if(emulateIntrinsics)
2925 {
2926 UShort4 result;
2927 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2928 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2929 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2930 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2931
2932 return result;
2933 }
2934 else
2935 {
2936 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002937 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002938 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2939 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2940 psubusw->addArg(x.value);
2941 psubusw->addArg(y.value);
2942 ::basicBlock->appendInst(psubusw);
2943
2944 return RValue<UShort4>(V(result));
2945 }
2946}
2947
2948RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2949{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002950 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002951 if(emulateIntrinsics)
2952 {
2953 UShort4 result;
2954 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2955 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2956 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2957 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2958
2959 return result;
2960 }
2961 else
2962 {
2963 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002964 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002965 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2966 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2967 pmulhuw->addArg(x.value);
2968 pmulhuw->addArg(y.value);
2969 ::basicBlock->appendInst(pmulhuw);
2970
2971 return RValue<UShort4>(V(result));
2972 }
2973}
2974
2975RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2976{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002977 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002978 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2979
2980 // Scalarized implementation.
2981 Int4 result;
2982 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2983 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2984 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2985 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2986
2987 return result;
2988}
2989
2990RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2991{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002992 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002993 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2994
2995 if(false) // Partial product based implementation.
2996 {
2997 auto xh = x >> 16;
2998 auto yh = y >> 16;
2999 auto xl = x & UInt4(0x0000FFFF);
3000 auto yl = y & UInt4(0x0000FFFF);
3001 auto xlyh = xl * yh;
3002 auto xhyl = xh * yl;
3003 auto xlyhh = xlyh >> 16;
3004 auto xhylh = xhyl >> 16;
3005 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3006 auto xhyll = xhyl & UInt4(0x0000FFFF);
3007 auto xlylh = (xl * yl) >> 16;
3008 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3009
3010 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003011 }
3012
Nicolas Capens157ba262019-12-10 17:49:14 -05003013 // Scalarized implementation.
3014 Int4 result;
3015 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3016 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3017 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3018 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3019
3020 return As<UInt4>(result);
3021}
3022
3023RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3024{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003025 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003026 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003027 return UShort4(0);
3028}
3029
Nicolas Capens519cf222020-05-08 15:27:19 -04003030Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003031{
3032 return T(Type_v4i16);
3033}
3034
3035RValue<Short> Extract(RValue<Short8> val, int i)
3036{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003037 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens519cf222020-05-08 15:27:19 -04003038 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003039}
3040
3041RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3042{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003043 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003044 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
3045}
3046
3047RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3048{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003049 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003050 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003051 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003052 Short8 result;
3053 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3054 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3055 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3056 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3057 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3058 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3059 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3060 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003061
Nicolas Capens157ba262019-12-10 17:49:14 -05003062 return result;
3063 }
3064 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003065 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003066 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003067 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003068}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003069
Nicolas Capens157ba262019-12-10 17:49:14 -05003070RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3071{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003072 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003073 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003074 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003075 Short8 result;
3076 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3077 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3078 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3079 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3080 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3081 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3082 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3083 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003084
Nicolas Capens157ba262019-12-10 17:49:14 -05003085 return result;
3086 }
3087 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003088 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003089 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003090 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003091}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003092
Nicolas Capens157ba262019-12-10 17:49:14 -05003093RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3094{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003095 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003096 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003097 return Int4(0);
3098}
3099
3100RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3101{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003102 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003103 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003104 return Short8(0);
3105}
3106
Nicolas Capens519cf222020-05-08 15:27:19 -04003107Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003108{
3109 return T(Ice::IceType_v8i16);
3110}
3111
3112RValue<UShort> Extract(RValue<UShort8> val, int i)
3113{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003114 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens519cf222020-05-08 15:27:19 -04003115 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003116}
3117
3118RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3119{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003120 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003121 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
3122}
3123
3124RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3125{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003126 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003127 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003128 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003129 UShort8 result;
3130 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3131 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3132 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3133 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3134 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3135 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3136 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3137 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003138
Nicolas Capens157ba262019-12-10 17:49:14 -05003139 return result;
3140 }
3141 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003142 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003143 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003144 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003145}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003146
Nicolas Capens157ba262019-12-10 17:49:14 -05003147RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3148{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003149 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003150 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003151 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003152 UShort8 result;
3153 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3154 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3155 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3156 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3157 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3158 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3159 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3160 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003161
Nicolas Capens157ba262019-12-10 17:49:14 -05003162 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003163 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003164 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003165 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003166 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003167 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003168}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003169
Nicolas Capens157ba262019-12-10 17:49:14 -05003170RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3171{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003172 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003173 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003174 return UShort8(0);
3175}
3176
Nicolas Capens519cf222020-05-08 15:27:19 -04003177Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003178{
3179 return T(Ice::IceType_v8i16);
3180}
3181
Ben Clayton713b8d32019-12-17 20:37:56 +00003182RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003183{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003184 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003185 RValue<Int> res = val;
3186 val += 1;
3187 return res;
3188}
3189
Ben Clayton713b8d32019-12-17 20:37:56 +00003190const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003191{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003192 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003193 val += 1;
3194 return val;
3195}
3196
Ben Clayton713b8d32019-12-17 20:37:56 +00003197RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003198{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003199 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003200 RValue<Int> res = val;
3201 val -= 1;
3202 return res;
3203}
3204
Ben Clayton713b8d32019-12-17 20:37:56 +00003205const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003206{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003207 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003208 val -= 1;
3209 return val;
3210}
3211
3212RValue<Int> RoundInt(RValue<Float> cast)
3213{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003214 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003215 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003216 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003217 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3218 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003219 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003220 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003221 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003222 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003223 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003224 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3225 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3226 nearbyint->addArg(cast.value);
3227 ::basicBlock->appendInst(nearbyint);
3228
3229 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003230 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003231}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003232
Nicolas Capens519cf222020-05-08 15:27:19 -04003233Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003234{
3235 return T(Ice::IceType_i32);
3236}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003237
Nicolas Capens519cf222020-05-08 15:27:19 -04003238Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003239{
3240 return T(Ice::IceType_i64);
3241}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003242
Nicolas Capens157ba262019-12-10 17:49:14 -05003243UInt::UInt(RValue<Float> cast)
3244{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003245 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003246 // Smallest positive value representable in UInt, but not in Int
3247 const unsigned int ustart = 0x80000000u;
3248 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003249
Nicolas Capens157ba262019-12-10 17:49:14 -05003250 // If the value is negative, store 0, otherwise store the result of the conversion
3251 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003252 // Check if the value can be represented as an Int
3253 IfThenElse(cast >= ustartf,
3254 // If the value is too large, subtract ustart and re-add it after conversion.
3255 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3256 // Otherwise, just convert normally
3257 Int(cast)))
3258 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05003259}
Nicolas Capensa8086512016-11-07 17:32:17 -05003260
Ben Clayton713b8d32019-12-17 20:37:56 +00003261RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003262{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003263 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003264 RValue<UInt> res = val;
3265 val += 1;
3266 return res;
3267}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003268
Ben Clayton713b8d32019-12-17 20:37:56 +00003269const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003270{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003271 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003272 val += 1;
3273 return val;
3274}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003275
Ben Clayton713b8d32019-12-17 20:37:56 +00003276RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003277{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003278 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003279 RValue<UInt> res = val;
3280 val -= 1;
3281 return res;
3282}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003283
Ben Clayton713b8d32019-12-17 20:37:56 +00003284const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003285{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003286 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003287 val -= 1;
3288 return val;
3289}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003290
Nicolas Capens598f8d82016-09-26 15:09:10 -04003291// RValue<UInt> RoundUInt(RValue<Float> cast)
3292// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003293// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003294// }
3295
Nicolas Capens519cf222020-05-08 15:27:19 -04003296Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003297{
3298 return T(Ice::IceType_i32);
3299}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003300
3301// Int2::Int2(RValue<Int> cast)
3302// {
Nicolas Capens519cf222020-05-08 15:27:19 -04003303// Value *extend = Nucleus::createZExt(cast.value, Long::type());
3304// Value *vector = Nucleus::createBitCast(extend, Int2::type());
Nicolas Capens598f8d82016-09-26 15:09:10 -04003305//
3306// Constant *shuffle[2];
3307// shuffle[0] = Nucleus::createConstantInt(0);
3308// shuffle[1] = Nucleus::createConstantInt(0);
3309//
Nicolas Capens519cf222020-05-08 15:27:19 -04003310// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003311//
3312// storeValue(replicate);
3313// }
3314
Nicolas Capens157ba262019-12-10 17:49:14 -05003315RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3316{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003317 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003318 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003319 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003320 Int2 result;
3321 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3322 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003323
Nicolas Capens157ba262019-12-10 17:49:14 -05003324 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003325 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003326 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003327 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003328 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003329 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003330}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003331
Nicolas Capens157ba262019-12-10 17:49:14 -05003332RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3333{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003334 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003335 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003336 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003337 Int2 result;
3338 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3339 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3340
3341 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003342 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003343 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003344 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003345 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003346 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003347}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003348
Nicolas Capens519cf222020-05-08 15:27:19 -04003349Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003350{
3351 return T(Type_v2i32);
3352}
3353
3354RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3355{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003356 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003357 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003358 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003359 UInt2 result;
3360 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3361 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003362
Nicolas Capens157ba262019-12-10 17:49:14 -05003363 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003364 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003365 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003366 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003367 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003368 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003369}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003370
Nicolas Capens157ba262019-12-10 17:49:14 -05003371RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3372{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003373 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003374 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003375 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003376 UInt2 result;
3377 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3378 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003379
Nicolas Capens157ba262019-12-10 17:49:14 -05003380 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003381 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003382 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003383 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003384 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003385 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003386}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003387
Nicolas Capens519cf222020-05-08 15:27:19 -04003388Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003389{
3390 return T(Type_v2i32);
3391}
3392
Ben Clayton713b8d32019-12-17 20:37:56 +00003393Int4::Int4(RValue<Byte4> cast)
3394 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003395{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003396 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens519cf222020-05-08 15:27:19 -04003397 Value *x = Nucleus::createBitCast(cast.value, Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003398 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3399
3400 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003401 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003402 Value *b = Nucleus::createBitCast(a, Byte16::type());
3403 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003404
Ben Clayton713b8d32019-12-17 20:37:56 +00003405 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003406 Value *d = Nucleus::createBitCast(c, Short8::type());
3407 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003408
Nicolas Capens519cf222020-05-08 15:27:19 -04003409 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003410 storeValue(f);
3411}
3412
Ben Clayton713b8d32019-12-17 20:37:56 +00003413Int4::Int4(RValue<SByte4> cast)
3414 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003415{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003416 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens519cf222020-05-08 15:27:19 -04003417 Value *x = Nucleus::createBitCast(cast.value, Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003418 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3419
Ben Clayton713b8d32019-12-17 20:37:56 +00003420 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003421 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003422 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3423
Ben Clayton713b8d32019-12-17 20:37:56 +00003424 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003425 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003426 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3427
3428 *this = As<Int4>(e) >> 24;
3429}
3430
Ben Clayton713b8d32019-12-17 20:37:56 +00003431Int4::Int4(RValue<Short4> cast)
3432 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003433{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003434 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003435 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003436 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3437
3438 *this = As<Int4>(c) >> 16;
3439}
3440
Ben Clayton713b8d32019-12-17 20:37:56 +00003441Int4::Int4(RValue<UShort4> cast)
3442 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003443{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003444 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003445 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003446 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003447 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003448 storeValue(d);
3449}
3450
Ben Clayton713b8d32019-12-17 20:37:56 +00003451Int4::Int4(RValue<Int> rhs)
3452 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003453{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003454 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens519cf222020-05-08 15:27:19 -04003455 Value *vector = Nucleus::createBitCast(rhs.value, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003456
Ben Clayton713b8d32019-12-17 20:37:56 +00003457 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003458 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3459
3460 storeValue(replicate);
3461}
3462
3463RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3464{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003465 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003466 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003467 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003468 Int4 result;
3469 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3470 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3471 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3472 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003473
Nicolas Capens157ba262019-12-10 17:49:14 -05003474 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003475 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003476 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003477 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003478 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003479 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003480}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003481
Nicolas Capens157ba262019-12-10 17:49:14 -05003482RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3483{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003484 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003485 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003486 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003487 Int4 result;
3488 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3489 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3490 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3491 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003492
Nicolas Capens157ba262019-12-10 17:49:14 -05003493 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003494 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003495 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003496 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003497 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003498 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003499}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003500
Nicolas Capens157ba262019-12-10 17:49:14 -05003501RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3502{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003503 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003504 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
3505}
3506
3507RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3508{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003509 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003510 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
3511}
3512
3513RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3514{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003515 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003516 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
3517}
3518
3519RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3520{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003521 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003522 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
3523}
3524
3525RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3526{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003527 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003528 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
3529}
3530
3531RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3532{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003533 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003534 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
3535}
3536
3537RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3538{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003539 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003540 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3541 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3542 ::basicBlock->appendInst(cmp);
3543
3544 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3545 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3546 ::basicBlock->appendInst(select);
3547
3548 return RValue<Int4>(V(result));
3549}
3550
3551RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3552{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003553 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003554 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3555 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3556 ::basicBlock->appendInst(cmp);
3557
3558 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3559 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3560 ::basicBlock->appendInst(select);
3561
3562 return RValue<Int4>(V(result));
3563}
3564
3565RValue<Int4> RoundInt(RValue<Float4> cast)
3566{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003567 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003568 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003569 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003570 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3571 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003572 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003573 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003574 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003575 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003576 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003577 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3578 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3579 nearbyint->addArg(cast.value);
3580 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003581
3582 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003583 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003584}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003585
Nicolas Capens157ba262019-12-10 17:49:14 -05003586RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3587{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003588 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003589 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003590 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003591 Short8 result;
3592 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3593 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3594 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3595 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3596 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3597 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3598 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3599 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003600
Nicolas Capens157ba262019-12-10 17:49:14 -05003601 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003602 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003603 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003604 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003605 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003606 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003607 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3608 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3609 pack->addArg(x.value);
3610 pack->addArg(y.value);
3611 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003612
Nicolas Capens157ba262019-12-10 17:49:14 -05003613 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003614 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003615}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003616
Nicolas Capens157ba262019-12-10 17:49:14 -05003617RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3618{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003619 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003620 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003621 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003622 RValue<Int4> sx = As<Int4>(x);
3623 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003624
Nicolas Capens157ba262019-12-10 17:49:14 -05003625 RValue<Int4> sy = As<Int4>(y);
3626 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003627
Nicolas Capens157ba262019-12-10 17:49:14 -05003628 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003629 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003630 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003631 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003632 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003633 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003634 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3635 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3636 pack->addArg(x.value);
3637 pack->addArg(y.value);
3638 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003639
Nicolas Capens157ba262019-12-10 17:49:14 -05003640 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003641 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003642}
Nicolas Capens33438a62017-09-27 11:47:35 -04003643
Nicolas Capens157ba262019-12-10 17:49:14 -05003644RValue<Int> SignMask(RValue<Int4> x)
3645{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003646 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003647 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003648 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003649 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3650 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003651 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003652 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003653 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003654 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003655 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003656 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3657 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3658 movmsk->addArg(x.value);
3659 ::basicBlock->appendInst(movmsk);
3660
3661 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003662 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003663}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003664
Nicolas Capens519cf222020-05-08 15:27:19 -04003665Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003666{
3667 return T(Ice::IceType_v4i32);
3668}
3669
Ben Clayton713b8d32019-12-17 20:37:56 +00003670UInt4::UInt4(RValue<Float4> cast)
3671 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003672{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003673 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003674 // Smallest positive value representable in UInt, but not in Int
3675 const unsigned int ustart = 0x80000000u;
3676 const float ustartf = float(ustart);
3677
3678 // Check if the value can be represented as an Int
3679 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3680 // If the value is too large, subtract ustart and re-add it after conversion.
3681 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003682 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003683 (~uiValue & Int4(cast));
3684 // If the value is negative, store 0, otherwise store the result of the conversion
3685 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3686}
3687
Ben Clayton713b8d32019-12-17 20:37:56 +00003688UInt4::UInt4(RValue<UInt> rhs)
3689 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003690{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003691 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens519cf222020-05-08 15:27:19 -04003692 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003693
Ben Clayton713b8d32019-12-17 20:37:56 +00003694 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003695 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3696
3697 storeValue(replicate);
3698}
3699
3700RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3701{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003702 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003703 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003704 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003705 UInt4 result;
3706 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3707 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3708 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3709 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003710
Nicolas Capens157ba262019-12-10 17:49:14 -05003711 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003712 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003713 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003714 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003715 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003716 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003717}
Ben Clayton88816fa2019-05-15 17:08:14 +01003718
Nicolas Capens157ba262019-12-10 17:49:14 -05003719RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3720{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003721 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003722 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003723 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003724 UInt4 result;
3725 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3726 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3727 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3728 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003729
Nicolas Capens157ba262019-12-10 17:49:14 -05003730 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003731 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003732 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003733 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003734 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003735 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003736}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003737
Nicolas Capens157ba262019-12-10 17:49:14 -05003738RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3739{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003740 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003741 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3742}
3743
3744RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3745{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003746 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003747 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3748}
3749
3750RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3751{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003752 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003753 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3754}
3755
3756RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3757{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003758 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003759 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3760}
3761
3762RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3763{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003764 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003765 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3766}
3767
3768RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3769{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003770 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003771 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3772}
3773
3774RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3775{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003776 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003777 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3778 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3779 ::basicBlock->appendInst(cmp);
3780
3781 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3782 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3783 ::basicBlock->appendInst(select);
3784
3785 return RValue<UInt4>(V(result));
3786}
3787
3788RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3789{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003790 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003791 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3792 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3793 ::basicBlock->appendInst(cmp);
3794
3795 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3796 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3797 ::basicBlock->appendInst(select);
3798
3799 return RValue<UInt4>(V(result));
3800}
3801
Nicolas Capens519cf222020-05-08 15:27:19 -04003802Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003803{
3804 return T(Ice::IceType_v4i32);
3805}
3806
Nicolas Capens519cf222020-05-08 15:27:19 -04003807Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003808{
3809 return T(Ice::IceType_i16);
3810}
3811
3812RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3813{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003814 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003815 return 1.0f / x;
3816}
3817
3818RValue<Float> RcpSqrt_pp(RValue<Float> x)
3819{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003820 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003821 return Rcp_pp(Sqrt(x));
3822}
3823
3824RValue<Float> Sqrt(RValue<Float> x)
3825{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003826 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003827 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003828 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003829 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3830 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3831 sqrt->addArg(x.value);
3832 ::basicBlock->appendInst(sqrt);
3833
3834 return RValue<Float>(V(result));
3835}
3836
3837RValue<Float> Round(RValue<Float> x)
3838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003840 return Float4(Round(Float4(x))).x;
3841}
3842
3843RValue<Float> Trunc(RValue<Float> x)
3844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003846 return Float4(Trunc(Float4(x))).x;
3847}
3848
3849RValue<Float> Frac(RValue<Float> x)
3850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003852 return Float4(Frac(Float4(x))).x;
3853}
3854
3855RValue<Float> Floor(RValue<Float> x)
3856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003858 return Float4(Floor(Float4(x))).x;
3859}
3860
3861RValue<Float> Ceil(RValue<Float> x)
3862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003864 return Float4(Ceil(Float4(x))).x;
3865}
3866
Nicolas Capens519cf222020-05-08 15:27:19 -04003867Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003868{
3869 return T(Ice::IceType_f32);
3870}
3871
Nicolas Capens519cf222020-05-08 15:27:19 -04003872Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003873{
3874 return T(Type_v2f32);
3875}
3876
Ben Clayton713b8d32019-12-17 20:37:56 +00003877Float4::Float4(RValue<Float> rhs)
3878 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003879{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003880 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens519cf222020-05-08 15:27:19 -04003881 Value *vector = Nucleus::createBitCast(rhs.value, Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003882
Ben Clayton713b8d32019-12-17 20:37:56 +00003883 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003884 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3885
3886 storeValue(replicate);
3887}
3888
3889RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3890{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003891 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003892 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3893 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3894 ::basicBlock->appendInst(cmp);
3895
3896 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3897 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3898 ::basicBlock->appendInst(select);
3899
3900 return RValue<Float4>(V(result));
3901}
3902
3903RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3904{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003905 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003906 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3907 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3908 ::basicBlock->appendInst(cmp);
3909
3910 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3911 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3912 ::basicBlock->appendInst(select);
3913
3914 return RValue<Float4>(V(result));
3915}
3916
3917RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3918{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003919 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003920 return Float4(1.0f) / x;
3921}
3922
3923RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3924{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003925 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003926 return Rcp_pp(Sqrt(x));
3927}
3928
3929RValue<Float4> Sqrt(RValue<Float4> x)
3930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003932 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003933 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003934 Float4 result;
3935 result.x = Sqrt(Float(Float4(x).x));
3936 result.y = Sqrt(Float(Float4(x).y));
3937 result.z = Sqrt(Float(Float4(x).z));
3938 result.w = Sqrt(Float(Float4(x).w));
3939
3940 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003941 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003942 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003943 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003944 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003945 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003946 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3947 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3948 sqrt->addArg(x.value);
3949 ::basicBlock->appendInst(sqrt);
3950
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003951 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003952 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003953}
Nicolas Capens157ba262019-12-10 17:49:14 -05003954
3955RValue<Int> SignMask(RValue<Float4> x)
3956{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003957 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003958 if(emulateIntrinsics || CPUID::ARM)
3959 {
3960 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3961 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3962 }
3963 else
3964 {
3965 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003966 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003967 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3968 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3969 movmsk->addArg(x.value);
3970 ::basicBlock->appendInst(movmsk);
3971
3972 return RValue<Int>(V(result));
3973 }
3974}
3975
3976RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3977{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003978 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003979 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3980}
3981
3982RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3983{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003984 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003985 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3986}
3987
3988RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3989{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003990 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003991 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3992}
3993
3994RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3995{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003996 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003997 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3998}
3999
4000RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4001{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004002 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004003 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
4004}
4005
4006RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4007{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004008 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004009 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
4010}
4011
4012RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4013{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004014 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004015 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
4016}
4017
4018RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4019{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004020 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004021 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
4022}
4023
4024RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4025{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004026 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004027 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
4028}
4029
4030RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4031{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004032 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004033 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
4034}
4035
4036RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4037{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004038 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004039 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
4040}
4041
4042RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4043{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004044 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004045 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
4046}
4047
4048RValue<Float4> Round(RValue<Float4> x)
4049{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004050 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004051 if(emulateIntrinsics || CPUID::ARM)
4052 {
4053 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4054 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4055 }
4056 else if(CPUID::SSE4_1)
4057 {
4058 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004059 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004060 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4061 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4062 round->addArg(x.value);
4063 round->addArg(::context->getConstantInt32(0));
4064 ::basicBlock->appendInst(round);
4065
4066 return RValue<Float4>(V(result));
4067 }
4068 else
4069 {
4070 return Float4(RoundInt(x));
4071 }
4072}
4073
4074RValue<Float4> Trunc(RValue<Float4> x)
4075{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004076 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004077 if(CPUID::SSE4_1)
4078 {
4079 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004080 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004081 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4082 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4083 round->addArg(x.value);
4084 round->addArg(::context->getConstantInt32(3));
4085 ::basicBlock->appendInst(round);
4086
4087 return RValue<Float4>(V(result));
4088 }
4089 else
4090 {
4091 return Float4(Int4(x));
4092 }
4093}
4094
4095RValue<Float4> Frac(RValue<Float4> x)
4096{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004097 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004098 Float4 frc;
4099
4100 if(CPUID::SSE4_1)
4101 {
4102 frc = x - Floor(x);
4103 }
4104 else
4105 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004106 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004107
Ben Clayton713b8d32019-12-17 20:37:56 +00004108 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004109 }
4110
4111 // x - floor(x) can be 1.0 for very small negative x.
4112 // Clamp against the value just below 1.0.
4113 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4114}
4115
4116RValue<Float4> Floor(RValue<Float4> x)
4117{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004118 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004119 if(CPUID::SSE4_1)
4120 {
4121 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004122 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004123 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4124 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4125 round->addArg(x.value);
4126 round->addArg(::context->getConstantInt32(1));
4127 ::basicBlock->appendInst(round);
4128
4129 return RValue<Float4>(V(result));
4130 }
4131 else
4132 {
4133 return x - Frac(x);
4134 }
4135}
4136
4137RValue<Float4> Ceil(RValue<Float4> x)
4138{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004139 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004140 if(CPUID::SSE4_1)
4141 {
4142 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004143 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004144 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4145 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4146 round->addArg(x.value);
4147 round->addArg(::context->getConstantInt32(2));
4148 ::basicBlock->appendInst(round);
4149
4150 return RValue<Float4>(V(result));
4151 }
4152 else
4153 {
4154 return -Floor(-x);
4155 }
4156}
4157
Nicolas Capens519cf222020-05-08 15:27:19 -04004158Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004159{
4160 return T(Ice::IceType_v4f32);
4161}
4162
4163RValue<Long> Ticks()
4164{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004165 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004166 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004167 return Long(Int(0));
4168}
4169
Ben Clayton713b8d32019-12-17 20:37:56 +00004170RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004171{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004172 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004173 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004174}
4175
Ben Clayton713b8d32019-12-17 20:37:56 +00004176RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004177{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004178 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004179 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004180}
4181
Ben Clayton713b8d32019-12-17 20:37:56 +00004182Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004183{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004184 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano16ae92a2020-03-10 10:53:24 -04004185 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004186}
4187
4188void Breakpoint()
4189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004190 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004191 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004192 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4193 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4194 ::basicBlock->appendInst(trap);
4195}
4196
Ben Clayton713b8d32019-12-17 20:37:56 +00004197void Nucleus::createFence(std::memory_order memoryOrder)
4198{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004199 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004200 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4201 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4202 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4203 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4204 inst->addArg(order);
4205 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004206}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004207
Ben Clayton713b8d32019-12-17 20:37:56 +00004208Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4209{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004210 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004211 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004212 return nullptr;
4213}
4214void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4215{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004216 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004217 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004218}
Nicolas Capens157ba262019-12-10 17:49:14 -05004219
4220RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4221{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004222 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004223 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4224}
4225
4226RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4227{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004228 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004229 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4230}
4231
4232void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4233{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004234 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004235 return emulated::Scatter(base, val, offsets, mask, alignment);
4236}
4237
4238void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4239{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004240 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004241 return emulated::Scatter(base, val, offsets, mask, alignment);
4242}
4243
4244RValue<Float> Exp2(RValue<Float> x)
4245{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004246 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004247 return emulated::Exp2(x);
4248}
4249
4250RValue<Float> Log2(RValue<Float> x)
4251{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004252 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004253 return emulated::Log2(x);
4254}
4255
4256RValue<Float4> Sin(RValue<Float4> x)
4257{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004258 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004259 return emulated::Sin(x);
4260}
4261
4262RValue<Float4> Cos(RValue<Float4> x)
4263{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004264 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004265 return emulated::Cos(x);
4266}
4267
4268RValue<Float4> Tan(RValue<Float4> x)
4269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004271 return emulated::Tan(x);
4272}
4273
4274RValue<Float4> Asin(RValue<Float4> x)
4275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004277 return emulated::Asin(x);
4278}
4279
4280RValue<Float4> Acos(RValue<Float4> x)
4281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004282 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004283 return emulated::Acos(x);
4284}
4285
4286RValue<Float4> Atan(RValue<Float4> x)
4287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004289 return emulated::Atan(x);
4290}
4291
4292RValue<Float4> Sinh(RValue<Float4> x)
4293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004295 return emulated::Sinh(x);
4296}
4297
4298RValue<Float4> Cosh(RValue<Float4> x)
4299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004301 return emulated::Cosh(x);
4302}
4303
4304RValue<Float4> Tanh(RValue<Float4> x)
4305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004307 return emulated::Tanh(x);
4308}
4309
4310RValue<Float4> Asinh(RValue<Float4> x)
4311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004313 return emulated::Asinh(x);
4314}
4315
4316RValue<Float4> Acosh(RValue<Float4> x)
4317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004319 return emulated::Acosh(x);
4320}
4321
4322RValue<Float4> Atanh(RValue<Float4> x)
4323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004325 return emulated::Atanh(x);
4326}
4327
4328RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004331 return emulated::Atan2(x, y);
4332}
4333
4334RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004337 return emulated::Pow(x, y);
4338}
4339
4340RValue<Float4> Exp(RValue<Float4> x)
4341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004343 return emulated::Exp(x);
4344}
4345
4346RValue<Float4> Log(RValue<Float4> x)
4347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004348 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004349 return emulated::Log(x);
4350}
4351
4352RValue<Float4> Exp2(RValue<Float4> x)
4353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004354 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004355 return emulated::Exp2(x);
4356}
4357
4358RValue<Float4> Log2(RValue<Float4> x)
4359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004360 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004361 return emulated::Log2(x);
4362}
4363
4364RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4365{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004366 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004367 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004368 {
Ben Claytonce54c592020-02-07 11:30:51 +00004369 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004370 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004371 }
4372 else
4373 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004374 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004375 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4376 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4377 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4378 ctlz->addArg(x.value);
4379 ::basicBlock->appendInst(ctlz);
4380
4381 return RValue<UInt>(V(result));
4382 }
4383}
4384
4385RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4386{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004387 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004388 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004389 {
Ben Claytonce54c592020-02-07 11:30:51 +00004390 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004391 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004392 }
4393 else
4394 {
4395 // TODO: implement vectorized version in Subzero
4396 UInt4 result;
4397 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4398 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4399 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4400 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4401 return result;
4402 }
4403}
4404
4405RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4406{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004407 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004408 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004409 {
Ben Claytonce54c592020-02-07 11:30:51 +00004410 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004411 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004412 }
4413 else
4414 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004415 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004416 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4417 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4418 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4419 ctlz->addArg(x.value);
4420 ::basicBlock->appendInst(ctlz);
4421
4422 return RValue<UInt>(V(result));
4423 }
4424}
4425
4426RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4427{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004428 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004429 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004430 {
Ben Claytonce54c592020-02-07 11:30:51 +00004431 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004432 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004433 }
4434 else
4435 {
4436 // TODO: implement vectorized version in Subzero
4437 UInt4 result;
4438 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4439 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4440 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4441 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4442 return result;
4443 }
4444}
4445
Antonio Maiorano370cba52019-12-31 11:36:07 -05004446RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4447{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004448 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004449 return emulated::MinAtomic(x, y, memoryOrder);
4450}
4451
4452RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4453{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004454 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004455 return emulated::MinAtomic(x, y, memoryOrder);
4456}
4457
4458RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4459{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004460 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004461 return emulated::MaxAtomic(x, y, memoryOrder);
4462}
4463
4464RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4465{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004466 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004467 return emulated::MaxAtomic(x, y, memoryOrder);
4468}
4469
// Emits a print of the caller's location, but only when both
// ENABLE_RR_DEBUG_INFO and ENABLE_RR_EMIT_PRINT_LOCATION are defined;
// otherwise this is a no-op.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO) && defined(ENABLE_RR_EMIT_PRINT_LOCATION)
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO && ENABLE_RR_EMIT_PRINT_LOCATION
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004478void EmitDebugVariable(Value *value) {}
// Intentionally empty in this backend.
void FlushDebug()
{
}
4480
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004481namespace {
4482namespace coro {
4483
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004484// Instance data per generated coroutine
4485// This is the "handle" type used for Coroutine functions
4486// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4487struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004488{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004489 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004490 bool done = false; // the coroutine should stop at the next yield()
4491 bool terminated = false; // the coroutine has finished.
4492 bool inRoutine = false; // is the coroutine currently executing?
4493 marl::Scheduler::Fiber *mainFiber = nullptr;
4494 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004495 void *promisePtr = nullptr;
4496};
4497
4498CoroutineData *createCoroutineData()
4499{
4500 return new CoroutineData{};
4501}
4502
4503void destroyCoroutineData(CoroutineData *coroData)
4504{
4505 delete coroData;
4506}
4507
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004508// suspend() pauses execution of the coroutine, and resumes execution from the
4509// caller's call to await().
4510// Returns true if await() is called again, or false if coroutine_destroy()
4511// is called.
4512bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004513{
Ben Claytonc3466532020-03-24 11:54:05 +00004514 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4515 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4516 ASSERT(coroData->inRoutine);
4517 coroData->inRoutine = false;
4518 coroData->mainFiber->notify();
4519 while(!coroData->inRoutine)
4520 {
4521 coroData->routineFiber->wait();
4522 }
4523 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004524}
4525
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004526// resume() is called by await(), blocking until the coroutine calls yield()
4527// or the coroutine terminates.
4528void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004529{
Ben Claytonc3466532020-03-24 11:54:05 +00004530 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4531 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4532 ASSERT(!coroData->inRoutine);
4533 coroData->inRoutine = true;
4534 coroData->routineFiber->notify();
4535 while(coroData->inRoutine)
4536 {
4537 coroData->mainFiber->wait();
4538 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004539}
4540
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004541// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4542// until the coroutine ends, and deletes the coroutine data.
4543void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004544{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004545 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004546 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4547 ASSERT(!coroData->inRoutine);
4548 if(!coroData->terminated)
4549 {
4550 coroData->done = true;
4551 coroData->inRoutine = true;
4552 coroData->routineFiber->notify();
4553 while(!coroData->terminated)
4554 {
4555 coroData->mainFiber->wait();
4556 }
4557 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004558 if(coroData->useInternalScheduler)
4559 {
4560 ::getOrCreateScheduler().unbind();
4561 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004562 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004563}
4564
4565namespace detail {
4566thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4567} // namespace detail
4568
4569void setHandleParam(Nucleus::CoroutineHandle handle)
4570{
4571 ASSERT(!detail::coroHandle);
4572 detail::coroHandle = handle;
4573}
4574
4575Nucleus::CoroutineHandle getHandleParam()
4576{
4577 ASSERT(detail::coroHandle);
4578 auto handle = detail::coroHandle;
4579 detail::coroHandle = {};
4580 return handle;
4581}
4582
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004583bool isDone(Nucleus::CoroutineHandle handle)
4584{
4585 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004586 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004587}
4588
4589void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4590{
4591 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4592 coroData->promisePtr = promisePtr;
4593}
4594
4595void *getPromisePtr(Nucleus::CoroutineHandle handle)
4596{
4597 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4598 return coroData->promisePtr;
4599}
4600
4601} // namespace coro
4602} // namespace
4603
4604// Used to generate coroutines.
4605// Lifetime: from yield to acquireCoroutine
4606class CoroutineGenerator
4607{
4608public:
4609 CoroutineGenerator()
4610 {
4611 }
4612
4613 // Inserts instructions at the top of the current function to make it a coroutine.
4614 void generateCoroutineBegin()
4615 {
4616 // Begin building the main coroutine_begin() function.
4617 // We insert these instructions at the top of the entry node,
4618 // before existing reactor-generated instructions.
4619
4620 // CoroutineHandle coroutine_begin(<Arguments>)
4621 // {
4622 // this->handle = coro::getHandleParam();
4623 //
4624 // YieldType promise;
4625 // coro::setPromisePtr(handle, &promise); // For await
4626 //
4627 // ... <REACTOR CODE> ...
4628 //
4629
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004630 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004631 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004632
4633 // YieldType promise;
4634 // coro::setPromisePtr(handle, &promise); // For await
4635 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004636 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004637 }
4638
4639 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4640 void generateYield(Value *val)
4641 {
4642 // ... <REACTOR CODE> ...
4643 //
4644 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004645 // if (!coro::suspend(handle)) {
4646 // return false; // coroutine has been stopped by the caller.
4647 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004648 //
4649 // ... <REACTOR CODE> ...
4650
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004651 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004652 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004653
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004654 // if (!coro::suspend(handle)) {
4655 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4656 auto doneBlock = Nucleus::createBasicBlock();
4657 auto resumeBlock = Nucleus::createBasicBlock();
4658 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4659
4660 // return false; // coroutine has been stopped by the caller.
4661 ::basicBlock = doneBlock;
4662 Nucleus::createRetVoid(); // coroutine return value is ignored.
4663
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004664 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004665 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004666 }
4667
4668 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4669
4670 // Generates the await function for the current coroutine.
4671 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4672 static FunctionUniquePtr generateAwaitFunction()
4673 {
4674 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4675 // {
4676 // if (coro::isDone())
4677 // {
4678 // return false;
4679 // }
4680 // else // resume
4681 // {
4682 // YieldType* promise = coro::getPromisePtr(handle);
4683 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004684 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004685 // return true;
4686 // }
4687 // }
4688
4689 // Subzero doesn't support bool types (IceType_i1) as return type
4690 const Ice::Type ReturnType = Ice::IceType_i32;
4691 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4692 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4693
4694 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4695 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4696
4697 Ice::Variable *handle = awaitFunc->getArgs()[0];
4698 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4699
4700 auto doneBlock = awaitFunc->makeNode();
4701 {
4702 // return false;
4703 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4704 doneBlock->appendInst(ret);
4705 }
4706
4707 auto resumeBlock = awaitFunc->makeNode();
4708 {
4709 // YieldType* promise = coro::getPromisePtr(handle);
4710 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4711
4712 // *out = *promise;
4713 // Load promise value
4714 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4715 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4716 resumeBlock->appendInst(load);
4717 // Then store it in output param
4718 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4719 resumeBlock->appendInst(store);
4720
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004721 // coro::resume(handle);
4722 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004723
4724 // return true;
4725 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4726 resumeBlock->appendInst(ret);
4727 }
4728
4729 // if (coro::isDone())
4730 // {
4731 // <doneBlock>
4732 // }
4733 // else // resume
4734 // {
4735 // <resumeBlock>
4736 // }
4737 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004738 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004739 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4740 bb->appendInst(br);
4741
4742 return FunctionUniquePtr{ awaitFunc };
4743 }
4744
4745 // Generates the destroy function for the current coroutine.
4746 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4747 static FunctionUniquePtr generateDestroyFunction()
4748 {
4749 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4750 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004751 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004752 // return;
4753 // }
4754
4755 const Ice::Type ReturnType = Ice::IceType_void;
4756 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4757
4758 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4759 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4760
4761 Ice::Variable *handle = destroyFunc->getArgs()[0];
4762
4763 auto *bb = destroyFunc->getEntryNode();
4764
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004765 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4766 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004767
4768 // return;
4769 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4770 bb->appendInst(ret);
4771
4772 return FunctionUniquePtr{ destroyFunc };
4773 }
4774
4775private:
4776 Ice::Variable *handle{};
4777 Ice::Variable *promise{};
4778};
4779
4780static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4781{
4782 // This doubles up as our coroutine handle
4783 auto coroData = coro::createCoroutineData();
4784
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004785 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4786 if(coroData->useInternalScheduler)
4787 {
4788 ::getOrCreateScheduler().bind();
4789 }
4790
Ben Clayton76e9e532020-03-16 20:35:04 +00004791 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004792 // Store handle in TLS so that the coroutine can grab it right away, before
4793 // any fiber switch occurs.
4794 coro::setHandleParam(coroData);
4795
Ben Claytonc3466532020-03-24 11:54:05 +00004796 ASSERT(!coroData->routineFiber);
4797 coroData->routineFiber = marl::Scheduler::Fiber::current();
4798
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004799 beginFunc();
4800
Ben Claytonc3466532020-03-24 11:54:05 +00004801 ASSERT(coroData->inRoutine);
4802 coroData->done = true; // coroutine is done.
4803 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4804 coroData->inRoutine = false;
4805 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004806 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004807
Ben Claytonc3466532020-03-24 11:54:05 +00004808 ASSERT(!coroData->mainFiber);
4809 coroData->mainFiber = marl::Scheduler::Fiber::current();
4810
4811 // block until the first yield or coroutine end
4812 ASSERT(!coroData->inRoutine);
4813 coroData->inRoutine = true;
4814 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4815 while(coroData->inRoutine)
4816 {
4817 coroData->mainFiber->wait();
4818 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004819
4820 return coroData;
4821}
4822
4823void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4824{
4825 // Start by creating a regular function
4826 createFunction(yieldType, params);
4827
4828 // Save in case yield() is called
4829 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4830 ::coroYieldType = yieldType;
4831}
4832
4833void Nucleus::yield(Value *val)
4834{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004835 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004836 Variable::materializeAll();
4837
4838 // On first yield, we start generating coroutine functions
4839 if(!::coroGen)
4840 {
4841 ::coroGen = std::make_shared<CoroutineGenerator>();
4842 ::coroGen->generateCoroutineBegin();
4843 }
4844
4845 ASSERT(::coroGen);
4846 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004847}
4848
Ben Clayton713b8d32019-12-17 20:37:56 +00004849static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4850{
4851 return false;
4852}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004853
4854static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4855{
4856}
Nicolas Capens157ba262019-12-10 17:49:14 -05004857
4858std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4859{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004860 if(::coroGen)
4861 {
4862 // Finish generating coroutine functions
4863 {
4864 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004865 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004866 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004867
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004868 auto awaitFunc = ::coroGen->generateAwaitFunction();
4869 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004870
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004871 // At this point, we no longer need the CoroutineGenerator.
4872 ::coroGen.reset();
4873 ::coroYieldType = nullptr;
4874
4875 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4876 { name, "await", "destroy" },
4877 cfgEdit);
4878
4879 return routine;
4880 }
4881 else
4882 {
4883 {
4884 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004885 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004886 }
4887
4888 ::coroYieldType = nullptr;
4889
4890 // Not an actual coroutine (no yields), so return stubs for await and destroy
4891 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4892
4893 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4894 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4895 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4896 return routine;
4897 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004898}
4899
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004900Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004901{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004902 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4903
4904 if(isCoroutine)
4905 {
4906 return rr::invokeCoroutineBegin(func);
4907 }
4908 else
4909 {
4910 // For regular routines, just invoke the begin func directly
4911 return func();
4912 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004913}
Nicolas Capens157ba262019-12-10 17:49:14 -05004914
4915} // namespace rr