blob: 53377cadaaa348c05e483f3619208d89784615ae [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maiorano9c14bda2020-09-18 16:33:36 -040016#include "EmulatedIntrinsics.hpp"
17#include "OptimalIntrinsics.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050018#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000019#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050020#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040021
Nicolas Capens1a3ce872018-10-10 10:42:36 -040022#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000023#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040024
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040026#include "src/IceCfgNode.h"
27#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceELFStreamer.h"
29#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040030#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000031#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040032
Ben Clayton713b8d32019-12-17 20:37:56 +000033#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040034#include "llvm/Support/FileSystem.h"
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -050035#include "llvm/Support/ManagedStatic.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040036#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040037
Antonio Maiorano8bce0672020-02-28 13:13:45 -050038#include "marl/event.h"
39
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000041# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040042#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040043
Nicolas Capensbd65da92017-01-05 16:31:06 -050044#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000045# ifndef WIN32_LEAN_AND_MEAN
46# define WIN32_LEAN_AND_MEAN
47# endif // !WIN32_LEAN_AND_MEAN
48# ifndef NOMINMAX
49# define NOMINMAX
50# endif // !NOMINMAX
51# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050052#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040053
Ben Clayton683bad82020-02-10 23:57:09 +000054#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040055#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000056#include <limits>
57#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040058
// Subzero utility functions.
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050062namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050063
64Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
65{
66 uint32_t sequenceNumber = 0;
Nicolas Capensff010f92021-02-01 12:22:53 -050067 auto *function = Ice::Cfg::create(context, sequenceNumber).release();
68
69 function->setStackSizeLimit(512 * 1024); // 512 KiB
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050070
71 Ice::CfgLocalAllocatorScope allocScope{ function };
72
73 for(auto type : paramTypes)
74 {
75 Ice::Variable *arg = function->makeVariable(type);
76 function->addArg(arg);
77 }
78
79 Ice::CfgNode *node = function->makeNode();
80 function->setEntryNode(node);
81
82 return function;
83}
84
85Ice::Type getPointerType(Ice::Type elementType)
86{
87 if(sizeof(void *) == 8)
88 {
89 return Ice::IceType_i64;
90 }
91 else
92 {
93 return Ice::IceType_i32;
94 }
95}
96
97Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
98{
99 int typeSize = Ice::typeWidthInBytes(type);
100 int totalSize = typeSize * (arraySize ? arraySize : 1);
101
102 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
103 auto address = function->makeVariable(getPointerType(type));
104 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
105 function->getEntryNode()->getInsts().push_front(alloca);
106
107 return address;
108}
109
110Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500111{
112 if(sizeof(void *) == 8)
113 {
114 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
115 }
116 else
117 {
118 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
119 }
120}
121
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400122// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
123Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500124
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400125// Wrapper for calls on C functions with Ice types
126Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
127{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500128 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400129
130 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
131 // then truncate result to bool.
132 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
133 const bool returningBool = (retTy == Ice::IceType_i1);
134 if(returningBool)
135 {
136 ret = function->makeVariable(Ice::IceType_i32);
137 }
138 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500139 {
140 ret = function->makeVariable(retTy);
141 }
142
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400143 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500144 for(auto arg : iceArgs)
145 {
146 call->addArg(arg);
147 }
148
149 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400150
151 if(returningBool)
152 {
153 // Truncate result to bool so that if any (lsb) bits were set, result will be true
154 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
155 }
156
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500157 return ret;
158}
159
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400160Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
161{
162 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
163 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
164}
165
Antonio Maiorano62427e02020-02-13 09:18:05 -0500166// Wrapper for calls on C functions with Ice types
167template<typename Return, typename... CArgs, typename... RArgs>
168Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
169{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400170 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
171
Nicolas Capens519cf222020-05-08 15:27:19 -0400172 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500173 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500174 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500175}
176
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400177Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
178{
179 Ice::Variable *to = function->makeVariable(toType);
180 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
181 basicBlock->appendInst(cast);
182 return to;
183}
184
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500185Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500186{
Antonio Maiorano02a39532020-01-21 15:15:34 -0500187 Ice::Variable *result = function->makeVariable(type);
188 auto load = Ice::InstLoad::create(function, result, ptr, align);
189 basicBlock->appendInst(load);
190
191 return result;
192}
193
194} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500195} // namespace
196
// Forward declarations of rr types referenced by the file-local globals below.
namespace rr {
class CoroutineGenerator;
class ELFMemoryStreamer;
}  // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500201
202namespace {
203
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -0500204// Used to automatically invoke llvm_shutdown() when driver is unloaded
205llvm::llvm_shutdown_obj llvmShutdownObj;
206
Nicolas Capens157ba262019-12-10 17:49:14 -0500207// Default configuration settings. Must be accessed under mutex lock.
208std::mutex defaultConfigLock;
209rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000210{
Nicolas Capens157ba262019-12-10 17:49:14 -0500211 // This uses a static in a function to avoid the cost of a global static
212 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
213 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000214 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500215 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000216}
217
Nicolas Capens157ba262019-12-10 17:49:14 -0500218Ice::GlobalContext *context = nullptr;
219Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400220Ice::CfgNode *entryBlock = nullptr;
221Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500222Ice::CfgNode *basicBlock = nullptr;
223Ice::CfgLocalAllocatorScope *allocator = nullptr;
224rr::ELFMemoryStreamer *routine = nullptr;
225
226std::mutex codegenMutex;
227
228Ice::ELFFileStreamer *elfFile = nullptr;
229Ice::Fdstream *out = nullptr;
230
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500231// Coroutine globals
232rr::Type *coroYieldType = nullptr;
233std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500234marl::Scheduler &getOrCreateScheduler()
235{
236 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100237 marl::Scheduler::Config cfg;
238 cfg.setWorkerThreadCount(8);
239 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500240 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500241
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500242 return *scheduler;
243}
Nicolas Capens157ba262019-12-10 17:49:14 -0500244} // Anonymous namespace
245
246namespace {
247
// Map the MSVC architecture macros onto the GCC/Clang spellings tested below.
#if defined(_M_IX86) && !defined(__i386__)
#	define __i386__ 1
#endif

#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
#	define __x86_64__ 1
#endif
255
Antonio Maiorano370cba52019-12-31 11:36:07 -0500256Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400257{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500258 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100259 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500260 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000261 case rr::Optimization::Level::None: return Ice::Opt_m1;
262 case rr::Optimization::Level::Less: return Ice::Opt_m1;
263 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500264 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
265 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100266 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500267 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400268}
269
Antonio Maiorano370cba52019-12-31 11:36:07 -0500270Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
271{
272 switch(memoryOrder)
273 {
274 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
275 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
276 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
277 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
278 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
279 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
280 }
281 return Ice::Intrinsics::MemoryOrderInvalid;
282}
283
// Host CPU queries. ARM is decided by the target architecture macros; SSE4_1
// is read from the CPUID instruction on x86 and is false everywhere else.
class CPUID
{
public:
	const static bool ARM;
	const static bool SSE4_1;

private:
	// Runs the CPUID instruction for the given leaf on x86 and stores
	// EAX/EBX/ECX/EDX into out[0..3]; on other architectures zeroes out[].
	static void cpuid(int out[4], int leaf)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(out, leaf);
#	else
		__asm volatile("cpuid"
		               : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
		               : "a"(leaf));
#	endif
#else
		for(int i = 0; i < 4; ++i)
		{
			out[i] = 0;
		}
#endif
	}

	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		constexpr int kSSE4_1Mask = 0x00080000;  // bit 19 of ECX, leaf 1

		int regs[4];
		cpuid(regs, 1);

		return (regs[2] & kSSE4_1Mask) != 0;
#else
		return false;
#endif
	}
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
336const bool emulateIntrinsics = false;
337const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400338
Nicolas Capens157ba262019-12-10 17:49:14 -0500339constexpr bool subzeroDumpEnabled = false;
340constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500341
342#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500343static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
344static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500345#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500346
347} // anonymous namespace
348
349namespace rr {
350
// Returns the name of this Reactor backend.
std::string BackendName()
{
	return std::string("Subzero");
}
355
Ben Clayton713b8d32019-12-17 20:37:56 +0000356const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500357 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500358};
359
360enum EmulatedType
361{
362 EmulatedShift = 16,
363 EmulatedV2 = 2 << EmulatedShift,
364 EmulatedV4 = 4 << EmulatedShift,
365 EmulatedV8 = 8 << EmulatedShift,
366 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
367
368 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
369 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
370 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000371 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
372 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500373 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
374};
375
Ben Clayton713b8d32019-12-17 20:37:56 +0000376class Value : public Ice::Operand
377{};
378class SwitchCases : public Ice::InstSwitch
379{};
380class BasicBlock : public Ice::CfgNode
381{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500382
383Ice::Type T(Type *t)
384{
385 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
386 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500387}
388
Nicolas Capens157ba262019-12-10 17:49:14 -0500389Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400390{
Ben Clayton713b8d32019-12-17 20:37:56 +0000391 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500392}
393
394Type *T(EmulatedType t)
395{
Ben Clayton713b8d32019-12-17 20:37:56 +0000396 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500397}
398
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500399std::vector<Ice::Type> T(const std::vector<Type *> &types)
400{
401 std::vector<Ice::Type> result;
402 result.reserve(types.size());
403 for(auto &t : types)
404 {
405 result.push_back(T(t));
406 }
407 return result;
408}
409
Nicolas Capens157ba262019-12-10 17:49:14 -0500410Value *V(Ice::Operand *v)
411{
Ben Clayton713b8d32019-12-17 20:37:56 +0000412 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500413}
414
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500415Ice::Operand *V(Value *v)
416{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500417 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500418}
419
Antonio Maiorano62427e02020-02-13 09:18:05 -0500420std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
421{
422 std::vector<Ice::Operand *> result;
423 result.reserve(values.size());
424 for(auto &v : values)
425 {
426 result.push_back(V(v));
427 }
428 return result;
429}
430
Nicolas Capens157ba262019-12-10 17:49:14 -0500431BasicBlock *B(Ice::CfgNode *b)
432{
Ben Clayton713b8d32019-12-17 20:37:56 +0000433 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500434}
435
436static size_t typeSize(Type *type)
437{
438 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400439 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500440 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800441 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000442 case Type_v2i32: return 8;
443 case Type_v4i16: return 8;
444 case Type_v2i16: return 4;
445 case Type_v8i8: return 8;
446 case Type_v4i8: return 4;
447 case Type_v2f32: return 8;
448 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500449 }
450 }
451
452 return Ice::typeWidthInBytes(T(type));
453}
454
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400455static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500456{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400457 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500458 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
459 {
460 Nucleus::createRetVoid();
461 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400462
463 // Connect the entry block to the top of the initial basic block
464 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
465 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500466}
467
Ben Clayton713b8d32019-12-17 20:37:56 +0000468using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
469using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500470
471inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
472{
Ben Clayton713b8d32019-12-17 20:37:56 +0000473 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500474}
475
476inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
477{
478 return &sectionHeader(elfHeader)[index];
479}
480
481static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
482{
483 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
484
485 uint32_t index = relocation.getSymbol();
486 int table = relocationTable.sh_link;
487 void *symbolValue = nullptr;
488
489 if(index != SHN_UNDEF)
490 {
491 if(table == SHN_UNDEF) return nullptr;
492 const SectionHeader *symbolTable = elfSection(elfHeader, table);
493
494 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
495 if(index >= symtab_entries)
496 {
497 ASSERT(index < symtab_entries && "Symbol Index out of range");
498 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800499 }
500
Nicolas Capens157ba262019-12-10 17:49:14 -0500501 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000502 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500503 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800504
Nicolas Capens157ba262019-12-10 17:49:14 -0500505 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400506 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500507 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000508 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400509 }
510 else
511 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500512 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400513 }
Nicolas Capens66478362016-10-13 15:36:36 -0400514 }
515
Nicolas Capens157ba262019-12-10 17:49:14 -0500516 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000517 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500518
519 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400520 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400521 switch(relocation.getType())
522 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000523 case R_ARM_NONE:
524 // No relocation
525 break;
526 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500527 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000528 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500529 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
530 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
531 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400532 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000533 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500534 {
535 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
536 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
537 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400538 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000539 default:
540 ASSERT(false && "Unsupported relocation type");
541 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400542 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500543 }
544 else
545 {
546 switch(relocation.getType())
547 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000548 case R_386_NONE:
549 // No relocation
550 break;
551 case R_386_32:
552 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
553 break;
554 case R_386_PC32:
555 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
556 break;
557 default:
558 ASSERT(false && "Unsupported relocation type");
559 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500560 }
Nicolas Capens66478362016-10-13 15:36:36 -0400561 }
562
Nicolas Capens157ba262019-12-10 17:49:14 -0500563 return symbolValue;
564}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400565
Nicolas Capens157ba262019-12-10 17:49:14 -0500566static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
567{
568 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
569
570 uint32_t index = relocation.getSymbol();
571 int table = relocationTable.sh_link;
572 void *symbolValue = nullptr;
573
574 if(index != SHN_UNDEF)
575 {
576 if(table == SHN_UNDEF) return nullptr;
577 const SectionHeader *symbolTable = elfSection(elfHeader, table);
578
579 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
580 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400581 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500582 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400583 return nullptr;
584 }
585
Nicolas Capens157ba262019-12-10 17:49:14 -0500586 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000587 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500588 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400589
Nicolas Capens157ba262019-12-10 17:49:14 -0500590 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400591 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500592 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000593 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500594 }
595 else
596 {
597 return nullptr;
598 }
599 }
Nicolas Capens66478362016-10-13 15:36:36 -0400600
Nicolas Capens157ba262019-12-10 17:49:14 -0500601 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000602 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
603 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400604
Nicolas Capens157ba262019-12-10 17:49:14 -0500605 switch(relocation.getType())
606 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000607 case R_X86_64_NONE:
608 // No relocation
609 break;
610 case R_X86_64_64:
611 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
612 break;
613 case R_X86_64_PC32:
614 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
615 break;
616 case R_X86_64_32S:
617 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
618 break;
619 default:
620 ASSERT(false && "Unsupported relocation type");
621 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500622 }
623
624 return symbolValue;
625}
626
// Location and size of one function's machine code inside a loaded ELF image.
struct EntryPoint
{
	const void *entry = nullptr;  // start of the code; nullptr until filled in
	size_t codeSize = 0;          // size of the code in bytes
};
632
633std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
634{
635 ASSERT(functionNames.size() > 0);
636 std::vector<EntryPoint> entryPoints(functionNames.size());
637
Ben Clayton713b8d32019-12-17 20:37:56 +0000638 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500639
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400640 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500641 if(!elfHeader->checkMagic())
642 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400643 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500644 }
645
646 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000647 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
648#if defined(__i386__)
649 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
650#elif defined(__x86_64__)
651 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
652#elif defined(__arm__)
653 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
654#elif defined(__aarch64__)
655 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
656#elif defined(__mips__)
657 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
658#else
659# error "Unsupported platform"
660#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500661
Ben Clayton713b8d32019-12-17 20:37:56 +0000662 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500663
664 for(int i = 0; i < elfHeader->e_shnum; i++)
665 {
666 if(sectionHeader[i].sh_type == SHT_PROGBITS)
667 {
668 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
669 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400670 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500671 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400672 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500673
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400674 for(size_t j = 0; j < functionNames.size(); ++j)
675 {
676 if(strstr(sectionName, functionNames[j]) != nullptr)
677 {
678 return j;
679 }
680 }
681
682 UNREACHABLE("Failed to find executable section that matches input function names");
683 return static_cast<size_t>(-1);
684 };
685
686 size_t index = findSectionNameEntryIndex();
687 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
688 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400689 }
690 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500691 else if(sectionHeader[i].sh_type == SHT_REL)
692 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000693 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400694
Nicolas Capens157ba262019-12-10 17:49:14 -0500695 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
696 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000697 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500698 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
699 }
700 }
701 else if(sectionHeader[i].sh_type == SHT_RELA)
702 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000703 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500704
705 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
706 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000707 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500708 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
709 }
710 }
711 }
712
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400713 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500714}
715
716template<typename T>
717struct ExecutableAllocator
718{
719 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000720 template<class U>
721 ExecutableAllocator(const ExecutableAllocator<U> &other)
722 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500723
724 using value_type = T;
725 using size_type = std::size_t;
726
727 T *allocate(size_type n)
728 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000729 return (T *)allocateMemoryPages(
730 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500731 }
732
733 void deallocate(T *p, size_type n)
734 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800735 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500736 }
737};
738
739class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
740{
741 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
742 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
743
744public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000745 ELFMemoryStreamer()
746 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500747 {
748 position = 0;
749 buffer.reserve(0x1000);
750 }
751
752 ~ELFMemoryStreamer() override
753 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500754 }
755
756 void write8(uint8_t Value) override
757 {
758 if(position == (uint64_t)buffer.size())
759 {
760 buffer.push_back(Value);
761 position++;
762 }
763 else if(position < (uint64_t)buffer.size())
764 {
765 buffer[position] = Value;
766 position++;
767 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000768 else
769 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500770 }
771
772 void writeBytes(llvm::StringRef Bytes) override
773 {
774 std::size_t oldSize = buffer.size();
775 buffer.resize(oldSize + Bytes.size());
776 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
777 position += Bytes.size();
778 }
779
780 uint64_t tell() const override { return position; }
781
782 void seek(uint64_t Off) override { position = Off; }
783
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400784 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500785 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400786 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500787
788#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500789 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
790#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400791 for(auto &entryPoint : entryPoints)
792 {
793 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
794 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500795#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500796
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400797 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400798 }
799
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500800 void finalize()
801 {
802 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
803
804 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
805 }
806
Ben Clayton713b8d32019-12-17 20:37:56 +0000807 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400808 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500809 ASSERT(func);
810 funcs[index] = func;
811 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400812
Nicolas Capens157ba262019-12-10 17:49:14 -0500813 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400814 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500815 ASSERT(funcs[index]);
816 return funcs[index];
817 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400818
Antonio Maiorano02a39532020-01-21 15:15:34 -0500819 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500820 {
Nicolas Capens4e75f452021-01-28 01:52:56 -0500821 // Check if we already have a suitable constant.
822 for(const auto &c : constantsPool)
823 {
824 void *ptr = c.data.get();
825 size_t space = c.space;
826
827 void *alignedPtr = std::align(alignment, size, ptr, space);
828
829 if(space < size)
830 {
831 continue;
832 }
833
834 if(memcmp(data, alignedPtr, size) == 0)
835 {
836 return alignedPtr;
837 }
838 }
839
Antonio Maiorano02a39532020-01-21 15:15:34 -0500840 // TODO(b/148086935): Replace with a buffer allocator.
841 size_t space = size + alignment;
842 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
843 void *ptr = buf.get();
844 void *alignedPtr = std::align(alignment, size, ptr, space);
845 ASSERT(alignedPtr);
846 memcpy(alignedPtr, data, size);
Nicolas Capens4e75f452021-01-28 01:52:56 -0500847 constantsPool.emplace_back(std::move(buf), space);
848
Antonio Maiorano02a39532020-01-21 15:15:34 -0500849 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500850 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400851
Nicolas Capens157ba262019-12-10 17:49:14 -0500852private:
Nicolas Capens4e75f452021-01-28 01:52:56 -0500853 struct Constant
854 {
855 Constant(std::unique_ptr<uint8_t[]> data, size_t space)
856 : data(std::move(data))
857 , space(space)
858 {}
859
860 std::unique_ptr<uint8_t[]> data;
861 size_t space;
862 };
863
Ben Clayton713b8d32019-12-17 20:37:56 +0000864 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500865 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
866 std::size_t position;
Nicolas Capens4e75f452021-01-28 01:52:56 -0500867 std::vector<Constant> constantsPool;
Nicolas Capens157ba262019-12-10 17:49:14 -0500868};
869
Antonio Maiorano62427e02020-02-13 09:18:05 -0500870#ifdef ENABLE_RR_PRINT
871void VPrintf(const std::vector<Value *> &vals)
872{
Antonio Maiorano8cbee412020-06-10 15:59:20 -0400873 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(rr::DebugPrintf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500874}
875#endif // ENABLE_RR_PRINT
876
Nicolas Capens157ba262019-12-10 17:49:14 -0500877Nucleus::Nucleus()
878{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400879 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500880
881 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
882 Ice::ClFlags::getParsedClFlags(Flags);
883
Ben Clayton713b8d32019-12-17 20:37:56 +0000884#if defined(__arm__)
885 Flags.setTargetArch(Ice::Target_ARM32);
886 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
887#elif defined(__mips__)
888 Flags.setTargetArch(Ice::Target_MIPS32);
889 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
890#else // x86
891 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
892 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
893#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500894 Flags.setOutFileType(Ice::FT_Elf);
895 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
896 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
897 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
898 Flags.setDisableHybridAssembly(true);
899
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500900 // Emit functions into separate sections in the ELF so we can find them by name
901 Flags.setFunctionSections(true);
902
Nicolas Capens157ba262019-12-10 17:49:14 -0500903 static llvm::raw_os_ostream cout(std::cout);
904 static llvm::raw_os_ostream cerr(std::cerr);
905
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500906 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500907 {
908 // Decorate text asm with liveness info
909 Flags.setDecorateAsm(true);
910 }
911
Ben Clayton713b8d32019-12-17 20:37:56 +0000912 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500913 {
914 std::error_code errorCode;
915 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
916 ::elfFile = new Ice::ELFFileStreamer(*out);
917 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
918 }
919 else
920 {
921 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
922 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
923 ::routine = elfMemory;
924 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400925
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400926#if !__has_feature(memory_sanitizer)
927 // thread_local variables in shared libraries are initialized at load-time,
928 // but this is not observed by MemorySanitizer if the loader itself was not
929 // instrumented, leading to false-positive unitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400930 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400931#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500932 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500933}
934
935Nucleus::~Nucleus()
936{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400937 delete Variable::unmaterializedVariables;
938 Variable::unmaterializedVariables = nullptr;
939
Nicolas Capens157ba262019-12-10 17:49:14 -0500940 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500941 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500942
943 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500944 ::allocator = nullptr;
945
Nicolas Capens157ba262019-12-10 17:49:14 -0500946 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500947 ::function = nullptr;
948
Nicolas Capens157ba262019-12-10 17:49:14 -0500949 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500950 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500951
952 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500953 ::elfFile = nullptr;
954
Nicolas Capens157ba262019-12-10 17:49:14 -0500955 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500956 ::out = nullptr;
957
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400958 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500959 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400960 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500961
962 ::codegenMutex.unlock();
963}
964
965void Nucleus::setDefaultConfig(const Config &cfg)
966{
967 std::unique_lock<std::mutex> lock(::defaultConfigLock);
968 ::defaultConfig() = cfg;
969}
970
971void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
972{
973 std::unique_lock<std::mutex> lock(::defaultConfigLock);
974 auto &config = ::defaultConfig();
975 config = cfgEdit.apply(config);
976}
977
978Config Nucleus::getDefaultConfig()
979{
980 std::unique_lock<std::mutex> lock(::defaultConfigLock);
981 return ::defaultConfig();
982}
983
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500984// This function lowers and produces executable binary code in memory for the input functions,
985// and returns a Routine with the entry points to these functions.
986template<size_t Count>
987static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500988{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500989 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
990 // and GlobalContext::emitItems.
991
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500992 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500993 {
994 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500995 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500996 }
997
998 ::context->emitFileHeader();
999
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001000 // Translate
1001
1002 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -05001003 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001004 Ice::Cfg *currFunc = functions[i];
1005
1006 // Install function allocator in TLS for Cfg-specific container allocators
1007 Ice::CfgLocalAllocatorScope allocScope(currFunc);
1008
1009 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
1010
1011 rr::optimize(currFunc);
1012
1013 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001014 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001015
1016 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001017 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001018
1019 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1020
1021 if(subzeroEmitTextAsm)
1022 {
1023 currFunc->emit();
1024 }
1025
1026 currFunc->emitIAS();
Nicolas Capensff010f92021-02-01 12:22:53 -05001027
1028 if(currFunc->hasError())
1029 {
1030 return nullptr;
1031 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001032 }
1033
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001034 // Emit items
1035
1036 ::context->lowerGlobals("");
1037
Nicolas Capens157ba262019-12-10 17:49:14 -05001038 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001039
1040 for(size_t i = 0; i < Count; ++i)
1041 {
1042 Ice::Cfg *currFunc = functions[i];
1043
1044 // Accumulate globals from functions to emit into the "last" section at the end
1045 auto globals = currFunc->getGlobalInits();
1046 if(globals && !globals->empty())
1047 {
1048 ::context->getGlobals()->merge(globals.get());
1049 }
1050
1051 auto assembler = currFunc->releaseAssembler();
1052 assembler->alignFunction();
1053 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1054 }
1055
Nicolas Capens157ba262019-12-10 17:49:14 -05001056 ::context->lowerGlobals("last");
1057 ::context->lowerConstants();
1058 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001059
Nicolas Capens157ba262019-12-10 17:49:14 -05001060 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001061 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001062 objectWriter->writeNonUserSections();
1063
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001064 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001065 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1066 ASSERT(entryPoints.size() == Count);
1067 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001068 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001069 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001070 }
1071
1072 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001073
1074 Routine *handoffRoutine = ::routine;
1075 ::routine = nullptr;
1076
1077 return std::shared_ptr<Routine>(handoffRoutine);
1078}
1079
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001080std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1081{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001082 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001083 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1084}
1085
Nicolas Capens157ba262019-12-10 17:49:14 -05001086Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1087{
1088 Ice::Type type = T(t);
1089 int typeSize = Ice::typeWidthInBytes(type);
1090 int totalSize = typeSize * (arraySize ? arraySize : 1);
1091
1092 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1093 auto address = ::function->makeVariable(T(getPointerType(t)));
1094 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1095 ::function->getEntryNode()->getInsts().push_front(alloca);
1096
1097 return V(address);
1098}
1099
1100BasicBlock *Nucleus::createBasicBlock()
1101{
1102 return B(::function->makeNode());
1103}
1104
1105BasicBlock *Nucleus::getInsertBlock()
1106{
1107 return B(::basicBlock);
1108}
1109
1110void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1111{
Ben Clayton713b8d32019-12-17 20:37:56 +00001112 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001113
1114 Variable::materializeAll();
1115
1116 ::basicBlock = basicBlock;
1117}
1118
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001119void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001120{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001121 ASSERT(::function == nullptr);
1122 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001123 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001124 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001125 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001126
1127 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1128
1129 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1130 // becomes invalid if another one is created; for example, when creating await and destroy functions
1131 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1132 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001133 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1134
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001135 ::entryBlock = ::function->getEntryNode();
1136 ::basicBlock = ::function->makeNode();
1137 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001138}
1139
1140Value *Nucleus::getArgument(unsigned int index)
1141{
1142 return V(::function->getArgs()[index]);
1143}
1144
1145void Nucleus::createRetVoid()
1146{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001147 RR_DEBUG_INFO_UPDATE_LOC();
1148
Nicolas Capens157ba262019-12-10 17:49:14 -05001149 // Code generated after this point is unreachable, so any variables
1150 // being read can safely return an undefined value. We have to avoid
1151 // materializing variables after the terminator ret instruction.
1152 Variable::killUnmaterialized();
1153
1154 Ice::InstRet *ret = Ice::InstRet::create(::function);
1155 ::basicBlock->appendInst(ret);
1156}
1157
1158void Nucleus::createRet(Value *v)
1159{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001160 RR_DEBUG_INFO_UPDATE_LOC();
1161
Nicolas Capens157ba262019-12-10 17:49:14 -05001162 // Code generated after this point is unreachable, so any variables
1163 // being read can safely return an undefined value. We have to avoid
1164 // materializing variables after the terminator ret instruction.
1165 Variable::killUnmaterialized();
1166
1167 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1168 ::basicBlock->appendInst(ret);
1169}
1170
1171void Nucleus::createBr(BasicBlock *dest)
1172{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001173 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001174 Variable::materializeAll();
1175
1176 auto br = Ice::InstBr::create(::function, dest);
1177 ::basicBlock->appendInst(br);
1178}
1179
1180void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1181{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001182 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001183 Variable::materializeAll();
1184
1185 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1186 ::basicBlock->appendInst(br);
1187}
1188
1189static bool isCommutative(Ice::InstArithmetic::OpKind op)
1190{
1191 switch(op)
1192 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001193 case Ice::InstArithmetic::Add:
1194 case Ice::InstArithmetic::Fadd:
1195 case Ice::InstArithmetic::Mul:
1196 case Ice::InstArithmetic::Fmul:
1197 case Ice::InstArithmetic::And:
1198 case Ice::InstArithmetic::Or:
1199 case Ice::InstArithmetic::Xor:
1200 return true;
1201 default:
1202 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001203 }
1204}
1205
1206static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1207{
1208 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1209
1210 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1211
1212 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1213 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1214 ::basicBlock->appendInst(arithmetic);
1215
1216 return V(result);
1217}
1218
1219Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1220{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001221 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001222 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1223}
1224
1225Value *Nucleus::createSub(Value *lhs, Value *rhs)
1226{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001227 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001228 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1229}
1230
1231Value *Nucleus::createMul(Value *lhs, Value *rhs)
1232{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001233 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001234 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1235}
1236
1237Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1238{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001239 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001240 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1241}
1242
1243Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1244{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001245 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001246 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1247}
1248
1249Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1250{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001251 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001252 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1253}
1254
1255Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1256{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001257 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001258 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1259}
1260
1261Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1262{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001263 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001264 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1265}
1266
1267Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1268{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001269 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001270 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1271}
1272
1273Value *Nucleus::createURem(Value *lhs, Value *rhs)
1274{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001275 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001276 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1277}
1278
1279Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1280{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001281 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001282 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1283}
1284
1285Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1286{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001287 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001288 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1289 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001290 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001291 return nullptr;
1292}
1293
1294RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1295{
1296 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001297}
1298
1299Value *Nucleus::createShl(Value *lhs, Value *rhs)
1300{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001301 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001302 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1303}
1304
1305Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1306{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001307 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001308 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1309}
1310
1311Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1312{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001313 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001314 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1315}
1316
1317Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1318{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001319 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001320 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1321}
1322
1323Value *Nucleus::createOr(Value *lhs, Value *rhs)
1324{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001325 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001326 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1327}
1328
1329Value *Nucleus::createXor(Value *lhs, Value *rhs)
1330{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001331 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001332 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1333}
1334
1335Value *Nucleus::createNeg(Value *v)
1336{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001337 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001338 return createSub(createNullValue(T(v->getType())), v);
1339}
1340
1341Value *Nucleus::createFNeg(Value *v)
1342{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001343 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001344 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1345 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001346
1347 return createFSub(negativeZero, v);
1348}
1349
1350Value *Nucleus::createNot(Value *v)
1351{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001352 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001353 if(Ice::isScalarIntegerType(v->getType()))
1354 {
1355 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1356 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001357 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001358 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001359 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001360 return createXor(v, createConstantVector(c, T(v->getType())));
1361 }
1362}
1363
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001364static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1365{
1366#if defined(__i386__) || defined(__x86_64__)
1367 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1368 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1369 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1370#else
1371 if(atomic)
1372 {
1373 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1374 }
1375 if(memoryOrder != std::memory_order_relaxed)
1376 {
1377 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1378 }
1379#endif
1380
1381 // Vulkan doesn't allow sequential memory order
1382 ASSERT(memoryOrder != std::memory_order_seq_cst);
1383}
1384
Nicolas Capens157ba262019-12-10 17:49:14 -05001385Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1386{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001387 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001388 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001389
1390 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001391 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001392
Ben Clayton713b8d32019-12-17 20:37:56 +00001393 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001394 {
1395 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001396 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001397 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001398 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001399 auto pointer = RValue<Pointer<Byte>>(ptr);
1400 Int x = *Pointer<Int>(pointer);
1401
1402 Int4 vector;
1403 vector = Insert(vector, x, 0);
1404
Antonio Maiorano02a39532020-01-21 15:15:34 -05001405 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001406 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1407 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001408 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001409 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001410 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001411 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001412 auto pointer = RValue<Pointer<Byte>>(ptr);
1413 Int x = *Pointer<Int>(pointer);
1414 Int y = *Pointer<Int>(pointer + 4);
1415
1416 Int4 vector;
1417 vector = Insert(vector, x, 0);
1418 vector = Insert(vector, y, 1);
1419
Antonio Maiorano02a39532020-01-21 15:15:34 -05001420 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001421 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1422 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001423 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001424 else
1425 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001426 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001427 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001428 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001429 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Antonio Maiorano02a39532020-01-21 15:15:34 -05001430 result = ::function->makeVariable(T(type));
Nicolas Capens33a77f72021-02-08 15:04:38 -05001431 auto load = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001432 load->addArg(ptr);
1433 load->addArg(::context->getConstantInt32(typeSize(type)));
1434 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001435 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001436 }
1437 else
1438 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001439 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001440 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001441
Antonio Maiorano02a39532020-01-21 15:15:34 -05001442 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001443 return V(result);
1444}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001445
Nicolas Capens157ba262019-12-10 17:49:14 -05001446Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1447{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001448 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001449 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001450
Ben Clayton713b8d32019-12-17 20:37:56 +00001451#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001452 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001453 if(align != 0)
1454 {
1455 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1456 call->addArg(ptr);
1457 call->addArg(::context->getConstantInt64(typeSize(type)));
1458 ::basicBlock->appendInst(call);
1459 }
1460#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001461
Nicolas Capens157ba262019-12-10 17:49:14 -05001462 int valueType = (int)reinterpret_cast<intptr_t>(type);
1463
Ben Clayton713b8d32019-12-17 20:37:56 +00001464 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001465 {
1466 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001467 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001468 if(typeSize(type) == 4)
1469 {
1470 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1471 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1472 ::basicBlock->appendInst(bitcast);
1473
1474 RValue<Int4> v(V(vector));
1475
1476 auto pointer = RValue<Pointer<Byte>>(ptr);
1477 Int x = Extract(v, 0);
1478 *Pointer<Int>(pointer) = x;
1479 }
1480 else if(typeSize(type) == 8)
1481 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001482 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001483 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1484 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1485 ::basicBlock->appendInst(bitcast);
1486
1487 RValue<Int4> v(V(vector));
1488
1489 auto pointer = RValue<Pointer<Byte>>(ptr);
1490 Int x = Extract(v, 0);
1491 *Pointer<Int>(pointer) = x;
1492 Int y = Extract(v, 1);
1493 *Pointer<Int>(pointer + 4) = y;
1494 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001495 else
1496 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001497 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001498 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001499 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001500 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001501 auto store = Ice::InstIntrinsic::create(::function, 3, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05001502 store->addArg(value);
1503 store->addArg(ptr);
1504 store->addArg(::context->getConstantInt32(typeSize(type)));
1505 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001506 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001507 }
1508 else
1509 {
1510 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001511
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001512 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001513 ::basicBlock->appendInst(store);
1514 }
1515
1516 return value;
1517}
1518
1519Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1520{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001521 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001522 ASSERT(index->getType() == Ice::IceType_i32);
1523
1524 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1525 {
1526 int32_t offset = constant->getValue() * (int)typeSize(type);
1527
1528 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001529 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001530 return ptr;
1531 }
1532
Nicolas Capens157ba262019-12-10 17:49:14 -05001533 return createAdd(ptr, createConstantInt(offset));
1534 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001535
Nicolas Capens157ba262019-12-10 17:49:14 -05001536 if(!Ice::isByteSizedType(T(type)))
1537 {
1538 index = createMul(index, createConstantInt((int)typeSize(type)));
1539 }
1540
Ben Clayton713b8d32019-12-17 20:37:56 +00001541 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001542 {
1543 if(unsignedIndex)
1544 {
1545 index = createZExt(index, T(Ice::IceType_i64));
1546 }
1547 else
1548 {
1549 index = createSExt(index, T(Ice::IceType_i64));
1550 }
1551 }
1552
1553 return createAdd(ptr, index);
1554}
1555
Antonio Maiorano370cba52019-12-31 11:36:07 -05001556static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1557{
1558 Ice::Variable *result = ::function->makeVariable(value->getType());
1559
1560 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001561 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001562 auto op = ::context->getConstantInt32(rmwOp);
1563 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1564 inst->addArg(op);
1565 inst->addArg(ptr);
1566 inst->addArg(value);
1567 inst->addArg(order);
1568 ::basicBlock->appendInst(inst);
1569
1570 return V(result);
1571}
1572
Nicolas Capens157ba262019-12-10 17:49:14 -05001573Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1574{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001575 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001576 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001577}
1578
1579Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1580{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001581 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001582 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001583}
1584
1585Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1586{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001587 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001588 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001589}
1590
1591Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1592{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001593 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001594 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001595}
1596
1597Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1598{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001599 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001600 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001601}
1602
1603Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1604{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001605 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001606 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001607}
1608
1609Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1610{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001611 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001612 Ice::Variable *result = ::function->makeVariable(value->getType());
1613
1614 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens33a77f72021-02-08 15:04:38 -05001615 auto inst = Ice::InstIntrinsic::create(::function, 0, result, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05001616 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1617 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1618 inst->addArg(ptr);
1619 inst->addArg(compare);
1620 inst->addArg(value);
1621 inst->addArg(orderEq);
1622 inst->addArg(orderNeq);
1623 ::basicBlock->appendInst(inst);
1624
1625 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001626}
1627
1628static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1629{
1630 if(v->getType() == T(destType))
1631 {
1632 return v;
1633 }
1634
1635 Ice::Variable *result = ::function->makeVariable(T(destType));
1636 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1637 ::basicBlock->appendInst(cast);
1638
1639 return V(result);
1640}
1641
1642Value *Nucleus::createTrunc(Value *v, Type *destType)
1643{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001644 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001645 return createCast(Ice::InstCast::Trunc, v, destType);
1646}
1647
1648Value *Nucleus::createZExt(Value *v, Type *destType)
1649{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001650 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001651 return createCast(Ice::InstCast::Zext, v, destType);
1652}
1653
1654Value *Nucleus::createSExt(Value *v, Type *destType)
1655{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001656 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001657 return createCast(Ice::InstCast::Sext, v, destType);
1658}
1659
1660Value *Nucleus::createFPToUI(Value *v, Type *destType)
1661{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001662 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001663 return createCast(Ice::InstCast::Fptoui, v, destType);
1664}
1665
1666Value *Nucleus::createFPToSI(Value *v, Type *destType)
1667{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001668 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001669 return createCast(Ice::InstCast::Fptosi, v, destType);
1670}
1671
1672Value *Nucleus::createSIToFP(Value *v, Type *destType)
1673{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001674 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001675 return createCast(Ice::InstCast::Sitofp, v, destType);
1676}
1677
1678Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1679{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001680 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001681 return createCast(Ice::InstCast::Fptrunc, v, destType);
1682}
1683
1684Value *Nucleus::createFPExt(Value *v, Type *destType)
1685{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001686 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001687 return createCast(Ice::InstCast::Fpext, v, destType);
1688}
1689
1690Value *Nucleus::createBitCast(Value *v, Type *destType)
1691{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001692 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001693 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1694 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1695 // emulate them by writing to the stack and reading back as the destination type.
1696 if(emulateMismatchedBitCast)
1697 {
1698 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1699 {
1700 Value *address = allocateStackVariable(destType);
1701 createStore(v, address, T(v->getType()));
1702 return createLoad(address, destType);
1703 }
1704 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1705 {
1706 Value *address = allocateStackVariable(T(v->getType()));
1707 createStore(v, address, T(v->getType()));
1708 return createLoad(address, destType);
1709 }
1710 }
1711
1712 return createCast(Ice::InstCast::Bitcast, v, destType);
1713}
1714
1715static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1716{
1717 ASSERT(lhs->getType() == rhs->getType());
1718
1719 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1720 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1721 ::basicBlock->appendInst(cmp);
1722
1723 return V(result);
1724}
1725
Nicolas Capens157ba262019-12-10 17:49:14 -05001726Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1727{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001728 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001729 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1730}
1731
1732Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1733{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001734 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001735 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1736}
1737
1738Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1739{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001740 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001741 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1742}
1743
1744Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1745{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001746 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001747 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1748}
1749
1750Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1751{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001752 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001753 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1754}
1755
1756Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1757{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001758 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001759 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1760}
1761
1762Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1763{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001764 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001765 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1766}
1767
1768Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1769{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001770 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001771 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1772}
1773
1774Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1775{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001776 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001777 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1778}
1779
1780Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1781{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001782 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001783 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1784}
1785
1786static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1787{
1788 ASSERT(lhs->getType() == rhs->getType());
1789 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1790
1791 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1792 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1793 ::basicBlock->appendInst(cmp);
1794
1795 return V(result);
1796}
1797
1798Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1799{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001800 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001801 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1802}
1803
1804Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1805{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001806 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001807 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1808}
1809
1810Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1811{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001812 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001813 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1814}
1815
1816Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1817{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001818 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001819 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1820}
1821
1822Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1823{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001824 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001825 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1826}
1827
1828Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1829{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001830 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001831 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1832}
1833
1834Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1835{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001836 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001837 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1838}
1839
1840Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1841{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001842 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001843 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1844}
1845
1846Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1847{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001848 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001849 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1850}
1851
1852Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1853{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001854 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001855 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1856}
1857
1858Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1859{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001860 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001861 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1862}
1863
1864Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1865{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001866 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001867 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1868}
1869
1870Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1871{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001872 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001873 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1874}
1875
1876Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1877{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001878 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001879 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1880}
1881
1882Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1883{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001884 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001885 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001886 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001887 ::basicBlock->appendInst(extract);
1888
1889 return V(result);
1890}
1891
1892Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1893{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001894 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001895 auto result = ::function->makeVariable(vector->getType());
1896 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1897 ::basicBlock->appendInst(insert);
1898
1899 return V(result);
1900}
1901
1902Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1903{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001904 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001905 ASSERT(V1->getType() == V2->getType());
1906
1907 int size = Ice::typeNumElements(V1->getType());
1908 auto result = ::function->makeVariable(V1->getType());
1909 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1910
1911 for(int i = 0; i < size; i++)
1912 {
1913 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1914 }
1915
1916 ::basicBlock->appendInst(shuffle);
1917
1918 return V(result);
1919}
1920
1921Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1922{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001923 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001924 ASSERT(ifTrue->getType() == ifFalse->getType());
1925
1926 auto result = ::function->makeVariable(ifTrue->getType());
1927 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1928 ::basicBlock->appendInst(select);
1929
1930 return V(result);
1931}
1932
1933SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1934{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001935 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001936 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1937 ::basicBlock->appendInst(switchInst);
1938
Ben Clayton713b8d32019-12-17 20:37:56 +00001939 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001940}
1941
1942void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1943{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001944 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001945 switchCases->addBranch(label, label, branch);
1946}
1947
1948void Nucleus::createUnreachable()
1949{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001950 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001951 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1952 ::basicBlock->appendInst(unreachable);
1953}
1954
Antonio Maiorano62427e02020-02-13 09:18:05 -05001955Type *Nucleus::getType(Value *value)
1956{
1957 return T(V(value)->getType());
1958}
1959
1960Type *Nucleus::getContainedType(Type *vectorType)
1961{
1962 Ice::Type vecTy = T(vectorType);
1963 switch(vecTy)
1964 {
1965 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1966 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1967 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1968 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1969 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1970 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1971 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1972 default:
1973 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1974 return {};
1975 }
1976}
1977
Nicolas Capens157ba262019-12-10 17:49:14 -05001978Type *Nucleus::getPointerType(Type *ElementType)
1979{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001980 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001981}
1982
Antonio Maiorano62427e02020-02-13 09:18:05 -05001983static constexpr Ice::Type getNaturalIntType()
1984{
1985 constexpr size_t intSize = sizeof(int);
1986 static_assert(intSize == 4 || intSize == 8, "");
1987 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1988}
1989
1990Type *Nucleus::getPrintfStorageType(Type *valueType)
1991{
1992 Ice::Type valueTy = T(valueType);
1993 switch(valueTy)
1994 {
1995 case Ice::IceType_i32:
1996 return T(getNaturalIntType());
1997
1998 case Ice::IceType_f32:
1999 return T(Ice::IceType_f64);
2000
2001 default:
2002 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
2003 return {};
2004 }
2005}
2006
Nicolas Capens157ba262019-12-10 17:49:14 -05002007Value *Nucleus::createNullValue(Type *Ty)
2008{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002009 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002010 if(Ice::isVectorType(T(Ty)))
2011 {
2012 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002013 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002014 return createConstantVector(c, Ty);
2015 }
2016 else
2017 {
2018 return V(::context->getConstantZero(T(Ty)));
2019 }
2020}
2021
2022Value *Nucleus::createConstantLong(int64_t i)
2023{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002024 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002025 return V(::context->getConstantInt64(i));
2026}
2027
2028Value *Nucleus::createConstantInt(int i)
2029{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002030 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002031 return V(::context->getConstantInt32(i));
2032}
2033
2034Value *Nucleus::createConstantInt(unsigned int i)
2035{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002036 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002037 return V(::context->getConstantInt32(i));
2038}
2039
2040Value *Nucleus::createConstantBool(bool b)
2041{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002042 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002043 return V(::context->getConstantInt1(b));
2044}
2045
2046Value *Nucleus::createConstantByte(signed char i)
2047{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002048 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002049 return V(::context->getConstantInt8(i));
2050}
2051
2052Value *Nucleus::createConstantByte(unsigned char i)
2053{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002054 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002055 return V(::context->getConstantInt8(i));
2056}
2057
2058Value *Nucleus::createConstantShort(short i)
2059{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002060 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002061 return V(::context->getConstantInt16(i));
2062}
2063
2064Value *Nucleus::createConstantShort(unsigned short i)
2065{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002066 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002067 return V(::context->getConstantInt16(i));
2068}
2069
2070Value *Nucleus::createConstantFloat(float x)
2071{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002072 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002073 return V(::context->getConstantFloat(x));
2074}
2075
2076Value *Nucleus::createNullPointer(Type *Ty)
2077{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002078 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002079 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002080}
2081
Antonio Maiorano02a39532020-01-21 15:15:34 -05002082static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2083{
2084 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2085}
2086
Nicolas Capens157ba262019-12-10 17:49:14 -05002087Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2088{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002089 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002090 const int vectorSize = 16;
2091 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2092 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002093
2094 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002095 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002096
Antonio Maioranoa0957112020-03-04 15:06:19 -05002097 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002098 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002099
2100 switch((int)reinterpret_cast<intptr_t>(type))
2101 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002102 case Ice::IceType_v4i32:
2103 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002104 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002105 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002106 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002107 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002108 }
2109 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002110 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002111 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002112 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002113 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002114 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002115 }
2116 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002117 case Ice::IceType_v8i16:
2118 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002119 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002120 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002121 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002122 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002123 }
2124 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002125 case Ice::IceType_v16i8:
2126 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002127 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002128 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002129 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002130 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002131 }
2132 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002133 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002134 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002135 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002136 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002137 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002138 }
2139 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002140 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002141 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002142 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002143 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002144 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002145 }
2146 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002147 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002148 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002149 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002150 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002151 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002152 }
2153 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002154 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002155 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002156 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002157 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002158 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002159 }
2160 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002161 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002162 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002163 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002164 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002165 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002166 }
2167 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002168 default:
2169 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002170 }
2171
Antonio Maiorano02a39532020-01-21 15:15:34 -05002172 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002173
Antonio Maiorano02a39532020-01-21 15:15:34 -05002174 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002175 return V(result);
2176}
2177
2178Value *Nucleus::createConstantVector(const double *constants, Type *type)
2179{
Ben Clayton713b8d32019-12-17 20:37:56 +00002180 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002181}
2182
Antonio Maiorano62427e02020-02-13 09:18:05 -05002183Value *Nucleus::createConstantString(const char *v)
2184{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002185 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002186 return V(IceConstantData(v, strlen(v) + 1));
2187}
2188
Nicolas Capens519cf222020-05-08 15:27:19 -04002189Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002190{
2191 return T(Ice::IceType_void);
2192}
2193
Nicolas Capens519cf222020-05-08 15:27:19 -04002194Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002195{
2196 return T(Ice::IceType_i1);
2197}
2198
Nicolas Capens519cf222020-05-08 15:27:19 -04002199Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002200{
2201 return T(Ice::IceType_i8);
2202}
2203
Nicolas Capens519cf222020-05-08 15:27:19 -04002204Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002205{
2206 return T(Ice::IceType_i8);
2207}
2208
Nicolas Capens519cf222020-05-08 15:27:19 -04002209Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002210{
2211 return T(Ice::IceType_i16);
2212}
2213
Nicolas Capens519cf222020-05-08 15:27:19 -04002214Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002215{
2216 return T(Ice::IceType_i16);
2217}
2218
Nicolas Capens519cf222020-05-08 15:27:19 -04002219Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002220{
2221 return T(Type_v4i8);
2222}
2223
Nicolas Capens519cf222020-05-08 15:27:19 -04002224Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002225{
2226 return T(Type_v4i8);
2227}
2228
Ben Clayton713b8d32019-12-17 20:37:56 +00002229namespace {
2230RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002231{
Ben Clayton713b8d32019-12-17 20:37:56 +00002232 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002233}
2234
Ben Clayton713b8d32019-12-17 20:37:56 +00002235RValue<Byte> Extract(RValue<Byte8> val, int i)
2236{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002237 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002238}
2239
2240RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2241{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002242 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002243}
2244} // namespace
2245
Nicolas Capens157ba262019-12-10 17:49:14 -05002246RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2247{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002248 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002249 if(emulateIntrinsics)
2250 {
2251 Byte8 result;
2252 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2253 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2254 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2255 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2256 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2257 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2258 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2259 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2260
2261 return result;
2262 }
2263 else
2264 {
2265 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002266 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002267 auto paddusb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002268 paddusb->addArg(x.value());
2269 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002270 ::basicBlock->appendInst(paddusb);
2271
2272 return RValue<Byte8>(V(result));
2273 }
2274}
2275
2276RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2277{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002278 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002279 if(emulateIntrinsics)
2280 {
2281 Byte8 result;
2282 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2283 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2284 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2285 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2286 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2287 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2288 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2289 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2290
2291 return result;
2292 }
2293 else
2294 {
2295 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002296 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002297 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002298 psubusw->addArg(x.value());
2299 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002300 ::basicBlock->appendInst(psubusw);
2301
2302 return RValue<Byte8>(V(result));
2303 }
2304}
2305
2306RValue<SByte> Extract(RValue<SByte8> val, int i)
2307{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002308 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002309 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002310}
2311
2312RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2313{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002314 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002315 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002316}
2317
2318RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2319{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002320 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002321 if(emulateIntrinsics)
2322 {
2323 SByte8 result;
2324 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2325 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2326 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2327 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2328 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2329 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2330 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2331 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2332
2333 return result;
2334 }
2335 else
2336 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002337#if defined(__i386__) || defined(__x86_64__)
2338 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2339 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2340 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002341
Ben Clayton713b8d32019-12-17 20:37:56 +00002342 return As<SByte8>(hi | lo);
2343#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002344 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002345#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002346 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002347}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002348
Nicolas Capens157ba262019-12-10 17:49:14 -05002349RValue<Int> SignMask(RValue<Byte8> x)
2350{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002351 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002352 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002353 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002354 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2355 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002356 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002357 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002358 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002359 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002360 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002361 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002362 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002363 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002364
Nicolas Capens157ba262019-12-10 17:49:14 -05002365 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002366 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002367}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002368
2369// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2370// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002371// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002372// }
2373
Nicolas Capens157ba262019-12-10 17:49:14 -05002374RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2375{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002376 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002377 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002378}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002379
Nicolas Capens519cf222020-05-08 15:27:19 -04002380Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002381{
2382 return T(Type_v8i8);
2383}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002384
Nicolas Capens598f8d82016-09-26 15:09:10 -04002385// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2386// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002387// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002388// }
2389
2390// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2391// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002392// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002393// }
2394
Nicolas Capens157ba262019-12-10 17:49:14 -05002395RValue<SByte> SaturateSigned(RValue<Short> x)
2396{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002397 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002398 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2399}
2400
2401RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2402{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002403 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002404 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002405 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002406 SByte8 result;
2407 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2408 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2409 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2410 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2411 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2412 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2413 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2414 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002415
Nicolas Capens157ba262019-12-10 17:49:14 -05002416 return result;
2417 }
2418 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002419 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002420 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002421 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002422 auto paddsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002423 paddsb->addArg(x.value());
2424 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002425 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002426
Nicolas Capens157ba262019-12-10 17:49:14 -05002427 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002428 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002429}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002430
Nicolas Capens157ba262019-12-10 17:49:14 -05002431RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2432{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002433 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002434 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002435 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002436 SByte8 result;
2437 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2438 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2439 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2440 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2441 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2442 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2443 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2444 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002445
Nicolas Capens157ba262019-12-10 17:49:14 -05002446 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002447 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002448 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002449 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002450 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002451 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002452 auto psubsb = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002453 psubsb->addArg(x.value());
2454 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002455 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002456
Nicolas Capens157ba262019-12-10 17:49:14 -05002457 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002458 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002459}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002460
Nicolas Capens157ba262019-12-10 17:49:14 -05002461RValue<Int> SignMask(RValue<SByte8> x)
2462{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002463 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002464 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002465 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002466 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2467 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002468 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002469 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002470 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002471 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002472 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002473 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002474 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002475 ::basicBlock->appendInst(movmsk);
2476
2477 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002478 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002479}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002480
Nicolas Capens157ba262019-12-10 17:49:14 -05002481RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2482{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002483 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002484 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002485}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002486
Nicolas Capens157ba262019-12-10 17:49:14 -05002487RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2488{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002489 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002490 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002491}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002492
Nicolas Capens519cf222020-05-08 15:27:19 -04002493Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002494{
2495 return T(Type_v8i8);
2496}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002497
Nicolas Capens519cf222020-05-08 15:27:19 -04002498Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002499{
2500 return T(Ice::IceType_v16i8);
2501}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002502
Nicolas Capens519cf222020-05-08 15:27:19 -04002503Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002504{
2505 return T(Ice::IceType_v16i8);
2506}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002507
Nicolas Capens519cf222020-05-08 15:27:19 -04002508Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002509{
2510 return T(Type_v2i16);
2511}
Nicolas Capensd4227962016-11-09 14:24:25 -05002512
Nicolas Capens519cf222020-05-08 15:27:19 -04002513Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002514{
2515 return T(Type_v2i16);
2516}
Nicolas Capensd4227962016-11-09 14:24:25 -05002517
Nicolas Capens157ba262019-12-10 17:49:14 -05002518Short4::Short4(RValue<Int4> cast)
2519{
Ben Clayton713b8d32019-12-17 20:37:56 +00002520 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002521 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002522 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2523
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002524 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002525 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002526
2527 storeValue(short4);
2528}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002529
2530// Short4::Short4(RValue<Float> cast)
2531// {
2532// }
2533
Nicolas Capens157ba262019-12-10 17:49:14 -05002534Short4::Short4(RValue<Float4> cast)
2535{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002536 // TODO(b/150791192): Generalize and optimize
2537 auto smin = std::numeric_limits<short>::min();
2538 auto smax = std::numeric_limits<short>::max();
2539 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002540}
2541
2542RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2543{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002544 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002545 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002546 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002547 Short4 result;
2548 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2549 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2550 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2551 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002552
2553 return result;
2554 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002555 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002556 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002557 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002558 }
2559}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002560
Nicolas Capens157ba262019-12-10 17:49:14 -05002561RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2562{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002563 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002564 if(emulateIntrinsics)
2565 {
2566 Short4 result;
2567 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2568 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2569 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2570 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2571
2572 return result;
2573 }
2574 else
2575 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002576 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002577 }
2578}
2579
2580RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2581{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002582 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002583 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002584 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002585 ::basicBlock->appendInst(cmp);
2586
2587 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002588 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002589 ::basicBlock->appendInst(select);
2590
2591 return RValue<Short4>(V(result));
2592}
2593
2594RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2595{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002596 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002597 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002598 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002599 ::basicBlock->appendInst(cmp);
2600
2601 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002602 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002603 ::basicBlock->appendInst(select);
2604
2605 return RValue<Short4>(V(result));
2606}
2607
2608RValue<Short> SaturateSigned(RValue<Int> x)
2609{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002610 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002611 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2612}
2613
2614RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2615{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002616 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002617 if(emulateIntrinsics)
2618 {
2619 Short4 result;
2620 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2621 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2622 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2623 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2624
2625 return result;
2626 }
2627 else
2628 {
2629 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002630 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002631 auto paddsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002632 paddsw->addArg(x.value());
2633 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002634 ::basicBlock->appendInst(paddsw);
2635
2636 return RValue<Short4>(V(result));
2637 }
2638}
2639
2640RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2641{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002642 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002643 if(emulateIntrinsics)
2644 {
2645 Short4 result;
2646 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2647 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2648 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2649 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2650
2651 return result;
2652 }
2653 else
2654 {
2655 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002656 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002657 auto psubsw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002658 psubsw->addArg(x.value());
2659 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002660 ::basicBlock->appendInst(psubsw);
2661
2662 return RValue<Short4>(V(result));
2663 }
2664}
2665
2666RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2667{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002668 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002669 if(emulateIntrinsics)
2670 {
2671 Short4 result;
2672 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2673 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2674 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2675 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2676
2677 return result;
2678 }
2679 else
2680 {
2681 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002682 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002683 auto pmulhw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002684 pmulhw->addArg(x.value());
2685 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002686 ::basicBlock->appendInst(pmulhw);
2687
2688 return RValue<Short4>(V(result));
2689 }
2690}
2691
2692RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2693{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002694 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002695 if(emulateIntrinsics)
2696 {
2697 Int2 result;
2698 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2699 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2700
2701 return result;
2702 }
2703 else
2704 {
2705 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002706 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002707 auto pmaddwd = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002708 pmaddwd->addArg(x.value());
2709 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002710 ::basicBlock->appendInst(pmaddwd);
2711
2712 return As<Int2>(V(result));
2713 }
2714}
2715
2716RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2717{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002718 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002719 if(emulateIntrinsics)
2720 {
2721 SByte8 result;
2722 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2723 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2724 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2725 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2726 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2727 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2728 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2729 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2730
2731 return result;
2732 }
2733 else
2734 {
2735 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002736 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002737 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002738 pack->addArg(x.value());
2739 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002740 ::basicBlock->appendInst(pack);
2741
2742 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2743 }
2744}
2745
2746RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2747{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002748 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002749 if(emulateIntrinsics)
2750 {
2751 Byte8 result;
2752 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2753 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2754 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2755 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2756 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2757 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2758 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2759 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2760
2761 return result;
2762 }
2763 else
2764 {
2765 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002766 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002767 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002768 pack->addArg(x.value());
2769 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002770 ::basicBlock->appendInst(pack);
2771
2772 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2773 }
2774}
2775
2776RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2777{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002778 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002779 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002780}
2781
2782RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2783{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002784 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002785 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002786}
2787
Nicolas Capens519cf222020-05-08 15:27:19 -04002788Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002789{
2790 return T(Type_v4i16);
2791}
2792
2793UShort4::UShort4(RValue<Float4> cast, bool saturate)
2794{
2795 if(saturate)
2796 {
2797 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002798 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002799 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2800 // PackUnsigned takes care of 0x0000 saturation.
2801 Int4 int4(Min(cast, Float4(0xFFFF)));
2802 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002803 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002804 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002805 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002806 // ARM saturates the 32-bit integer result on overflow/undeflow.
2807 Int4 int4(cast);
2808 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002809 }
2810 else
2811 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002812 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002813 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002814 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002815 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002816 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002817 *this = Short4(Int4(cast));
2818 }
2819}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002820
Nicolas Capens157ba262019-12-10 17:49:14 -05002821RValue<UShort> Extract(RValue<UShort4> val, int i)
2822{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002823 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002824}
2825
2826RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2827{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002828 return RValue<UShort4>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002829}
2830
2831RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2832{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002833 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002834 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002835
Nicolas Capens157ba262019-12-10 17:49:14 -05002836 {
2837 UShort4 result;
2838 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2839 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2840 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2841 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2842
2843 return result;
2844 }
2845 else
2846 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002847 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002848 }
2849}
2850
2851RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002854 if(emulateIntrinsics)
2855 {
2856 UShort4 result;
2857 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2858 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2859 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2860 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2861
2862 return result;
2863 }
2864 else
2865 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002866 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002867 }
2868}
2869
2870RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2871{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002872 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002873 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002874 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002875 ::basicBlock->appendInst(cmp);
2876
2877 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002878 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002879 ::basicBlock->appendInst(select);
2880
2881 return RValue<UShort4>(V(result));
2882}
2883
2884RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2885{
2886 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002887 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002888 ::basicBlock->appendInst(cmp);
2889
2890 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002891 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002892 ::basicBlock->appendInst(select);
2893
2894 return RValue<UShort4>(V(result));
2895}
2896
2897RValue<UShort> SaturateUnsigned(RValue<Int> x)
2898{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002899 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002900 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2901}
2902
2903RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2904{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002905 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002906 if(emulateIntrinsics)
2907 {
2908 UShort4 result;
2909 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2910 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2911 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2912 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2913
2914 return result;
2915 }
2916 else
2917 {
2918 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002919 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002920 auto paddusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002921 paddusw->addArg(x.value());
2922 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002923 ::basicBlock->appendInst(paddusw);
2924
2925 return RValue<UShort4>(V(result));
2926 }
2927}
2928
2929RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002932 if(emulateIntrinsics)
2933 {
2934 UShort4 result;
2935 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2936 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2937 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2938 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2939
2940 return result;
2941 }
2942 else
2943 {
2944 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002945 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002946 auto psubusw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002947 psubusw->addArg(x.value());
2948 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002949 ::basicBlock->appendInst(psubusw);
2950
2951 return RValue<UShort4>(V(result));
2952 }
2953}
2954
2955RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2956{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002957 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002958 if(emulateIntrinsics)
2959 {
2960 UShort4 result;
2961 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2962 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2963 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2964 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2965
2966 return result;
2967 }
2968 else
2969 {
2970 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002971 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05002972 auto pmulhuw = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002973 pmulhuw->addArg(x.value());
2974 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002975 ::basicBlock->appendInst(pmulhuw);
2976
2977 return RValue<UShort4>(V(result));
2978 }
2979}
2980
2981RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2982{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002983 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002984 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2985
2986 // Scalarized implementation.
2987 Int4 result;
2988 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2989 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2990 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2991 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2992
2993 return result;
2994}
2995
2996RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2997{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002998 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002999 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3000
3001 if(false) // Partial product based implementation.
3002 {
3003 auto xh = x >> 16;
3004 auto yh = y >> 16;
3005 auto xl = x & UInt4(0x0000FFFF);
3006 auto yl = y & UInt4(0x0000FFFF);
3007 auto xlyh = xl * yh;
3008 auto xhyl = xh * yl;
3009 auto xlyhh = xlyh >> 16;
3010 auto xhylh = xhyl >> 16;
3011 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3012 auto xhyll = xhyl & UInt4(0x0000FFFF);
3013 auto xlylh = (xl * yl) >> 16;
3014 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3015
3016 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003017 }
3018
Nicolas Capens157ba262019-12-10 17:49:14 -05003019 // Scalarized implementation.
3020 Int4 result;
3021 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3022 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3023 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3024 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3025
3026 return As<UInt4>(result);
3027}
3028
3029RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3030{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003031 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003032 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003033 return UShort4(0);
3034}
3035
Nicolas Capens519cf222020-05-08 15:27:19 -04003036Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003037{
3038 return T(Type_v4i16);
3039}
3040
3041RValue<Short> Extract(RValue<Short8> val, int i)
3042{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003043 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003044 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003045}
3046
3047RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3048{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003049 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003050 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003051}
3052
3053RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3054{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003055 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003056 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003057 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003058 Short8 result;
3059 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3060 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3061 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3062 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3063 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3064 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3065 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3066 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003067
Nicolas Capens157ba262019-12-10 17:49:14 -05003068 return result;
3069 }
3070 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003071 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003072 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003073 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003074}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003075
Nicolas Capens157ba262019-12-10 17:49:14 -05003076RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3077{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003078 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003079 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003080 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003081 Short8 result;
3082 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3083 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3084 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3085 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3086 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3087 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3088 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3089 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003090
Nicolas Capens157ba262019-12-10 17:49:14 -05003091 return result;
3092 }
3093 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003094 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003095 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003096 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003097}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003098
Nicolas Capens157ba262019-12-10 17:49:14 -05003099RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3100{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003101 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003102 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003103 return Int4(0);
3104}
3105
3106RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3107{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003108 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003109 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003110 return Short8(0);
3111}
3112
Nicolas Capens519cf222020-05-08 15:27:19 -04003113Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003114{
3115 return T(Ice::IceType_v8i16);
3116}
3117
3118RValue<UShort> Extract(RValue<UShort8> val, int i)
3119{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003120 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003121 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003122}
3123
3124RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3125{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003126 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003127 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003128}
3129
3130RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3131{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003132 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003133 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003134 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003135 UShort8 result;
3136 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3137 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3138 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3139 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3140 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3141 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3142 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3143 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003144
Nicolas Capens157ba262019-12-10 17:49:14 -05003145 return result;
3146 }
3147 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003148 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003149 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003150 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003151}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003152
Nicolas Capens157ba262019-12-10 17:49:14 -05003153RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3154{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003155 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003156 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003157 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003158 UShort8 result;
3159 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3160 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3161 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3162 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3163 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3164 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3165 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3166 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003167
Nicolas Capens157ba262019-12-10 17:49:14 -05003168 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003169 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003170 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003171 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003172 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003173 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003174}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003175
Nicolas Capens157ba262019-12-10 17:49:14 -05003176RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3177{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003178 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003179 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003180 return UShort8(0);
3181}
3182
Nicolas Capens519cf222020-05-08 15:27:19 -04003183Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003184{
3185 return T(Ice::IceType_v8i16);
3186}
3187
Ben Clayton713b8d32019-12-17 20:37:56 +00003188RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003190 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003191 RValue<Int> res = val;
3192 val += 1;
3193 return res;
3194}
3195
Ben Clayton713b8d32019-12-17 20:37:56 +00003196const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003197{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003198 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003199 val += 1;
3200 return val;
3201}
3202
Ben Clayton713b8d32019-12-17 20:37:56 +00003203RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003204{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003205 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003206 RValue<Int> res = val;
3207 val -= 1;
3208 return res;
3209}
3210
Ben Clayton713b8d32019-12-17 20:37:56 +00003211const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003212{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003213 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003214 val -= 1;
3215 return val;
3216}
3217
3218RValue<Int> RoundInt(RValue<Float> cast)
3219{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003220 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003221 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003222 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003223 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3224 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003225 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003226 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003227 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003228 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003229 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003230 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003231 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003232 ::basicBlock->appendInst(nearbyint);
3233
3234 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003235 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003236}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003237
Nicolas Capens519cf222020-05-08 15:27:19 -04003238Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003239{
3240 return T(Ice::IceType_i32);
3241}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003242
Nicolas Capens519cf222020-05-08 15:27:19 -04003243Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003244{
3245 return T(Ice::IceType_i64);
3246}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003247
Nicolas Capens157ba262019-12-10 17:49:14 -05003248UInt::UInt(RValue<Float> cast)
3249{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003250 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003251 // Smallest positive value representable in UInt, but not in Int
3252 const unsigned int ustart = 0x80000000u;
3253 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003254
Nicolas Capens157ba262019-12-10 17:49:14 -05003255 // If the value is negative, store 0, otherwise store the result of the conversion
3256 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003257 // Check if the value can be represented as an Int
3258 IfThenElse(cast >= ustartf,
3259 // If the value is too large, subtract ustart and re-add it after conversion.
3260 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3261 // Otherwise, just convert normally
3262 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003263 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003264}
Nicolas Capensa8086512016-11-07 17:32:17 -05003265
Ben Clayton713b8d32019-12-17 20:37:56 +00003266RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003268 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003269 RValue<UInt> res = val;
3270 val += 1;
3271 return res;
3272}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003273
Ben Clayton713b8d32019-12-17 20:37:56 +00003274const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003277 val += 1;
3278 return val;
3279}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003280
Ben Clayton713b8d32019-12-17 20:37:56 +00003281RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003282{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003283 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003284 RValue<UInt> res = val;
3285 val -= 1;
3286 return res;
3287}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003288
Ben Clayton713b8d32019-12-17 20:37:56 +00003289const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003292 val -= 1;
3293 return val;
3294}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003295
//	RValue<UInt> RoundUInt(RValue<Float> cast)
//	{
//		ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
//	}
3300
Nicolas Capens519cf222020-05-08 15:27:19 -04003301Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003302{
3303 return T(Ice::IceType_i32);
3304}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003305
//	Int2::Int2(RValue<Int> cast)
//	{
//		Value *extend = Nucleus::createZExt(cast.value(), Long::type());
//		Value *vector = Nucleus::createBitCast(extend, Int2::type());
//
//		Constant *shuffle[2];
//		shuffle[0] = Nucleus::createConstantInt(0);
//		shuffle[1] = Nucleus::createConstantInt(0);
//
//		Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
//
//		storeValue(replicate);
//	}
3319
Nicolas Capens157ba262019-12-10 17:49:14 -05003320RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3321{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003322 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003323 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003324 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003325 Int2 result;
3326 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3327 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003328
Nicolas Capens157ba262019-12-10 17:49:14 -05003329 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003330 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003331 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003332 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003333 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003334 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003335}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003336
Nicolas Capens157ba262019-12-10 17:49:14 -05003337RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3338{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003339 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003340 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003341 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003342 Int2 result;
3343 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3344 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3345
3346 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003347 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003348 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003349 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003350 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003351 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003352}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003353
Nicolas Capens519cf222020-05-08 15:27:19 -04003354Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003355{
3356 return T(Type_v2i32);
3357}
3358
3359RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3360{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003361 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003362 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003363 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003364 UInt2 result;
3365 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3366 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003367
Nicolas Capens157ba262019-12-10 17:49:14 -05003368 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003369 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003370 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003371 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003372 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003373 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003374}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003375
Nicolas Capens157ba262019-12-10 17:49:14 -05003376RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3377{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003378 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003379 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003380 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003381 UInt2 result;
3382 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3383 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003384
Nicolas Capens157ba262019-12-10 17:49:14 -05003385 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003386 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003387 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003388 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003389 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003390 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003391}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003392
Nicolas Capens519cf222020-05-08 15:27:19 -04003393Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003394{
3395 return T(Type_v2i32);
3396}
3397
Ben Clayton713b8d32019-12-17 20:37:56 +00003398Int4::Int4(RValue<Byte4> cast)
3399 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003400{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003401 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003402 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003403 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3404
3405 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003406 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003407 Value *b = Nucleus::createBitCast(a, Byte16::type());
3408 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003409
Ben Clayton713b8d32019-12-17 20:37:56 +00003410 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003411 Value *d = Nucleus::createBitCast(c, Short8::type());
3412 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003413
Nicolas Capens519cf222020-05-08 15:27:19 -04003414 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003415 storeValue(f);
3416}
3417
Ben Clayton713b8d32019-12-17 20:37:56 +00003418Int4::Int4(RValue<SByte4> cast)
3419 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003420{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003421 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003422 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003423 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3424
Ben Clayton713b8d32019-12-17 20:37:56 +00003425 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003426 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003427 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3428
Ben Clayton713b8d32019-12-17 20:37:56 +00003429 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003430 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003431 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3432
3433 *this = As<Int4>(e) >> 24;
3434}
3435
Ben Clayton713b8d32019-12-17 20:37:56 +00003436Int4::Int4(RValue<Short4> cast)
3437 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003438{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003439 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003440 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003441 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003442
3443 *this = As<Int4>(c) >> 16;
3444}
3445
Ben Clayton713b8d32019-12-17 20:37:56 +00003446Int4::Int4(RValue<UShort4> cast)
3447 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003448{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003449 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003450 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003451 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003452 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003453 storeValue(d);
3454}
3455
Ben Clayton713b8d32019-12-17 20:37:56 +00003456Int4::Int4(RValue<Int> rhs)
3457 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003458{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003459 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003460 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003461
Ben Clayton713b8d32019-12-17 20:37:56 +00003462 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003463 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3464
3465 storeValue(replicate);
3466}
3467
3468RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3469{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003470 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003471 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003472 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003473 Int4 result;
3474 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3475 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3476 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3477 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003478
Nicolas Capens157ba262019-12-10 17:49:14 -05003479 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003480 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003481 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003482 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003483 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003484 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003485}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003486
Nicolas Capens157ba262019-12-10 17:49:14 -05003487RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3488{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003489 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003490 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003491 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003492 Int4 result;
3493 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3494 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3495 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3496 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003497
Nicolas Capens157ba262019-12-10 17:49:14 -05003498 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003499 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003500 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003501 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003502 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003503 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003504}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003505
Nicolas Capens157ba262019-12-10 17:49:14 -05003506RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3507{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003508 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003509 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003510}
3511
3512RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3513{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003514 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003515 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003516}
3517
3518RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3519{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003520 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003521 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003522}
3523
3524RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3525{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003526 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003527 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003528}
3529
3530RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3531{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003532 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003533 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003534}
3535
3536RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3537{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003538 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003539 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003540}
3541
3542RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3543{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003544 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003545 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003546 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003547 ::basicBlock->appendInst(cmp);
3548
3549 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003550 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003551 ::basicBlock->appendInst(select);
3552
3553 return RValue<Int4>(V(result));
3554}
3555
3556RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3557{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003558 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003559 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003560 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003561 ::basicBlock->appendInst(cmp);
3562
3563 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003564 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003565 ::basicBlock->appendInst(select);
3566
3567 return RValue<Int4>(V(result));
3568}
3569
3570RValue<Int4> RoundInt(RValue<Float4> cast)
3571{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003572 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003573 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003574 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003575 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3576 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003577 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003578 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003579 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003580 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003581 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003582 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003583 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003584 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003585
3586 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003587 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003588}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003589
Nicolas Capenseeb81842021-01-12 17:44:40 -05003590RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3591{
3592 RR_DEBUG_INFO_UPDATE_LOC();
3593
3594 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3595 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3596 // saturate to 0x80000000.
3597 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3598
3599 if(emulateIntrinsics || CPUID::ARM)
3600 {
3601 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3602 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3603 }
3604 else
3605 {
3606 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3607 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003608 auto nearbyint = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capenseeb81842021-01-12 17:44:40 -05003609 nearbyint->addArg(clamped.value());
3610 ::basicBlock->appendInst(nearbyint);
3611
3612 return RValue<Int4>(V(result));
3613 }
3614}
3615
Nicolas Capens157ba262019-12-10 17:49:14 -05003616RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3617{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003618 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003619 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003620 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003621 Short8 result;
3622 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3623 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3624 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3625 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3626 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3627 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3628 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3629 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003630
Nicolas Capens157ba262019-12-10 17:49:14 -05003631 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003632 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003633 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003634 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003635 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003636 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003637 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003638 pack->addArg(x.value());
3639 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003640 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003641
Nicolas Capens157ba262019-12-10 17:49:14 -05003642 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003643 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003644}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003645
Nicolas Capens157ba262019-12-10 17:49:14 -05003646RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3647{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003648 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003649 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003650 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003651 RValue<Int4> sx = As<Int4>(x);
3652 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003653
Nicolas Capens157ba262019-12-10 17:49:14 -05003654 RValue<Int4> sy = As<Int4>(y);
3655 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003656
Nicolas Capens157ba262019-12-10 17:49:14 -05003657 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003658 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003659 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003660 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003661 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003662 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003663 auto pack = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003664 pack->addArg(x.value());
3665 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003666 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003667
Nicolas Capens157ba262019-12-10 17:49:14 -05003668 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003669 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003670}
Nicolas Capens33438a62017-09-27 11:47:35 -04003671
Nicolas Capens157ba262019-12-10 17:49:14 -05003672RValue<Int> SignMask(RValue<Int4> x)
3673{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003674 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003675 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003676 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003677 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3678 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003679 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003680 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003681 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003682 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003683 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003684 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003685 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003686 ::basicBlock->appendInst(movmsk);
3687
3688 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003689 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003690}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003691
Nicolas Capens519cf222020-05-08 15:27:19 -04003692Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003693{
3694 return T(Ice::IceType_v4i32);
3695}
3696
Ben Clayton713b8d32019-12-17 20:37:56 +00003697UInt4::UInt4(RValue<Float4> cast)
3698 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003699{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003700 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003701 // Smallest positive value representable in UInt, but not in Int
3702 const unsigned int ustart = 0x80000000u;
3703 const float ustartf = float(ustart);
3704
3705 // Check if the value can be represented as an Int
3706 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3707 // If the value is too large, subtract ustart and re-add it after conversion.
3708 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003709 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003710 (~uiValue & Int4(cast));
3711 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003712 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003713}
3714
Ben Clayton713b8d32019-12-17 20:37:56 +00003715UInt4::UInt4(RValue<UInt> rhs)
3716 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003717{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003718 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003719 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003720
Ben Clayton713b8d32019-12-17 20:37:56 +00003721 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003722 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3723
3724 storeValue(replicate);
3725}
3726
3727RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3728{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003729 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003730 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003731 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003732 UInt4 result;
3733 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3734 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3735 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3736 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003737
Nicolas Capens157ba262019-12-10 17:49:14 -05003738 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003739 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003740 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003741 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003742 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003743 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003744}
Ben Clayton88816fa2019-05-15 17:08:14 +01003745
Nicolas Capens157ba262019-12-10 17:49:14 -05003746RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3747{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003748 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003749 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003750 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003751 UInt4 result;
3752 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3753 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3754 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3755 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003756
Nicolas Capens157ba262019-12-10 17:49:14 -05003757 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003758 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003759 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003760 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003761 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003762 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003763}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003764
Nicolas Capens157ba262019-12-10 17:49:14 -05003765RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3766{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003767 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003768 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003769}
3770
3771RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3772{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003773 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003774 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003775}
3776
3777RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003780 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003781}
3782
3783RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3784{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003785 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003786 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003787}
3788
3789RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003792 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003793}
3794
3795RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003798 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003799}
3800
3801RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3802{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003803 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003804 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003805 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003806 ::basicBlock->appendInst(cmp);
3807
3808 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003809 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003810 ::basicBlock->appendInst(select);
3811
3812 return RValue<UInt4>(V(result));
3813}
3814
3815RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3816{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003817 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003818 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003819 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003820 ::basicBlock->appendInst(cmp);
3821
3822 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003823 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003824 ::basicBlock->appendInst(select);
3825
3826 return RValue<UInt4>(V(result));
3827}
3828
Nicolas Capens519cf222020-05-08 15:27:19 -04003829Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003830{
3831 return T(Ice::IceType_v4i32);
3832}
3833
Nicolas Capens519cf222020-05-08 15:27:19 -04003834Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003835{
3836 return T(Ice::IceType_i16);
3837}
3838
3839RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3840{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003841 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003842 return 1.0f / x;
3843}
3844
3845RValue<Float> RcpSqrt_pp(RValue<Float> x)
3846{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003847 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003848 return Rcp_pp(Sqrt(x));
3849}
3850
3851RValue<Float> Sqrt(RValue<Float> x)
3852{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003853 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003854 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003855 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05003856 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003857 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003858 ::basicBlock->appendInst(sqrt);
3859
3860 return RValue<Float>(V(result));
3861}
3862
3863RValue<Float> Round(RValue<Float> x)
3864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003866 return Float4(Round(Float4(x))).x;
3867}
3868
3869RValue<Float> Trunc(RValue<Float> x)
3870{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003871 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003872 return Float4(Trunc(Float4(x))).x;
3873}
3874
3875RValue<Float> Frac(RValue<Float> x)
3876{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003877 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003878 return Float4(Frac(Float4(x))).x;
3879}
3880
3881RValue<Float> Floor(RValue<Float> x)
3882{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003883 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003884 return Float4(Floor(Float4(x))).x;
3885}
3886
3887RValue<Float> Ceil(RValue<Float> x)
3888{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003889 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003890 return Float4(Ceil(Float4(x))).x;
3891}
3892
Nicolas Capens519cf222020-05-08 15:27:19 -04003893Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003894{
3895 return T(Ice::IceType_f32);
3896}
3897
Nicolas Capens519cf222020-05-08 15:27:19 -04003898Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003899{
3900 return T(Type_v2f32);
3901}
3902
Ben Clayton713b8d32019-12-17 20:37:56 +00003903Float4::Float4(RValue<Float> rhs)
3904 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003905{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003906 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003907 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003908
Ben Clayton713b8d32019-12-17 20:37:56 +00003909 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003910 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3911
3912 storeValue(replicate);
3913}
3914
3915RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3916{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003917 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003918 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003919 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003920 ::basicBlock->appendInst(cmp);
3921
3922 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003923 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003924 ::basicBlock->appendInst(select);
3925
3926 return RValue<Float4>(V(result));
3927}
3928
3929RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003932 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003933 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003934 ::basicBlock->appendInst(cmp);
3935
3936 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003937 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003938 ::basicBlock->appendInst(select);
3939
3940 return RValue<Float4>(V(result));
3941}
3942
3943RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3944{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003945 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003946 return Float4(1.0f) / x;
3947}
3948
3949RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3950{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003951 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003952 return Rcp_pp(Sqrt(x));
3953}
3954
// Always returns false. The RcpApprox() overloads in this file are
// UNREACHABLE stubs, so this presumably tells callers not to use them.
bool HasRcpApprox()
{
	// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
	constexpr bool hasApproximateReciprocal = false;
	return hasApproximateReciprocal;
}
3960
3961RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
3962{
3963 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3964 UNREACHABLE("RValue<Float4> RcpApprox()");
3965 return { 0.0f };
3966}
3967
3968RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
3969{
3970 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3971 UNREACHABLE("RValue<Float> RcpApprox()");
3972 return { 0.0f };
3973}
3974
// Always returns false. The RcpSqrtApprox() overloads in this file are
// UNREACHABLE stubs, so this presumably tells callers not to use them.
bool HasRcpSqrtApprox()
{
	constexpr bool hasApproximateReciprocalSqrt = false;
	return hasApproximateReciprocalSqrt;
}
3979
3980RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
3981{
3982 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3983 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
3984 return { 0.0f };
3985}
3986
3987RValue<Float> RcpSqrtApprox(RValue<Float> x)
3988{
3989 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3990 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
3991 return { 0.0f };
3992}
3993
Nicolas Capens157ba262019-12-10 17:49:14 -05003994RValue<Float4> Sqrt(RValue<Float4> x)
3995{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003996 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003997 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003998 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003999 Float4 result;
4000 result.x = Sqrt(Float(Float4(x).x));
4001 result.y = Sqrt(Float(Float4(x).y));
4002 result.z = Sqrt(Float(Float4(x).z));
4003 result.w = Sqrt(Float(Float4(x).w));
4004
4005 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04004006 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004007 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04004008 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004009 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004010 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004011 auto sqrt = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004012 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04004013 ::basicBlock->appendInst(sqrt);
4014
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04004015 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04004016 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04004017}
Nicolas Capens157ba262019-12-10 17:49:14 -05004018
4019RValue<Int> SignMask(RValue<Float4> x)
4020{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004021 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004022 if(emulateIntrinsics || CPUID::ARM)
4023 {
4024 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
4025 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
4026 }
4027 else
4028 {
4029 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004030 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004031 auto movmsk = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004032 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004033 ::basicBlock->appendInst(movmsk);
4034
4035 return RValue<Int>(V(result));
4036 }
4037}
4038
4039RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
4040{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004041 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004042 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004043}
4044
4045RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
4046{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004047 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004048 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004049}
4050
4051RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4052{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004053 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004054 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004055}
4056
4057RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4058{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004059 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004060 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004061}
4062
4063RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4064{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004065 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004066 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004067}
4068
4069RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4070{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004071 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004072 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004073}
4074
4075RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4076{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004077 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004078 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004079}
4080
4081RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4082{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004083 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004084 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004085}
4086
4087RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4088{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004089 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004090 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004091}
4092
4093RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4094{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004095 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004096 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004097}
4098
4099RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4100{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004101 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004102 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004103}
4104
4105RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4106{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004107 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004108 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004109}
4110
4111RValue<Float4> Round(RValue<Float4> x)
4112{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004113 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004114 if(emulateIntrinsics || CPUID::ARM)
4115 {
4116 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4117 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4118 }
4119 else if(CPUID::SSE4_1)
4120 {
4121 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004122 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004123 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004124 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004125 round->addArg(::context->getConstantInt32(0));
4126 ::basicBlock->appendInst(round);
4127
4128 return RValue<Float4>(V(result));
4129 }
4130 else
4131 {
4132 return Float4(RoundInt(x));
4133 }
4134}
4135
4136RValue<Float4> Trunc(RValue<Float4> x)
4137{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004138 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004139 if(CPUID::SSE4_1)
4140 {
4141 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004142 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004143 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004144 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004145 round->addArg(::context->getConstantInt32(3));
4146 ::basicBlock->appendInst(round);
4147
4148 return RValue<Float4>(V(result));
4149 }
4150 else
4151 {
4152 return Float4(Int4(x));
4153 }
4154}
4155
4156RValue<Float4> Frac(RValue<Float4> x)
4157{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004158 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004159 Float4 frc;
4160
4161 if(CPUID::SSE4_1)
4162 {
4163 frc = x - Floor(x);
4164 }
4165 else
4166 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004167 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004168
Ben Clayton713b8d32019-12-17 20:37:56 +00004169 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004170 }
4171
4172 // x - floor(x) can be 1.0 for very small negative x.
4173 // Clamp against the value just below 1.0.
4174 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4175}
4176
4177RValue<Float4> Floor(RValue<Float4> x)
4178{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004179 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004180 if(CPUID::SSE4_1)
4181 {
4182 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004183 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004184 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004185 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004186 round->addArg(::context->getConstantInt32(1));
4187 ::basicBlock->appendInst(round);
4188
4189 return RValue<Float4>(V(result));
4190 }
4191 else
4192 {
4193 return x - Frac(x);
4194 }
4195}
4196
4197RValue<Float4> Ceil(RValue<Float4> x)
4198{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004199 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004200 if(CPUID::SSE4_1)
4201 {
4202 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004203 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004204 auto round = Ice::InstIntrinsic::create(::function, 2, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004205 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004206 round->addArg(::context->getConstantInt32(2));
4207 ::basicBlock->appendInst(round);
4208
4209 return RValue<Float4>(V(result));
4210 }
4211 else
4212 {
4213 return -Floor(-x);
4214 }
4215}
4216
Nicolas Capens519cf222020-05-08 15:27:19 -04004217Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004218{
4219 return T(Ice::IceType_v4f32);
4220}
4221
4222RValue<Long> Ticks()
4223{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004224 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004225 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004226 return Long(Int(0));
4227}
4228
Ben Clayton713b8d32019-12-17 20:37:56 +00004229RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004230{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004231 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004232 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004233}
4234
Ben Clayton713b8d32019-12-17 20:37:56 +00004235RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004236{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004237 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004238 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004239}
4240
Ben Clayton713b8d32019-12-17 20:37:56 +00004241Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004242{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004243 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004244 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004245}
4246
4247void Breakpoint()
4248{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004249 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004250 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004251 auto trap = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Nicolas Capens157ba262019-12-10 17:49:14 -05004252 ::basicBlock->appendInst(trap);
4253}
4254
Ben Clayton713b8d32019-12-17 20:37:56 +00004255void Nucleus::createFence(std::memory_order memoryOrder)
4256{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004257 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004258 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004259 auto inst = Ice::InstIntrinsic::create(::function, 0, nullptr, intrinsic);
Antonio Maiorano370cba52019-12-31 11:36:07 -05004260 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4261 inst->addArg(order);
4262 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004263}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004264
Ben Clayton713b8d32019-12-17 20:37:56 +00004265Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4266{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004267 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004268 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004269 return nullptr;
4270}
4271void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4272{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004273 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004274 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004275}
Nicolas Capens157ba262019-12-10 17:49:14 -05004276
4277RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4278{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004279 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004280 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4281}
4282
4283RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004285 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004286 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4287}
4288
4289void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004292 return emulated::Scatter(base, val, offsets, mask, alignment);
4293}
4294
4295void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4296{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004297 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004298 return emulated::Scatter(base, val, offsets, mask, alignment);
4299}
4300
4301RValue<Float> Exp2(RValue<Float> x)
4302{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004303 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004304 return emulated::Exp2(x);
4305}
4306
4307RValue<Float> Log2(RValue<Float> x)
4308{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004309 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004310 return emulated::Log2(x);
4311}
4312
4313RValue<Float4> Sin(RValue<Float4> x)
4314{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004315 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004316 return optimal::Sin(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004317}
4318
4319RValue<Float4> Cos(RValue<Float4> x)
4320{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004321 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004322 return optimal::Cos(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004323}
4324
4325RValue<Float4> Tan(RValue<Float4> x)
4326{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004327 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004328 return optimal::Tan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004329}
4330
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004331RValue<Float4> Asin(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004332{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004333 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004334 if(p == Precision::Full)
4335 {
4336 return emulated::Asin(x);
4337 }
4338 return optimal::Asin_8_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004339}
4340
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004341RValue<Float4> Acos(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004342{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004343 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004344 // Surprisingly, deqp-vk's precision.acos.highp/mediump tests pass when using the 4-term polynomial approximation
4345 // version of acos, unlike for Asin, which requires higher precision algorithms.
4346 return optimal::Acos_4_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004347}
4348
4349RValue<Float4> Atan(RValue<Float4> x)
4350{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004351 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004352 return optimal::Atan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004353}
4354
4355RValue<Float4> Sinh(RValue<Float4> x)
4356{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004357 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004358 return optimal::Sinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004359}
4360
4361RValue<Float4> Cosh(RValue<Float4> x)
4362{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004363 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004364 return optimal::Cosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004365}
4366
4367RValue<Float4> Tanh(RValue<Float4> x)
4368{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004369 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004370 return optimal::Tanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004371}
4372
4373RValue<Float4> Asinh(RValue<Float4> x)
4374{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004375 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004376 return optimal::Asinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004377}
4378
4379RValue<Float4> Acosh(RValue<Float4> x)
4380{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004381 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004382 return optimal::Acosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004383}
4384
4385RValue<Float4> Atanh(RValue<Float4> x)
4386{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004387 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004388 return optimal::Atanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004389}
4390
4391RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4392{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004393 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004394 return optimal::Atan2(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004395}
4396
4397RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4398{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004399 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004400 return optimal::Pow(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004401}
4402
4403RValue<Float4> Exp(RValue<Float4> x)
4404{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004405 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004406 return optimal::Exp(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004407}
4408
4409RValue<Float4> Log(RValue<Float4> x)
4410{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004411 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004412 return optimal::Log(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004413}
4414
4415RValue<Float4> Exp2(RValue<Float4> x)
4416{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004417 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004418 return optimal::Exp2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004419}
4420
4421RValue<Float4> Log2(RValue<Float4> x)
4422{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004423 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004424 return optimal::Log2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004425}
4426
4427RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4428{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004429 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004430 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004431 {
Ben Claytonce54c592020-02-07 11:30:51 +00004432 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004433 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004434 }
4435 else
4436 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004437 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004438 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004439 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004440 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004441 ::basicBlock->appendInst(ctlz);
4442
4443 return RValue<UInt>(V(result));
4444 }
4445}
4446
4447RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4448{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004449 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004450 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004451 {
Ben Claytonce54c592020-02-07 11:30:51 +00004452 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004453 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004454 }
4455 else
4456 {
4457 // TODO: implement vectorized version in Subzero
4458 UInt4 result;
4459 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4460 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4461 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4462 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4463 return result;
4464 }
4465}
4466
4467RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4468{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004469 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004470 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004471 {
Ben Claytonce54c592020-02-07 11:30:51 +00004472 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004473 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004474 }
4475 else
4476 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004477 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004478 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens33a77f72021-02-08 15:04:38 -05004479 auto ctlz = Ice::InstIntrinsic::create(::function, 1, result, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004480 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004481 ::basicBlock->appendInst(ctlz);
4482
4483 return RValue<UInt>(V(result));
4484 }
4485}
4486
4487RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4488{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004489 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004490 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004491 {
Ben Claytonce54c592020-02-07 11:30:51 +00004492 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004493 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004494 }
4495 else
4496 {
4497 // TODO: implement vectorized version in Subzero
4498 UInt4 result;
4499 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4500 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4501 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4502 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4503 return result;
4504 }
4505}
4506
Antonio Maiorano370cba52019-12-31 11:36:07 -05004507RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4508{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004509 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004510 return emulated::MinAtomic(x, y, memoryOrder);
4511}
4512
4513RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4514{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004515 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004516 return emulated::MinAtomic(x, y, memoryOrder);
4517}
4518
4519RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4520{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004521 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004522 return emulated::MaxAtomic(x, y, memoryOrder);
4523}
4524
4525RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4526{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004527 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004528 return emulated::MaxAtomic(x, y, memoryOrder);
4529}
4530
// When ENABLE_RR_DEBUG_INFO is defined, passes the caller's backtrace to
// emitPrintLocation(). Otherwise this function does nothing.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO)
	emitPrintLocation(getCallerBacktrace());
#endif  // defined(ENABLE_RR_DEBUG_INFO)
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004537void EmitDebugVariable(Value *value) {}
// Intentionally empty: this implementation has nothing to flush.
void FlushDebug()
{
}
4539
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004540namespace {
4541namespace coro {
4542
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004543// Instance data per generated coroutine
4544// This is the "handle" type used for Coroutine functions
4545// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4546struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004547{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004548 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004549 bool done = false; // the coroutine should stop at the next yield()
4550 bool terminated = false; // the coroutine has finished.
4551 bool inRoutine = false; // is the coroutine currently executing?
4552 marl::Scheduler::Fiber *mainFiber = nullptr;
4553 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004554 void *promisePtr = nullptr;
4555};
4556
4557CoroutineData *createCoroutineData()
4558{
4559 return new CoroutineData{};
4560}
4561
4562void destroyCoroutineData(CoroutineData *coroData)
4563{
4564 delete coroData;
4565}
4566
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004567// suspend() pauses execution of the coroutine, and resumes execution from the
4568// caller's call to await().
4569// Returns true if await() is called again, or false if coroutine_destroy()
4570// is called.
4571bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004572{
Ben Claytonc3466532020-03-24 11:54:05 +00004573 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4574 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4575 ASSERT(coroData->inRoutine);
4576 coroData->inRoutine = false;
4577 coroData->mainFiber->notify();
4578 while(!coroData->inRoutine)
4579 {
4580 coroData->routineFiber->wait();
4581 }
4582 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004583}
4584
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004585// resume() is called by await(), blocking until the coroutine calls yield()
4586// or the coroutine terminates.
4587void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004588{
Ben Claytonc3466532020-03-24 11:54:05 +00004589 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4590 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4591 ASSERT(!coroData->inRoutine);
4592 coroData->inRoutine = true;
4593 coroData->routineFiber->notify();
4594 while(coroData->inRoutine)
4595 {
4596 coroData->mainFiber->wait();
4597 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004598}
4599
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004600// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4601// until the coroutine ends, and deletes the coroutine data.
4602void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004603{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004604 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004605 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4606 ASSERT(!coroData->inRoutine);
4607 if(!coroData->terminated)
4608 {
4609 coroData->done = true;
4610 coroData->inRoutine = true;
4611 coroData->routineFiber->notify();
4612 while(!coroData->terminated)
4613 {
4614 coroData->mainFiber->wait();
4615 }
4616 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004617 if(coroData->useInternalScheduler)
4618 {
4619 ::getOrCreateScheduler().unbind();
4620 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004621 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004622}
4623
4624namespace detail {
4625thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4626} // namespace detail
4627
4628void setHandleParam(Nucleus::CoroutineHandle handle)
4629{
4630 ASSERT(!detail::coroHandle);
4631 detail::coroHandle = handle;
4632}
4633
4634Nucleus::CoroutineHandle getHandleParam()
4635{
4636 ASSERT(detail::coroHandle);
4637 auto handle = detail::coroHandle;
4638 detail::coroHandle = {};
4639 return handle;
4640}
4641
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004642bool isDone(Nucleus::CoroutineHandle handle)
4643{
4644 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004645 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004646}
4647
4648void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4649{
4650 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4651 coroData->promisePtr = promisePtr;
4652}
4653
4654void *getPromisePtr(Nucleus::CoroutineHandle handle)
4655{
4656 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4657 return coroData->promisePtr;
4658}
4659
4660} // namespace coro
4661} // namespace
4662
4663// Used to generate coroutines.
4664// Lifetime: from yield to acquireCoroutine
4665class CoroutineGenerator
4666{
4667public:
4668 CoroutineGenerator()
4669 {
4670 }
4671
4672 // Inserts instructions at the top of the current function to make it a coroutine.
4673 void generateCoroutineBegin()
4674 {
4675 // Begin building the main coroutine_begin() function.
4676 // We insert these instructions at the top of the entry node,
4677 // before existing reactor-generated instructions.
4678
4679 // CoroutineHandle coroutine_begin(<Arguments>)
4680 // {
4681 // this->handle = coro::getHandleParam();
4682 //
4683 // YieldType promise;
4684 // coro::setPromisePtr(handle, &promise); // For await
4685 //
4686 // ... <REACTOR CODE> ...
4687 //
4688
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004689 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004690 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004691
4692 // YieldType promise;
4693 // coro::setPromisePtr(handle, &promise); // For await
4694 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004695 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004696 }
4697
4698 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4699 void generateYield(Value *val)
4700 {
4701 // ... <REACTOR CODE> ...
4702 //
4703 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004704 // if (!coro::suspend(handle)) {
4705 // return false; // coroutine has been stopped by the caller.
4706 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004707 //
4708 // ... <REACTOR CODE> ...
4709
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004710 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004711 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004712
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004713 // if (!coro::suspend(handle)) {
4714 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4715 auto doneBlock = Nucleus::createBasicBlock();
4716 auto resumeBlock = Nucleus::createBasicBlock();
4717 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4718
4719 // return false; // coroutine has been stopped by the caller.
4720 ::basicBlock = doneBlock;
4721 Nucleus::createRetVoid(); // coroutine return value is ignored.
4722
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004723 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004724 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004725 }
4726
4727 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4728
4729 // Generates the await function for the current coroutine.
4730 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4731 static FunctionUniquePtr generateAwaitFunction()
4732 {
4733 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4734 // {
4735 // if (coro::isDone())
4736 // {
4737 // return false;
4738 // }
4739 // else // resume
4740 // {
4741 // YieldType* promise = coro::getPromisePtr(handle);
4742 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004743 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004744 // return true;
4745 // }
4746 // }
4747
4748 // Subzero doesn't support bool types (IceType_i1) as return type
4749 const Ice::Type ReturnType = Ice::IceType_i32;
4750 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4751 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4752
4753 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4754 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4755
4756 Ice::Variable *handle = awaitFunc->getArgs()[0];
4757 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4758
4759 auto doneBlock = awaitFunc->makeNode();
4760 {
4761 // return false;
4762 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4763 doneBlock->appendInst(ret);
4764 }
4765
4766 auto resumeBlock = awaitFunc->makeNode();
4767 {
4768 // YieldType* promise = coro::getPromisePtr(handle);
4769 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4770
4771 // *out = *promise;
4772 // Load promise value
4773 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4774 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4775 resumeBlock->appendInst(load);
4776 // Then store it in output param
4777 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4778 resumeBlock->appendInst(store);
4779
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004780 // coro::resume(handle);
4781 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004782
4783 // return true;
4784 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4785 resumeBlock->appendInst(ret);
4786 }
4787
4788 // if (coro::isDone())
4789 // {
4790 // <doneBlock>
4791 // }
4792 // else // resume
4793 // {
4794 // <resumeBlock>
4795 // }
4796 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004797 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004798 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4799 bb->appendInst(br);
4800
4801 return FunctionUniquePtr{ awaitFunc };
4802 }
4803
4804 // Generates the destroy function for the current coroutine.
4805 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4806 static FunctionUniquePtr generateDestroyFunction()
4807 {
4808 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4809 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004810 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004811 // return;
4812 // }
4813
4814 const Ice::Type ReturnType = Ice::IceType_void;
4815 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4816
4817 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4818 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4819
4820 Ice::Variable *handle = destroyFunc->getArgs()[0];
4821
4822 auto *bb = destroyFunc->getEntryNode();
4823
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004824 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4825 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004826
4827 // return;
4828 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4829 bb->appendInst(ret);
4830
4831 return FunctionUniquePtr{ destroyFunc };
4832 }
4833
4834private:
4835 Ice::Variable *handle{};
4836 Ice::Variable *promise{};
4837};
4838
4839static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4840{
4841 // This doubles up as our coroutine handle
4842 auto coroData = coro::createCoroutineData();
4843
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004844 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4845 if(coroData->useInternalScheduler)
4846 {
4847 ::getOrCreateScheduler().bind();
4848 }
4849
Ben Clayton76e9e532020-03-16 20:35:04 +00004850 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004851 // Store handle in TLS so that the coroutine can grab it right away, before
4852 // any fiber switch occurs.
4853 coro::setHandleParam(coroData);
4854
Ben Claytonc3466532020-03-24 11:54:05 +00004855 ASSERT(!coroData->routineFiber);
4856 coroData->routineFiber = marl::Scheduler::Fiber::current();
4857
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004858 beginFunc();
4859
Ben Claytonc3466532020-03-24 11:54:05 +00004860 ASSERT(coroData->inRoutine);
4861 coroData->done = true; // coroutine is done.
4862 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4863 coroData->inRoutine = false;
4864 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004865 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004866
Ben Claytonc3466532020-03-24 11:54:05 +00004867 ASSERT(!coroData->mainFiber);
4868 coroData->mainFiber = marl::Scheduler::Fiber::current();
4869
4870 // block until the first yield or coroutine end
4871 ASSERT(!coroData->inRoutine);
4872 coroData->inRoutine = true;
4873 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4874 while(coroData->inRoutine)
4875 {
4876 coroData->mainFiber->wait();
4877 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004878
4879 return coroData;
4880}
4881
4882void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4883{
4884 // Start by creating a regular function
4885 createFunction(yieldType, params);
4886
4887 // Save in case yield() is called
4888 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4889 ::coroYieldType = yieldType;
4890}
4891
4892void Nucleus::yield(Value *val)
4893{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004894 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004895 Variable::materializeAll();
4896
4897 // On first yield, we start generating coroutine functions
4898 if(!::coroGen)
4899 {
4900 ::coroGen = std::make_shared<CoroutineGenerator>();
4901 ::coroGen->generateCoroutineBegin();
4902 }
4903
4904 ASSERT(::coroGen);
4905 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004906}
4907
Ben Clayton713b8d32019-12-17 20:37:56 +00004908static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4909{
4910 return false;
4911}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004912
4913static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4914{
4915}
Nicolas Capens157ba262019-12-10 17:49:14 -05004916
4917std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4918{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004919 if(::coroGen)
4920 {
4921 // Finish generating coroutine functions
4922 {
4923 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004924 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004925 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004926
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004927 auto awaitFunc = ::coroGen->generateAwaitFunction();
4928 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004929
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004930 // At this point, we no longer need the CoroutineGenerator.
4931 ::coroGen.reset();
4932 ::coroYieldType = nullptr;
4933
4934 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4935 { name, "await", "destroy" },
4936 cfgEdit);
4937
4938 return routine;
4939 }
4940 else
4941 {
4942 {
4943 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004944 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004945 }
4946
4947 ::coroYieldType = nullptr;
4948
4949 // Not an actual coroutine (no yields), so return stubs for await and destroy
4950 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4951
4952 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4953 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4954 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4955 return routine;
4956 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004957}
4958
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004959Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004960{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004961 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4962
4963 if(isCoroutine)
4964 {
4965 return rr::invokeCoroutineBegin(func);
4966 }
4967 else
4968 {
4969 // For regular routines, just invoke the begin func directly
4970 return func();
4971 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004972}
Nicolas Capens157ba262019-12-10 17:49:14 -05004973
4974} // namespace rr