blob: 9b0e0f140d89b98840e61f592ebee9954baa7a2e [file] [log] [blame]
Nicolas Capens598f8d82016-09-26 15:09:10 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maiorano9c14bda2020-09-18 16:33:36 -040016#include "EmulatedIntrinsics.hpp"
17#include "OptimalIntrinsics.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050018#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000019#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050020#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040021
Nicolas Capens1a3ce872018-10-10 10:42:36 -040022#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000023#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040024
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040026#include "src/IceCfgNode.h"
27#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceELFStreamer.h"
29#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040030#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000031#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040032
Ben Clayton713b8d32019-12-17 20:37:56 +000033#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040034#include "llvm/Support/FileSystem.h"
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -050035#include "llvm/Support/ManagedStatic.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040036#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040037
Antonio Maiorano8bce0672020-02-28 13:13:45 -050038#include "marl/event.h"
39
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000041# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040042#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040043
Nicolas Capensbd65da92017-01-05 16:31:06 -050044#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000045# ifndef WIN32_LEAN_AND_MEAN
46# define WIN32_LEAN_AND_MEAN
47# endif // !WIN32_LEAN_AND_MEAN
48# ifndef NOMINMAX
49# define NOMINMAX
50# endif // !NOMINMAX
51# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050052#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040053
Ben Clayton683bad82020-02-10 23:57:09 +000054#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040055#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000056#include <limits>
57#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040058
// Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050062namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050063
64Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
65{
66 uint32_t sequenceNumber = 0;
67 auto function = Ice::Cfg::create(context, sequenceNumber).release();
68
69 Ice::CfgLocalAllocatorScope allocScope{ function };
70
71 for(auto type : paramTypes)
72 {
73 Ice::Variable *arg = function->makeVariable(type);
74 function->addArg(arg);
75 }
76
77 Ice::CfgNode *node = function->makeNode();
78 function->setEntryNode(node);
79
80 return function;
81}
82
83Ice::Type getPointerType(Ice::Type elementType)
84{
85 if(sizeof(void *) == 8)
86 {
87 return Ice::IceType_i64;
88 }
89 else
90 {
91 return Ice::IceType_i32;
92 }
93}
94
95Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
96{
97 int typeSize = Ice::typeWidthInBytes(type);
98 int totalSize = typeSize * (arraySize ? arraySize : 1);
99
100 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
101 auto address = function->makeVariable(getPointerType(type));
102 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
103 function->getEntryNode()->getInsts().push_front(alloca);
104
105 return address;
106}
107
108Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500109{
110 if(sizeof(void *) == 8)
111 {
112 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
113 }
114 else
115 {
116 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
117 }
118}
119
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400120// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
121Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500122
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400123// Wrapper for calls on C functions with Ice types
124Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
125{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500126 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400127
128 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
129 // then truncate result to bool.
130 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
131 const bool returningBool = (retTy == Ice::IceType_i1);
132 if(returningBool)
133 {
134 ret = function->makeVariable(Ice::IceType_i32);
135 }
136 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500137 {
138 ret = function->makeVariable(retTy);
139 }
140
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400141 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500142 for(auto arg : iceArgs)
143 {
144 call->addArg(arg);
145 }
146
147 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400148
149 if(returningBool)
150 {
151 // Truncate result to bool so that if any (lsb) bits were set, result will be true
152 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
153 }
154
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500155 return ret;
156}
157
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400158Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
159{
160 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
161 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
162}
163
Antonio Maiorano62427e02020-02-13 09:18:05 -0500164// Wrapper for calls on C functions with Ice types
165template<typename Return, typename... CArgs, typename... RArgs>
166Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
167{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400168 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
169
Nicolas Capens519cf222020-05-08 15:27:19 -0400170 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500171 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500172 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500173}
174
Antonio Maiorano02a39532020-01-21 15:15:34 -0500175// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500176Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500177{
178 Ice::Variable *result = function->makeVariable(v->getType());
179 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
180 basicBlock->appendInst(cast);
181 return result;
182}
183
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400184Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
185{
186 Ice::Variable *to = function->makeVariable(toType);
187 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
188 basicBlock->appendInst(cast);
189 return to;
190}
191
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500192Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500193{
194 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
195 // absolute address. We circumvent this by casting to a non-const variable, and loading
196 // from that.
197 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
198 {
199 ptr = sz::createUnconstCast(function, basicBlock, cptr);
200 }
201
202 Ice::Variable *result = function->makeVariable(type);
203 auto load = Ice::InstLoad::create(function, result, ptr, align);
204 basicBlock->appendInst(load);
205
206 return result;
207}
208
209} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500210} // namespace
211
// Forward declarations of rr types that the file-local globals below point to.
namespace rr {
class CoroutineGenerator;
class ELFMemoryStreamer;
}  // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500216
217namespace {
218
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -0500219// Used to automatically invoke llvm_shutdown() when driver is unloaded
220llvm::llvm_shutdown_obj llvmShutdownObj;
221
Nicolas Capens157ba262019-12-10 17:49:14 -0500222// Default configuration settings. Must be accessed under mutex lock.
223std::mutex defaultConfigLock;
224rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000225{
Nicolas Capens157ba262019-12-10 17:49:14 -0500226 // This uses a static in a function to avoid the cost of a global static
227 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
228 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000229 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500230 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000231}
232
Nicolas Capens157ba262019-12-10 17:49:14 -0500233Ice::GlobalContext *context = nullptr;
234Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400235Ice::CfgNode *entryBlock = nullptr;
236Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500237Ice::CfgNode *basicBlock = nullptr;
238Ice::CfgLocalAllocatorScope *allocator = nullptr;
239rr::ELFMemoryStreamer *routine = nullptr;
240
241std::mutex codegenMutex;
242
243Ice::ELFFileStreamer *elfFile = nullptr;
244Ice::Fdstream *out = nullptr;
245
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500246// Coroutine globals
247rr::Type *coroYieldType = nullptr;
248std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500249marl::Scheduler &getOrCreateScheduler()
250{
251 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100252 marl::Scheduler::Config cfg;
253 cfg.setWorkerThreadCount(8);
254 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500255 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500256
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500257 return *scheduler;
258}
Nicolas Capens157ba262019-12-10 17:49:14 -0500259} // Anonymous namespace
260
261namespace {
262
// Define the GCC/Clang-style architecture macros when only the MSVC-style
// ones (_M_IX86, _M_AMD64, _M_X64) are present, so the code below can test
// a single spelling.
#if defined(_M_IX86) && !defined(__i386__)
#	define __i386__ 1
#endif

#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
#	define __x86_64__ 1
#endif
270
Antonio Maiorano370cba52019-12-31 11:36:07 -0500271Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400272{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500273 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100274 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500275 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000276 case rr::Optimization::Level::None: return Ice::Opt_m1;
277 case rr::Optimization::Level::Less: return Ice::Opt_m1;
278 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500279 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
280 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100281 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500282 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400283}
284
Antonio Maiorano370cba52019-12-31 11:36:07 -0500285Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
286{
287 switch(memoryOrder)
288 {
289 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
290 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
291 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
292 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
293 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
294 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
295 }
296 return Ice::Intrinsics::MemoryOrderInvalid;
297}
298
// Host CPU feature flags, evaluated once during static initialization.
class CPUID
{
public:
	const static bool ARM;     // True when compiled for 32-bit or 64-bit ARM.
	const static bool SSE4_1;  // True when the x86 CPUID query reports SSE4.1.

private:
	// Executes the x86 CPUID instruction for leaf `info` and stores
	// EAX, EBX, ECX, EDX in registers[0..3]. On non-x86 builds all four
	// outputs are set to zero.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Compile-time architecture check; unknown architectures fail the build.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__)
		return false;
#elif defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Queries CPUID leaf 1 and tests bit 19 (mask 0x00080000) of ECX.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		const int featureLeaf = 1;
		const int sse41Mask = 0x00080000;

		int regs[4];
		cpuid(regs, featureLeaf);

		return (regs[2] & sse41Mask) != 0;
#else
		return false;
#endif
	}
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
351const bool emulateIntrinsics = false;
352const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400353
Nicolas Capens157ba262019-12-10 17:49:14 -0500354constexpr bool subzeroDumpEnabled = false;
355constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500356
357#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500358static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
359static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500360#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500361
362} // anonymous namespace
363
364namespace rr {
365
// Returns the name of this Reactor backend.
std::string BackendName()
{
	return std::string("Subzero");
}
370
Ben Clayton713b8d32019-12-17 20:37:56 +0000371const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500372 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500373};
374
375enum EmulatedType
376{
377 EmulatedShift = 16,
378 EmulatedV2 = 2 << EmulatedShift,
379 EmulatedV4 = 4 << EmulatedShift,
380 EmulatedV8 = 8 << EmulatedShift,
381 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
382
383 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
384 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
385 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000386 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
387 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500388 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
389};
390
Ben Clayton713b8d32019-12-17 20:37:56 +0000391class Value : public Ice::Operand
392{};
393class SwitchCases : public Ice::InstSwitch
394{};
395class BasicBlock : public Ice::CfgNode
396{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500397
398Ice::Type T(Type *t)
399{
400 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
401 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500402}
403
Nicolas Capens157ba262019-12-10 17:49:14 -0500404Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400405{
Ben Clayton713b8d32019-12-17 20:37:56 +0000406 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500407}
408
409Type *T(EmulatedType t)
410{
Ben Clayton713b8d32019-12-17 20:37:56 +0000411 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500412}
413
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500414std::vector<Ice::Type> T(const std::vector<Type *> &types)
415{
416 std::vector<Ice::Type> result;
417 result.reserve(types.size());
418 for(auto &t : types)
419 {
420 result.push_back(T(t));
421 }
422 return result;
423}
424
Nicolas Capens157ba262019-12-10 17:49:14 -0500425Value *V(Ice::Operand *v)
426{
Ben Clayton713b8d32019-12-17 20:37:56 +0000427 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500428}
429
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500430Ice::Operand *V(Value *v)
431{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500432 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500433}
434
Antonio Maiorano62427e02020-02-13 09:18:05 -0500435std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
436{
437 std::vector<Ice::Operand *> result;
438 result.reserve(values.size());
439 for(auto &v : values)
440 {
441 result.push_back(V(v));
442 }
443 return result;
444}
445
Nicolas Capens157ba262019-12-10 17:49:14 -0500446BasicBlock *B(Ice::CfgNode *b)
447{
Ben Clayton713b8d32019-12-17 20:37:56 +0000448 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500449}
450
451static size_t typeSize(Type *type)
452{
453 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400454 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500455 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800456 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000457 case Type_v2i32: return 8;
458 case Type_v4i16: return 8;
459 case Type_v2i16: return 4;
460 case Type_v8i8: return 8;
461 case Type_v4i8: return 4;
462 case Type_v2f32: return 8;
463 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500464 }
465 }
466
467 return Ice::typeWidthInBytes(T(type));
468}
469
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400470static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500471{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400472 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500473 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
474 {
475 Nucleus::createRetVoid();
476 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400477
478 // Connect the entry block to the top of the initial basic block
479 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
480 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500481}
482
Ben Clayton713b8d32019-12-17 20:37:56 +0000483using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
484using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500485
486inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
487{
Ben Clayton713b8d32019-12-17 20:37:56 +0000488 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500489}
490
491inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
492{
493 return &sectionHeader(elfHeader)[index];
494}
495
496static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
497{
498 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
499
500 uint32_t index = relocation.getSymbol();
501 int table = relocationTable.sh_link;
502 void *symbolValue = nullptr;
503
504 if(index != SHN_UNDEF)
505 {
506 if(table == SHN_UNDEF) return nullptr;
507 const SectionHeader *symbolTable = elfSection(elfHeader, table);
508
509 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
510 if(index >= symtab_entries)
511 {
512 ASSERT(index < symtab_entries && "Symbol Index out of range");
513 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800514 }
515
Nicolas Capens157ba262019-12-10 17:49:14 -0500516 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000517 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500518 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800519
Nicolas Capens157ba262019-12-10 17:49:14 -0500520 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400521 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500522 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000523 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400524 }
525 else
526 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500527 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400528 }
Nicolas Capens66478362016-10-13 15:36:36 -0400529 }
530
Nicolas Capens157ba262019-12-10 17:49:14 -0500531 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000532 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500533
534 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400535 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400536 switch(relocation.getType())
537 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000538 case R_ARM_NONE:
539 // No relocation
540 break;
541 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500542 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000543 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500544 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
545 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
546 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400547 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000548 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500549 {
550 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
551 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
552 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400553 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000554 default:
555 ASSERT(false && "Unsupported relocation type");
556 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400557 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500558 }
559 else
560 {
561 switch(relocation.getType())
562 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000563 case R_386_NONE:
564 // No relocation
565 break;
566 case R_386_32:
567 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
568 break;
569 case R_386_PC32:
570 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
571 break;
572 default:
573 ASSERT(false && "Unsupported relocation type");
574 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500575 }
Nicolas Capens66478362016-10-13 15:36:36 -0400576 }
577
Nicolas Capens157ba262019-12-10 17:49:14 -0500578 return symbolValue;
579}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400580
Nicolas Capens157ba262019-12-10 17:49:14 -0500581static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
582{
583 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
584
585 uint32_t index = relocation.getSymbol();
586 int table = relocationTable.sh_link;
587 void *symbolValue = nullptr;
588
589 if(index != SHN_UNDEF)
590 {
591 if(table == SHN_UNDEF) return nullptr;
592 const SectionHeader *symbolTable = elfSection(elfHeader, table);
593
594 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
595 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400596 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400598 return nullptr;
599 }
600
Nicolas Capens157ba262019-12-10 17:49:14 -0500601 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000602 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400604
Nicolas Capens157ba262019-12-10 17:49:14 -0500605 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400606 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500607 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000608 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500609 }
610 else
611 {
612 return nullptr;
613 }
614 }
Nicolas Capens66478362016-10-13 15:36:36 -0400615
Nicolas Capens157ba262019-12-10 17:49:14 -0500616 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000617 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
618 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400619
Nicolas Capens157ba262019-12-10 17:49:14 -0500620 switch(relocation.getType())
621 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000622 case R_X86_64_NONE:
623 // No relocation
624 break;
625 case R_X86_64_64:
626 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
627 break;
628 case R_X86_64_PC32:
629 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
630 break;
631 case R_X86_64_32S:
632 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
633 break;
634 default:
635 ASSERT(false && "Unsupported relocation type");
636 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500637 }
638
639 return symbolValue;
640}
641
// Location of one loaded function inside an ELF image.
// Both members have default initializers so that a default-constructed
// EntryPoint is well defined: no entry address and a zero code size.
struct EntryPoint
{
	const void *entry = nullptr;  // Start address of the function's code, or nullptr if not found.
	size_t codeSize = 0;          // Size in bytes of the section holding the code.
};
647
648std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
649{
650 ASSERT(functionNames.size() > 0);
651 std::vector<EntryPoint> entryPoints(functionNames.size());
652
Ben Clayton713b8d32019-12-17 20:37:56 +0000653 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500654
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400655 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500656 if(!elfHeader->checkMagic())
657 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400658 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500659 }
660
661 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000662 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
663#if defined(__i386__)
664 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
665#elif defined(__x86_64__)
666 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
667#elif defined(__arm__)
668 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
669#elif defined(__aarch64__)
670 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
671#elif defined(__mips__)
672 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
673#else
674# error "Unsupported platform"
675#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500676
Ben Clayton713b8d32019-12-17 20:37:56 +0000677 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500678
679 for(int i = 0; i < elfHeader->e_shnum; i++)
680 {
681 if(sectionHeader[i].sh_type == SHT_PROGBITS)
682 {
683 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
684 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400685 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500686 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400687 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500688
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400689 for(size_t j = 0; j < functionNames.size(); ++j)
690 {
691 if(strstr(sectionName, functionNames[j]) != nullptr)
692 {
693 return j;
694 }
695 }
696
697 UNREACHABLE("Failed to find executable section that matches input function names");
698 return static_cast<size_t>(-1);
699 };
700
701 size_t index = findSectionNameEntryIndex();
702 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
703 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400704 }
705 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500706 else if(sectionHeader[i].sh_type == SHT_REL)
707 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000708 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400709
Nicolas Capens157ba262019-12-10 17:49:14 -0500710 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
711 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000712 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500713 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
714 }
715 }
716 else if(sectionHeader[i].sh_type == SHT_RELA)
717 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000718 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500719
720 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
721 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000722 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500723 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
724 }
725 }
726 }
727
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400728 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500729}
730
731template<typename T>
732struct ExecutableAllocator
733{
734 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000735 template<class U>
736 ExecutableAllocator(const ExecutableAllocator<U> &other)
737 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500738
739 using value_type = T;
740 using size_type = std::size_t;
741
742 T *allocate(size_type n)
743 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000744 return (T *)allocateMemoryPages(
745 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500746 }
747
748 void deallocate(T *p, size_type n)
749 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800750 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500751 }
752};
753
754class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
755{
756 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
757 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
758
759public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000760 ELFMemoryStreamer()
761 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500762 {
763 position = 0;
764 buffer.reserve(0x1000);
765 }
766
767 ~ELFMemoryStreamer() override
768 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500769 }
770
771 void write8(uint8_t Value) override
772 {
773 if(position == (uint64_t)buffer.size())
774 {
775 buffer.push_back(Value);
776 position++;
777 }
778 else if(position < (uint64_t)buffer.size())
779 {
780 buffer[position] = Value;
781 position++;
782 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000783 else
784 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500785 }
786
787 void writeBytes(llvm::StringRef Bytes) override
788 {
789 std::size_t oldSize = buffer.size();
790 buffer.resize(oldSize + Bytes.size());
791 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
792 position += Bytes.size();
793 }
794
795 uint64_t tell() const override { return position; }
796
797 void seek(uint64_t Off) override { position = Off; }
798
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400799 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500800 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400801 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500802
803#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500804 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
805#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400806 for(auto &entryPoint : entryPoints)
807 {
808 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
809 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500810#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500811
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400812 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400813 }
814
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500815 void finalize()
816 {
817 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
818
819 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
820 }
821
Ben Clayton713b8d32019-12-17 20:37:56 +0000822 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400823 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500824 ASSERT(func);
825 funcs[index] = func;
826 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400827
Nicolas Capens157ba262019-12-10 17:49:14 -0500828 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400829 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500830 ASSERT(funcs[index]);
831 return funcs[index];
832 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400833
Antonio Maiorano02a39532020-01-21 15:15:34 -0500834 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500835 {
Nicolas Capens4e75f452021-01-28 01:52:56 -0500836 // Check if we already have a suitable constant.
837 for(const auto &c : constantsPool)
838 {
839 void *ptr = c.data.get();
840 size_t space = c.space;
841
842 void *alignedPtr = std::align(alignment, size, ptr, space);
843
844 if(space < size)
845 {
846 continue;
847 }
848
849 if(memcmp(data, alignedPtr, size) == 0)
850 {
851 return alignedPtr;
852 }
853 }
854
Antonio Maiorano02a39532020-01-21 15:15:34 -0500855 // TODO(b/148086935): Replace with a buffer allocator.
856 size_t space = size + alignment;
857 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
858 void *ptr = buf.get();
859 void *alignedPtr = std::align(alignment, size, ptr, space);
860 ASSERT(alignedPtr);
861 memcpy(alignedPtr, data, size);
Nicolas Capens4e75f452021-01-28 01:52:56 -0500862 constantsPool.emplace_back(std::move(buf), space);
863
Antonio Maiorano02a39532020-01-21 15:15:34 -0500864 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500865 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400866
Nicolas Capens157ba262019-12-10 17:49:14 -0500867private:
Nicolas Capens4e75f452021-01-28 01:52:56 -0500868 struct Constant
869 {
870 Constant(std::unique_ptr<uint8_t[]> data, size_t space)
871 : data(std::move(data))
872 , space(space)
873 {}
874
875 std::unique_ptr<uint8_t[]> data;
876 size_t space;
877 };
878
Ben Clayton713b8d32019-12-17 20:37:56 +0000879 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500880 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
881 std::size_t position;
Nicolas Capens4e75f452021-01-28 01:52:56 -0500882 std::vector<Constant> constantsPool;
Nicolas Capens157ba262019-12-10 17:49:14 -0500883};
884
Antonio Maiorano62427e02020-02-13 09:18:05 -0500885#ifdef ENABLE_RR_PRINT
886void VPrintf(const std::vector<Value *> &vals)
887{
Antonio Maiorano8cbee412020-06-10 15:59:20 -0400888 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(rr::DebugPrintf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500889}
890#endif // ENABLE_RR_PRINT
891
Nicolas Capens157ba262019-12-10 17:49:14 -0500892Nucleus::Nucleus()
893{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400894 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500895
896 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
897 Ice::ClFlags::getParsedClFlags(Flags);
898
Ben Clayton713b8d32019-12-17 20:37:56 +0000899#if defined(__arm__)
900 Flags.setTargetArch(Ice::Target_ARM32);
901 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
902#elif defined(__mips__)
903 Flags.setTargetArch(Ice::Target_MIPS32);
904 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
905#else // x86
906 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
907 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
908#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500909 Flags.setOutFileType(Ice::FT_Elf);
910 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
911 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
912 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
913 Flags.setDisableHybridAssembly(true);
914
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500915 // Emit functions into separate sections in the ELF so we can find them by name
916 Flags.setFunctionSections(true);
917
Nicolas Capens157ba262019-12-10 17:49:14 -0500918 static llvm::raw_os_ostream cout(std::cout);
919 static llvm::raw_os_ostream cerr(std::cerr);
920
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500921 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500922 {
923 // Decorate text asm with liveness info
924 Flags.setDecorateAsm(true);
925 }
926
Ben Clayton713b8d32019-12-17 20:37:56 +0000927 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500928 {
929 std::error_code errorCode;
930 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
931 ::elfFile = new Ice::ELFFileStreamer(*out);
932 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
933 }
934 else
935 {
936 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
937 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
938 ::routine = elfMemory;
939 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400940
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400941#if !__has_feature(memory_sanitizer)
942 // thread_local variables in shared libraries are initialized at load-time,
943 // but this is not observed by MemorySanitizer if the loader itself was not
944 // instrumented, leading to false-positive unitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400945 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400946#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500947 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500948}
949
950Nucleus::~Nucleus()
951{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400952 delete Variable::unmaterializedVariables;
953 Variable::unmaterializedVariables = nullptr;
954
Nicolas Capens157ba262019-12-10 17:49:14 -0500955 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500956 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500957
958 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500959 ::allocator = nullptr;
960
Nicolas Capens157ba262019-12-10 17:49:14 -0500961 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500962 ::function = nullptr;
963
Nicolas Capens157ba262019-12-10 17:49:14 -0500964 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500965 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500966
967 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500968 ::elfFile = nullptr;
969
Nicolas Capens157ba262019-12-10 17:49:14 -0500970 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500971 ::out = nullptr;
972
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400973 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500974 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400975 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500976
977 ::codegenMutex.unlock();
978}
979
980void Nucleus::setDefaultConfig(const Config &cfg)
981{
982 std::unique_lock<std::mutex> lock(::defaultConfigLock);
983 ::defaultConfig() = cfg;
984}
985
986void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
987{
988 std::unique_lock<std::mutex> lock(::defaultConfigLock);
989 auto &config = ::defaultConfig();
990 config = cfgEdit.apply(config);
991}
992
993Config Nucleus::getDefaultConfig()
994{
995 std::unique_lock<std::mutex> lock(::defaultConfigLock);
996 return ::defaultConfig();
997}
998
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500999// This function lowers and produces executable binary code in memory for the input functions,
1000// and returns a Routine with the entry points to these functions.
1001template<size_t Count>
1002static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -05001003{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001004 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
1005 // and GlobalContext::emitItems.
1006
Nicolas Capens81bc9d92019-12-16 15:05:57 -05001007 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -05001008 {
1009 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001010 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -05001011 }
1012
1013 ::context->emitFileHeader();
1014
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001015 // Translate
1016
1017 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -05001018 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001019 Ice::Cfg *currFunc = functions[i];
1020
1021 // Install function allocator in TLS for Cfg-specific container allocators
1022 Ice::CfgLocalAllocatorScope allocScope(currFunc);
1023
1024 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
1025
1026 rr::optimize(currFunc);
1027
1028 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001029 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001030
1031 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001032 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001033
1034 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1035
1036 if(subzeroEmitTextAsm)
1037 {
1038 currFunc->emit();
1039 }
1040
1041 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -05001042 }
1043
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001044 // Emit items
1045
1046 ::context->lowerGlobals("");
1047
Nicolas Capens157ba262019-12-10 17:49:14 -05001048 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001049
1050 for(size_t i = 0; i < Count; ++i)
1051 {
1052 Ice::Cfg *currFunc = functions[i];
1053
1054 // Accumulate globals from functions to emit into the "last" section at the end
1055 auto globals = currFunc->getGlobalInits();
1056 if(globals && !globals->empty())
1057 {
1058 ::context->getGlobals()->merge(globals.get());
1059 }
1060
1061 auto assembler = currFunc->releaseAssembler();
1062 assembler->alignFunction();
1063 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1064 }
1065
Nicolas Capens157ba262019-12-10 17:49:14 -05001066 ::context->lowerGlobals("last");
1067 ::context->lowerConstants();
1068 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001069
Nicolas Capens157ba262019-12-10 17:49:14 -05001070 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001071 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001072 objectWriter->writeNonUserSections();
1073
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001074 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001075 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1076 ASSERT(entryPoints.size() == Count);
1077 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001078 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001079 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001080 }
1081
1082 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001083
1084 Routine *handoffRoutine = ::routine;
1085 ::routine = nullptr;
1086
1087 return std::shared_ptr<Routine>(handoffRoutine);
1088}
1089
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001090std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1091{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001092 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001093 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1094}
1095
Nicolas Capens157ba262019-12-10 17:49:14 -05001096Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1097{
1098 Ice::Type type = T(t);
1099 int typeSize = Ice::typeWidthInBytes(type);
1100 int totalSize = typeSize * (arraySize ? arraySize : 1);
1101
1102 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1103 auto address = ::function->makeVariable(T(getPointerType(t)));
1104 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1105 ::function->getEntryNode()->getInsts().push_front(alloca);
1106
1107 return V(address);
1108}
1109
1110BasicBlock *Nucleus::createBasicBlock()
1111{
1112 return B(::function->makeNode());
1113}
1114
1115BasicBlock *Nucleus::getInsertBlock()
1116{
1117 return B(::basicBlock);
1118}
1119
1120void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1121{
Ben Clayton713b8d32019-12-17 20:37:56 +00001122 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001123
1124 Variable::materializeAll();
1125
1126 ::basicBlock = basicBlock;
1127}
1128
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001129void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001130{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001131 ASSERT(::function == nullptr);
1132 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001133 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001134 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001135 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001136
1137 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1138
1139 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1140 // becomes invalid if another one is created; for example, when creating await and destroy functions
1141 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1142 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001143 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1144
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001145 ::entryBlock = ::function->getEntryNode();
1146 ::basicBlock = ::function->makeNode();
1147 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001148}
1149
1150Value *Nucleus::getArgument(unsigned int index)
1151{
1152 return V(::function->getArgs()[index]);
1153}
1154
1155void Nucleus::createRetVoid()
1156{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001157 RR_DEBUG_INFO_UPDATE_LOC();
1158
Nicolas Capens157ba262019-12-10 17:49:14 -05001159 // Code generated after this point is unreachable, so any variables
1160 // being read can safely return an undefined value. We have to avoid
1161 // materializing variables after the terminator ret instruction.
1162 Variable::killUnmaterialized();
1163
1164 Ice::InstRet *ret = Ice::InstRet::create(::function);
1165 ::basicBlock->appendInst(ret);
1166}
1167
1168void Nucleus::createRet(Value *v)
1169{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001170 RR_DEBUG_INFO_UPDATE_LOC();
1171
Nicolas Capens157ba262019-12-10 17:49:14 -05001172 // Code generated after this point is unreachable, so any variables
1173 // being read can safely return an undefined value. We have to avoid
1174 // materializing variables after the terminator ret instruction.
1175 Variable::killUnmaterialized();
1176
1177 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1178 ::basicBlock->appendInst(ret);
1179}
1180
1181void Nucleus::createBr(BasicBlock *dest)
1182{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001183 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001184 Variable::materializeAll();
1185
1186 auto br = Ice::InstBr::create(::function, dest);
1187 ::basicBlock->appendInst(br);
1188}
1189
1190void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1191{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001192 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001193 Variable::materializeAll();
1194
1195 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1196 ::basicBlock->appendInst(br);
1197}
1198
1199static bool isCommutative(Ice::InstArithmetic::OpKind op)
1200{
1201 switch(op)
1202 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001203 case Ice::InstArithmetic::Add:
1204 case Ice::InstArithmetic::Fadd:
1205 case Ice::InstArithmetic::Mul:
1206 case Ice::InstArithmetic::Fmul:
1207 case Ice::InstArithmetic::And:
1208 case Ice::InstArithmetic::Or:
1209 case Ice::InstArithmetic::Xor:
1210 return true;
1211 default:
1212 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001213 }
1214}
1215
1216static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1217{
1218 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1219
1220 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1221
1222 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1223 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1224 ::basicBlock->appendInst(arithmetic);
1225
1226 return V(result);
1227}
1228
1229Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1230{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001231 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001232 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1233}
1234
1235Value *Nucleus::createSub(Value *lhs, Value *rhs)
1236{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001237 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001238 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1239}
1240
1241Value *Nucleus::createMul(Value *lhs, Value *rhs)
1242{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001243 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001244 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1245}
1246
1247Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1248{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001249 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001250 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1251}
1252
1253Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1254{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001255 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001256 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1257}
1258
1259Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1260{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001261 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001262 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1263}
1264
1265Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1266{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001267 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001268 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1269}
1270
1271Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1272{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001273 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001274 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1275}
1276
1277Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1278{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001279 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001280 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1281}
1282
1283Value *Nucleus::createURem(Value *lhs, Value *rhs)
1284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001285 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001286 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1287}
1288
1289Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001292 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1293}
1294
1295Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1296{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001297 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001298 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1299 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001300 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001301 return nullptr;
1302}
1303
1304RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1305{
1306 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001307}
1308
1309Value *Nucleus::createShl(Value *lhs, Value *rhs)
1310{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001311 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001312 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1313}
1314
1315Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1316{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001317 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001318 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1319}
1320
1321Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1322{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001323 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001324 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1325}
1326
1327Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1328{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001329 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001330 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1331}
1332
1333Value *Nucleus::createOr(Value *lhs, Value *rhs)
1334{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001335 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001336 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1337}
1338
1339Value *Nucleus::createXor(Value *lhs, Value *rhs)
1340{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001341 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001342 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1343}
1344
1345Value *Nucleus::createNeg(Value *v)
1346{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001347 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001348 return createSub(createNullValue(T(v->getType())), v);
1349}
1350
1351Value *Nucleus::createFNeg(Value *v)
1352{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001353 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001354 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1355 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001356
1357 return createFSub(negativeZero, v);
1358}
1359
1360Value *Nucleus::createNot(Value *v)
1361{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001362 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001363 if(Ice::isScalarIntegerType(v->getType()))
1364 {
1365 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1366 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001367 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001368 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001369 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001370 return createXor(v, createConstantVector(c, T(v->getType())));
1371 }
1372}
1373
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001374static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1375{
1376#if defined(__i386__) || defined(__x86_64__)
1377 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1378 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1379 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1380#else
1381 if(atomic)
1382 {
1383 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1384 }
1385 if(memoryOrder != std::memory_order_relaxed)
1386 {
1387 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1388 }
1389#endif
1390
1391 // Vulkan doesn't allow sequential memory order
1392 ASSERT(memoryOrder != std::memory_order_seq_cst);
1393}
1394
Nicolas Capens157ba262019-12-10 17:49:14 -05001395Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1396{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001397 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001398 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001399
1400 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001401 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001402
Ben Clayton713b8d32019-12-17 20:37:56 +00001403 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001404 {
1405 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001406 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001407 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001408 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001409 auto pointer = RValue<Pointer<Byte>>(ptr);
1410 Int x = *Pointer<Int>(pointer);
1411
1412 Int4 vector;
1413 vector = Insert(vector, x, 0);
1414
Antonio Maiorano02a39532020-01-21 15:15:34 -05001415 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001416 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1417 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001418 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001419 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001420 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001421 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001422 auto pointer = RValue<Pointer<Byte>>(ptr);
1423 Int x = *Pointer<Int>(pointer);
1424 Int y = *Pointer<Int>(pointer + 4);
1425
1426 Int4 vector;
1427 vector = Insert(vector, x, 0);
1428 vector = Insert(vector, y, 1);
1429
Antonio Maiorano02a39532020-01-21 15:15:34 -05001430 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001431 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1432 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001433 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001434 else
1435 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001436 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001437 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001438 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001439 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001440 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001441 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001442 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1443 load->addArg(ptr);
1444 load->addArg(::context->getConstantInt32(typeSize(type)));
1445 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001446 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001447 }
1448 else
1449 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001450 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001451 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001452
Antonio Maiorano02a39532020-01-21 15:15:34 -05001453 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001454 return V(result);
1455}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001456
Nicolas Capens157ba262019-12-10 17:49:14 -05001457Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1458{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001459 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001460 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001461
Ben Clayton713b8d32019-12-17 20:37:56 +00001462#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001463 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001464 if(align != 0)
1465 {
1466 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1467 call->addArg(ptr);
1468 call->addArg(::context->getConstantInt64(typeSize(type)));
1469 ::basicBlock->appendInst(call);
1470 }
1471#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001472
Nicolas Capens157ba262019-12-10 17:49:14 -05001473 int valueType = (int)reinterpret_cast<intptr_t>(type);
1474
Ben Clayton713b8d32019-12-17 20:37:56 +00001475 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001476 {
1477 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001478 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001479 if(typeSize(type) == 4)
1480 {
1481 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1482 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1483 ::basicBlock->appendInst(bitcast);
1484
1485 RValue<Int4> v(V(vector));
1486
1487 auto pointer = RValue<Pointer<Byte>>(ptr);
1488 Int x = Extract(v, 0);
1489 *Pointer<Int>(pointer) = x;
1490 }
1491 else if(typeSize(type) == 8)
1492 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001493 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001494 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1495 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1496 ::basicBlock->appendInst(bitcast);
1497
1498 RValue<Int4> v(V(vector));
1499
1500 auto pointer = RValue<Pointer<Byte>>(ptr);
1501 Int x = Extract(v, 0);
1502 *Pointer<Int>(pointer) = x;
1503 Int y = Extract(v, 1);
1504 *Pointer<Int>(pointer + 4) = y;
1505 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001506 else
1507 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001508 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001509 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001510 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001511 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001512 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1513 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1514 store->addArg(value);
1515 store->addArg(ptr);
1516 store->addArg(::context->getConstantInt32(typeSize(type)));
1517 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001518 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001519 }
1520 else
1521 {
1522 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001523
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001524 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001525 ::basicBlock->appendInst(store);
1526 }
1527
1528 return value;
1529}
1530
1531Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1532{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001533 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001534 ASSERT(index->getType() == Ice::IceType_i32);
1535
1536 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1537 {
1538 int32_t offset = constant->getValue() * (int)typeSize(type);
1539
1540 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001541 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001542 return ptr;
1543 }
1544
Nicolas Capens157ba262019-12-10 17:49:14 -05001545 return createAdd(ptr, createConstantInt(offset));
1546 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001547
Nicolas Capens157ba262019-12-10 17:49:14 -05001548 if(!Ice::isByteSizedType(T(type)))
1549 {
1550 index = createMul(index, createConstantInt((int)typeSize(type)));
1551 }
1552
Ben Clayton713b8d32019-12-17 20:37:56 +00001553 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001554 {
1555 if(unsignedIndex)
1556 {
1557 index = createZExt(index, T(Ice::IceType_i64));
1558 }
1559 else
1560 {
1561 index = createSExt(index, T(Ice::IceType_i64));
1562 }
1563 }
1564
1565 return createAdd(ptr, index);
1566}
1567
Antonio Maiorano370cba52019-12-31 11:36:07 -05001568static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1569{
1570 Ice::Variable *result = ::function->makeVariable(value->getType());
1571
1572 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1573 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1574 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1575 auto op = ::context->getConstantInt32(rmwOp);
1576 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1577 inst->addArg(op);
1578 inst->addArg(ptr);
1579 inst->addArg(value);
1580 inst->addArg(order);
1581 ::basicBlock->appendInst(inst);
1582
1583 return V(result);
1584}
1585
Nicolas Capens157ba262019-12-10 17:49:14 -05001586Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1587{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001588 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001589 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001590}
1591
1592Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1593{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001594 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001595 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001596}
1597
1598Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1599{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001600 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001601 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001602}
1603
1604Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1605{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001606 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001607 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001608}
1609
1610Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1611{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001612 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001613 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001614}
1615
1616Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1617{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001618 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001619 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001620}
1621
1622Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1623{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001624 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001625 Ice::Variable *result = ::function->makeVariable(value->getType());
1626
1627 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1628 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1629 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1630 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1631 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1632 inst->addArg(ptr);
1633 inst->addArg(compare);
1634 inst->addArg(value);
1635 inst->addArg(orderEq);
1636 inst->addArg(orderNeq);
1637 ::basicBlock->appendInst(inst);
1638
1639 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001640}
1641
1642static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1643{
1644 if(v->getType() == T(destType))
1645 {
1646 return v;
1647 }
1648
1649 Ice::Variable *result = ::function->makeVariable(T(destType));
1650 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1651 ::basicBlock->appendInst(cast);
1652
1653 return V(result);
1654}
1655
1656Value *Nucleus::createTrunc(Value *v, Type *destType)
1657{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001658 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001659 return createCast(Ice::InstCast::Trunc, v, destType);
1660}
1661
1662Value *Nucleus::createZExt(Value *v, Type *destType)
1663{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001664 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001665 return createCast(Ice::InstCast::Zext, v, destType);
1666}
1667
1668Value *Nucleus::createSExt(Value *v, Type *destType)
1669{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001670 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001671 return createCast(Ice::InstCast::Sext, v, destType);
1672}
1673
1674Value *Nucleus::createFPToUI(Value *v, Type *destType)
1675{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001676 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001677 return createCast(Ice::InstCast::Fptoui, v, destType);
1678}
1679
1680Value *Nucleus::createFPToSI(Value *v, Type *destType)
1681{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001682 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001683 return createCast(Ice::InstCast::Fptosi, v, destType);
1684}
1685
1686Value *Nucleus::createSIToFP(Value *v, Type *destType)
1687{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001688 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001689 return createCast(Ice::InstCast::Sitofp, v, destType);
1690}
1691
1692Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1693{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001694 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001695 return createCast(Ice::InstCast::Fptrunc, v, destType);
1696}
1697
1698Value *Nucleus::createFPExt(Value *v, Type *destType)
1699{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001700 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001701 return createCast(Ice::InstCast::Fpext, v, destType);
1702}
1703
1704Value *Nucleus::createBitCast(Value *v, Type *destType)
1705{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001706 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001707 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1708 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1709 // emulate them by writing to the stack and reading back as the destination type.
1710 if(emulateMismatchedBitCast)
1711 {
1712 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1713 {
1714 Value *address = allocateStackVariable(destType);
1715 createStore(v, address, T(v->getType()));
1716 return createLoad(address, destType);
1717 }
1718 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1719 {
1720 Value *address = allocateStackVariable(T(v->getType()));
1721 createStore(v, address, T(v->getType()));
1722 return createLoad(address, destType);
1723 }
1724 }
1725
1726 return createCast(Ice::InstCast::Bitcast, v, destType);
1727}
1728
1729static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1730{
1731 ASSERT(lhs->getType() == rhs->getType());
1732
1733 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1734 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1735 ::basicBlock->appendInst(cmp);
1736
1737 return V(result);
1738}
1739
1740Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1741{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001742 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001743 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1744}
1745
1746Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1747{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001748 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001749 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1750}
1751
1752Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1753{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001754 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001755 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1756}
1757
1758Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1759{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001760 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001761 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1762}
1763
1764Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1765{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001766 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001767 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1768}
1769
1770Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1771{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001772 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001773 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1774}
1775
1776Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1777{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001778 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001779 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1780}
1781
1782Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1783{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001784 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001785 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1786}
1787
1788Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1789{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001790 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001791 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1792}
1793
1794Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1795{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001796 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001797 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1798}
1799
1800Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1801{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001802 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001803 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1804}
1805
1806static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1807{
1808 ASSERT(lhs->getType() == rhs->getType());
1809 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1810
1811 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1812 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1813 ::basicBlock->appendInst(cmp);
1814
1815 return V(result);
1816}
1817
1818Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1819{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001820 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001821 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1822}
1823
1824Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1825{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001826 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001827 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1828}
1829
1830Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1831{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001832 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001833 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1834}
1835
1836Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1837{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001838 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001839 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1840}
1841
1842Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1843{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001844 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001845 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1846}
1847
1848Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1849{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001850 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001851 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1852}
1853
1854Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1855{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001856 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001857 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1858}
1859
1860Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1861{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001862 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001863 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1864}
1865
1866Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1867{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001868 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001869 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1870}
1871
1872Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1873{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001874 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001875 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1876}
1877
1878Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1879{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001880 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001881 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1882}
1883
1884Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1885{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001886 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001887 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1888}
1889
1890Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1891{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001892 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001893 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1894}
1895
1896Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1897{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001898 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001899 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1900}
1901
1902Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1903{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001904 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001905 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001906 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001907 ::basicBlock->appendInst(extract);
1908
1909 return V(result);
1910}
1911
1912Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1913{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001914 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001915 auto result = ::function->makeVariable(vector->getType());
1916 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1917 ::basicBlock->appendInst(insert);
1918
1919 return V(result);
1920}
1921
1922Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1923{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001924 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001925 ASSERT(V1->getType() == V2->getType());
1926
1927 int size = Ice::typeNumElements(V1->getType());
1928 auto result = ::function->makeVariable(V1->getType());
1929 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1930
1931 for(int i = 0; i < size; i++)
1932 {
1933 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1934 }
1935
1936 ::basicBlock->appendInst(shuffle);
1937
1938 return V(result);
1939}
1940
1941Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1942{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001943 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001944 ASSERT(ifTrue->getType() == ifFalse->getType());
1945
1946 auto result = ::function->makeVariable(ifTrue->getType());
1947 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1948 ::basicBlock->appendInst(select);
1949
1950 return V(result);
1951}
1952
1953SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1954{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001955 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001956 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1957 ::basicBlock->appendInst(switchInst);
1958
Ben Clayton713b8d32019-12-17 20:37:56 +00001959 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001960}
1961
1962void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1963{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001964 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001965 switchCases->addBranch(label, label, branch);
1966}
1967
1968void Nucleus::createUnreachable()
1969{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001970 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001971 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1972 ::basicBlock->appendInst(unreachable);
1973}
1974
Antonio Maiorano62427e02020-02-13 09:18:05 -05001975Type *Nucleus::getType(Value *value)
1976{
1977 return T(V(value)->getType());
1978}
1979
1980Type *Nucleus::getContainedType(Type *vectorType)
1981{
1982 Ice::Type vecTy = T(vectorType);
1983 switch(vecTy)
1984 {
1985 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1986 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1987 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1988 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1989 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1990 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1991 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1992 default:
1993 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1994 return {};
1995 }
1996}
1997
Nicolas Capens157ba262019-12-10 17:49:14 -05001998Type *Nucleus::getPointerType(Type *ElementType)
1999{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05002000 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05002001}
2002
Antonio Maiorano62427e02020-02-13 09:18:05 -05002003static constexpr Ice::Type getNaturalIntType()
2004{
2005 constexpr size_t intSize = sizeof(int);
2006 static_assert(intSize == 4 || intSize == 8, "");
2007 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
2008}
2009
2010Type *Nucleus::getPrintfStorageType(Type *valueType)
2011{
2012 Ice::Type valueTy = T(valueType);
2013 switch(valueTy)
2014 {
2015 case Ice::IceType_i32:
2016 return T(getNaturalIntType());
2017
2018 case Ice::IceType_f32:
2019 return T(Ice::IceType_f64);
2020
2021 default:
2022 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
2023 return {};
2024 }
2025}
2026
Nicolas Capens157ba262019-12-10 17:49:14 -05002027Value *Nucleus::createNullValue(Type *Ty)
2028{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002029 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002030 if(Ice::isVectorType(T(Ty)))
2031 {
2032 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002033 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002034 return createConstantVector(c, Ty);
2035 }
2036 else
2037 {
2038 return V(::context->getConstantZero(T(Ty)));
2039 }
2040}
2041
2042Value *Nucleus::createConstantLong(int64_t i)
2043{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002044 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002045 return V(::context->getConstantInt64(i));
2046}
2047
2048Value *Nucleus::createConstantInt(int i)
2049{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002050 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002051 return V(::context->getConstantInt32(i));
2052}
2053
2054Value *Nucleus::createConstantInt(unsigned int i)
2055{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002056 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002057 return V(::context->getConstantInt32(i));
2058}
2059
2060Value *Nucleus::createConstantBool(bool b)
2061{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002062 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002063 return V(::context->getConstantInt1(b));
2064}
2065
2066Value *Nucleus::createConstantByte(signed char i)
2067{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002068 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002069 return V(::context->getConstantInt8(i));
2070}
2071
2072Value *Nucleus::createConstantByte(unsigned char i)
2073{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002074 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002075 return V(::context->getConstantInt8(i));
2076}
2077
2078Value *Nucleus::createConstantShort(short i)
2079{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002080 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002081 return V(::context->getConstantInt16(i));
2082}
2083
2084Value *Nucleus::createConstantShort(unsigned short i)
2085{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002086 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002087 return V(::context->getConstantInt16(i));
2088}
2089
2090Value *Nucleus::createConstantFloat(float x)
2091{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002092 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002093 return V(::context->getConstantFloat(x));
2094}
2095
2096Value *Nucleus::createNullPointer(Type *Ty)
2097{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002098 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002099 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002100}
2101
Antonio Maiorano02a39532020-01-21 15:15:34 -05002102static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2103{
2104 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2105}
2106
Nicolas Capens157ba262019-12-10 17:49:14 -05002107Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2108{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002109 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002110 const int vectorSize = 16;
2111 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2112 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002113
2114 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002115 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002116
Antonio Maioranoa0957112020-03-04 15:06:19 -05002117 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002118 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002119
2120 switch((int)reinterpret_cast<intptr_t>(type))
2121 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002122 case Ice::IceType_v4i32:
2123 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002124 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002125 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002126 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002127 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002128 }
2129 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002130 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002131 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002132 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002133 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002134 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002135 }
2136 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002137 case Ice::IceType_v8i16:
2138 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002139 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002140 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002141 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002142 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002143 }
2144 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002145 case Ice::IceType_v16i8:
2146 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002147 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002148 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002149 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002150 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002151 }
2152 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002153 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002154 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002155 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002156 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002157 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002158 }
2159 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002160 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002161 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002162 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002163 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002164 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002165 }
2166 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002167 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002168 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002169 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002170 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002171 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002172 }
2173 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002174 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002175 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002176 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002177 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002178 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002179 }
2180 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002181 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002182 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002183 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002184 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002185 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002186 }
2187 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002188 default:
2189 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002190 }
2191
Antonio Maiorano02a39532020-01-21 15:15:34 -05002192 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002193
Antonio Maiorano02a39532020-01-21 15:15:34 -05002194 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002195 return V(result);
2196}
2197
2198Value *Nucleus::createConstantVector(const double *constants, Type *type)
2199{
Ben Clayton713b8d32019-12-17 20:37:56 +00002200 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002201}
2202
Antonio Maiorano62427e02020-02-13 09:18:05 -05002203Value *Nucleus::createConstantString(const char *v)
2204{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002205 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002206 return V(IceConstantData(v, strlen(v) + 1));
2207}
2208
Nicolas Capens519cf222020-05-08 15:27:19 -04002209Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002210{
2211 return T(Ice::IceType_void);
2212}
2213
Nicolas Capens519cf222020-05-08 15:27:19 -04002214Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002215{
2216 return T(Ice::IceType_i1);
2217}
2218
Nicolas Capens519cf222020-05-08 15:27:19 -04002219Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002220{
2221 return T(Ice::IceType_i8);
2222}
2223
Nicolas Capens519cf222020-05-08 15:27:19 -04002224Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002225{
2226 return T(Ice::IceType_i8);
2227}
2228
Nicolas Capens519cf222020-05-08 15:27:19 -04002229Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002230{
2231 return T(Ice::IceType_i16);
2232}
2233
Nicolas Capens519cf222020-05-08 15:27:19 -04002234Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002235{
2236 return T(Ice::IceType_i16);
2237}
2238
Nicolas Capens519cf222020-05-08 15:27:19 -04002239Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002240{
2241 return T(Type_v4i8);
2242}
2243
Nicolas Capens519cf222020-05-08 15:27:19 -04002244Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002245{
2246 return T(Type_v4i8);
2247}
2248
Ben Clayton713b8d32019-12-17 20:37:56 +00002249namespace {
2250RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002251{
Ben Clayton713b8d32019-12-17 20:37:56 +00002252 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002253}
2254
Ben Clayton713b8d32019-12-17 20:37:56 +00002255RValue<Byte> Extract(RValue<Byte8> val, int i)
2256{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002257 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002258}
2259
2260RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2261{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002262 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002263}
2264} // namespace
2265
Nicolas Capens157ba262019-12-10 17:49:14 -05002266RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002268 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002269 if(emulateIntrinsics)
2270 {
2271 Byte8 result;
2272 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2273 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2274 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2275 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2276 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2277 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2278 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2279 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2280
2281 return result;
2282 }
2283 else
2284 {
2285 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002286 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002287 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2288 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002289 paddusb->addArg(x.value());
2290 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002291 ::basicBlock->appendInst(paddusb);
2292
2293 return RValue<Byte8>(V(result));
2294 }
2295}
2296
2297RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2298{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002299 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002300 if(emulateIntrinsics)
2301 {
2302 Byte8 result;
2303 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2304 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2305 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2306 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2307 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2308 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2309 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2310 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2311
2312 return result;
2313 }
2314 else
2315 {
2316 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002317 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002318 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2319 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002320 psubusw->addArg(x.value());
2321 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002322 ::basicBlock->appendInst(psubusw);
2323
2324 return RValue<Byte8>(V(result));
2325 }
2326}
2327
2328RValue<SByte> Extract(RValue<SByte8> val, int i)
2329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002331 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002332}
2333
2334RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002337 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002338}
2339
2340RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002343 if(emulateIntrinsics)
2344 {
2345 SByte8 result;
2346 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2347 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2348 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2349 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2350 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2351 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2352 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2353 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2354
2355 return result;
2356 }
2357 else
2358 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002359#if defined(__i386__) || defined(__x86_64__)
2360 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2361 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2362 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002363
Ben Clayton713b8d32019-12-17 20:37:56 +00002364 return As<SByte8>(hi | lo);
2365#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002366 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002367#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002368 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002369}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002370
Nicolas Capens157ba262019-12-10 17:49:14 -05002371RValue<Int> SignMask(RValue<Byte8> x)
2372{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002373 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002374 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002375 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002376 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2377 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002378 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002379 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002380 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002381 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002382 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002383 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2384 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002385 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002386 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002387
Nicolas Capens157ba262019-12-10 17:49:14 -05002388 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002389 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002390}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002391
2392// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2393// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002394// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002395// }
2396
Nicolas Capens157ba262019-12-10 17:49:14 -05002397RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2398{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002399 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002400 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002401}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002402
Nicolas Capens519cf222020-05-08 15:27:19 -04002403Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002404{
2405 return T(Type_v8i8);
2406}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002407
Nicolas Capens598f8d82016-09-26 15:09:10 -04002408// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2409// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002410// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002411// }
2412
2413// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2414// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002415// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002416// }
2417
Nicolas Capens157ba262019-12-10 17:49:14 -05002418RValue<SByte> SaturateSigned(RValue<Short> x)
2419{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002420 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002421 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2422}
2423
2424RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2425{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002426 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002427 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002428 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002429 SByte8 result;
2430 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2431 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2432 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2433 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2434 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2435 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2436 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2437 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002438
Nicolas Capens157ba262019-12-10 17:49:14 -05002439 return result;
2440 }
2441 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002442 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002443 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002444 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002445 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2446 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002447 paddsb->addArg(x.value());
2448 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002449 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002450
Nicolas Capens157ba262019-12-10 17:49:14 -05002451 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002452 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002453}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002454
Nicolas Capens157ba262019-12-10 17:49:14 -05002455RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2456{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002457 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002458 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002459 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002460 SByte8 result;
2461 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2462 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2463 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2464 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2465 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2466 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2467 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2468 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002469
Nicolas Capens157ba262019-12-10 17:49:14 -05002470 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002471 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002472 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002473 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002474 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002475 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002476 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2477 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002478 psubsb->addArg(x.value());
2479 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002480 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002481
Nicolas Capens157ba262019-12-10 17:49:14 -05002482 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002483 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002484}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002485
Nicolas Capens157ba262019-12-10 17:49:14 -05002486RValue<Int> SignMask(RValue<SByte8> x)
2487{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002488 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002489 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002490 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002491 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2492 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002493 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002494 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002495 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002496 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002497 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002498 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2499 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002500 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002501 ::basicBlock->appendInst(movmsk);
2502
2503 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002504 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002505}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002506
Nicolas Capens157ba262019-12-10 17:49:14 -05002507RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2508{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002509 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002510 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002511}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002512
Nicolas Capens157ba262019-12-10 17:49:14 -05002513RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2514{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002515 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002516 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002517}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002518
Nicolas Capens519cf222020-05-08 15:27:19 -04002519Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002520{
2521 return T(Type_v8i8);
2522}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002523
Nicolas Capens519cf222020-05-08 15:27:19 -04002524Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002525{
2526 return T(Ice::IceType_v16i8);
2527}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002528
Nicolas Capens519cf222020-05-08 15:27:19 -04002529Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002530{
2531 return T(Ice::IceType_v16i8);
2532}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002533
Nicolas Capens519cf222020-05-08 15:27:19 -04002534Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002535{
2536 return T(Type_v2i16);
2537}
Nicolas Capensd4227962016-11-09 14:24:25 -05002538
Nicolas Capens519cf222020-05-08 15:27:19 -04002539Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002540{
2541 return T(Type_v2i16);
2542}
Nicolas Capensd4227962016-11-09 14:24:25 -05002543
Nicolas Capens157ba262019-12-10 17:49:14 -05002544Short4::Short4(RValue<Int4> cast)
2545{
Ben Clayton713b8d32019-12-17 20:37:56 +00002546 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002547 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002548 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2549
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002550 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002551 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002552
2553 storeValue(short4);
2554}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002555
2556// Short4::Short4(RValue<Float> cast)
2557// {
2558// }
2559
Nicolas Capens157ba262019-12-10 17:49:14 -05002560Short4::Short4(RValue<Float4> cast)
2561{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002562 // TODO(b/150791192): Generalize and optimize
2563 auto smin = std::numeric_limits<short>::min();
2564 auto smax = std::numeric_limits<short>::max();
2565 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002566}
2567
2568RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2569{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002570 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002571 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002572 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002573 Short4 result;
2574 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2575 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2576 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2577 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002578
2579 return result;
2580 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002581 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002582 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002583 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002584 }
2585}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002586
Nicolas Capens157ba262019-12-10 17:49:14 -05002587RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2588{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002589 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002590 if(emulateIntrinsics)
2591 {
2592 Short4 result;
2593 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2594 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2595 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2596 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2597
2598 return result;
2599 }
2600 else
2601 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002602 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002603 }
2604}
2605
2606RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2607{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002608 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002609 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002610 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002611 ::basicBlock->appendInst(cmp);
2612
2613 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002614 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002615 ::basicBlock->appendInst(select);
2616
2617 return RValue<Short4>(V(result));
2618}
2619
2620RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2621{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002622 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002623 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002624 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002625 ::basicBlock->appendInst(cmp);
2626
2627 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002628 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002629 ::basicBlock->appendInst(select);
2630
2631 return RValue<Short4>(V(result));
2632}
2633
2634RValue<Short> SaturateSigned(RValue<Int> x)
2635{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002636 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002637 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2638}
2639
2640RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2641{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002642 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002643 if(emulateIntrinsics)
2644 {
2645 Short4 result;
2646 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2647 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2648 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2649 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2650
2651 return result;
2652 }
2653 else
2654 {
2655 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002656 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002657 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2658 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002659 paddsw->addArg(x.value());
2660 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002661 ::basicBlock->appendInst(paddsw);
2662
2663 return RValue<Short4>(V(result));
2664 }
2665}
2666
2667RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2668{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002669 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002670 if(emulateIntrinsics)
2671 {
2672 Short4 result;
2673 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2674 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2675 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2676 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2677
2678 return result;
2679 }
2680 else
2681 {
2682 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002683 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002684 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2685 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002686 psubsw->addArg(x.value());
2687 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002688 ::basicBlock->appendInst(psubsw);
2689
2690 return RValue<Short4>(V(result));
2691 }
2692}
2693
2694RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2695{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002696 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002697 if(emulateIntrinsics)
2698 {
2699 Short4 result;
2700 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2701 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2702 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2703 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2704
2705 return result;
2706 }
2707 else
2708 {
2709 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002710 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002711 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2712 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002713 pmulhw->addArg(x.value());
2714 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002715 ::basicBlock->appendInst(pmulhw);
2716
2717 return RValue<Short4>(V(result));
2718 }
2719}
2720
2721RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2722{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002723 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002724 if(emulateIntrinsics)
2725 {
2726 Int2 result;
2727 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2728 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2729
2730 return result;
2731 }
2732 else
2733 {
2734 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002735 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002736 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2737 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002738 pmaddwd->addArg(x.value());
2739 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002740 ::basicBlock->appendInst(pmaddwd);
2741
2742 return As<Int2>(V(result));
2743 }
2744}
2745
2746RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2747{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002748 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002749 if(emulateIntrinsics)
2750 {
2751 SByte8 result;
2752 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2753 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2754 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2755 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2756 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2757 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2758 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2759 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2760
2761 return result;
2762 }
2763 else
2764 {
2765 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002766 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002767 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2768 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002769 pack->addArg(x.value());
2770 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002771 ::basicBlock->appendInst(pack);
2772
2773 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2774 }
2775}
2776
2777RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002780 if(emulateIntrinsics)
2781 {
2782 Byte8 result;
2783 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2784 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2785 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2786 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2787 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2788 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2789 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2790 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2791
2792 return result;
2793 }
2794 else
2795 {
2796 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002797 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002798 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2799 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002800 pack->addArg(x.value());
2801 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002802 ::basicBlock->appendInst(pack);
2803
2804 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2805 }
2806}
2807
2808RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2809{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002810 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002811 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002812}
2813
2814RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2815{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002816 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002817 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002818}
2819
Nicolas Capens519cf222020-05-08 15:27:19 -04002820Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002821{
2822 return T(Type_v4i16);
2823}
2824
2825UShort4::UShort4(RValue<Float4> cast, bool saturate)
2826{
2827 if(saturate)
2828 {
2829 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002830 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002831 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2832 // PackUnsigned takes care of 0x0000 saturation.
2833 Int4 int4(Min(cast, Float4(0xFFFF)));
2834 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002835 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002836 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002837 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002838 // ARM saturates the 32-bit integer result on overflow/undeflow.
2839 Int4 int4(cast);
2840 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002841 }
2842 else
2843 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002844 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002845 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002846 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002847 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002848 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002849 *this = Short4(Int4(cast));
2850 }
2851}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002852
Nicolas Capens157ba262019-12-10 17:49:14 -05002853RValue<UShort> Extract(RValue<UShort4> val, int i)
2854{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002855 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002856}
2857
2858RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2859{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002860 return RValue<UShort4>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002861}
2862
2863RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002866 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002867
Nicolas Capens157ba262019-12-10 17:49:14 -05002868 {
2869 UShort4 result;
2870 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2871 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2872 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2873 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2874
2875 return result;
2876 }
2877 else
2878 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002879 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002880 }
2881}
2882
2883RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2884{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002885 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002886 if(emulateIntrinsics)
2887 {
2888 UShort4 result;
2889 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2890 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2891 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2892 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2893
2894 return result;
2895 }
2896 else
2897 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002898 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002899 }
2900}
2901
2902RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2903{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002904 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002905 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002906 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002907 ::basicBlock->appendInst(cmp);
2908
2909 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002910 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002911 ::basicBlock->appendInst(select);
2912
2913 return RValue<UShort4>(V(result));
2914}
2915
2916RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2917{
2918 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002919 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002920 ::basicBlock->appendInst(cmp);
2921
2922 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002923 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002924 ::basicBlock->appendInst(select);
2925
2926 return RValue<UShort4>(V(result));
2927}
2928
2929RValue<UShort> SaturateUnsigned(RValue<Int> x)
2930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002932 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2933}
2934
2935RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2936{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002937 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002938 if(emulateIntrinsics)
2939 {
2940 UShort4 result;
2941 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2942 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2943 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2944 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2945
2946 return result;
2947 }
2948 else
2949 {
2950 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002951 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002952 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2953 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002954 paddusw->addArg(x.value());
2955 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002956 ::basicBlock->appendInst(paddusw);
2957
2958 return RValue<UShort4>(V(result));
2959 }
2960}
2961
2962RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2963{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002964 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002965 if(emulateIntrinsics)
2966 {
2967 UShort4 result;
2968 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2969 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2970 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2971 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2972
2973 return result;
2974 }
2975 else
2976 {
2977 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002978 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002979 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2980 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002981 psubusw->addArg(x.value());
2982 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002983 ::basicBlock->appendInst(psubusw);
2984
2985 return RValue<UShort4>(V(result));
2986 }
2987}
2988
2989RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2990{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002991 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002992 if(emulateIntrinsics)
2993 {
2994 UShort4 result;
2995 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2996 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2997 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2998 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2999
3000 return result;
3001 }
3002 else
3003 {
3004 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003005 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003006 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3007 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003008 pmulhuw->addArg(x.value());
3009 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003010 ::basicBlock->appendInst(pmulhuw);
3011
3012 return RValue<UShort4>(V(result));
3013 }
3014}
3015
3016RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
3017{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003018 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003019 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3020
3021 // Scalarized implementation.
3022 Int4 result;
3023 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
3024 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
3025 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
3026 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
3027
3028 return result;
3029}
3030
3031RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3032{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003033 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003034 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3035
3036 if(false) // Partial product based implementation.
3037 {
3038 auto xh = x >> 16;
3039 auto yh = y >> 16;
3040 auto xl = x & UInt4(0x0000FFFF);
3041 auto yl = y & UInt4(0x0000FFFF);
3042 auto xlyh = xl * yh;
3043 auto xhyl = xh * yl;
3044 auto xlyhh = xlyh >> 16;
3045 auto xhylh = xhyl >> 16;
3046 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3047 auto xhyll = xhyl & UInt4(0x0000FFFF);
3048 auto xlylh = (xl * yl) >> 16;
3049 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3050
3051 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003052 }
3053
Nicolas Capens157ba262019-12-10 17:49:14 -05003054 // Scalarized implementation.
3055 Int4 result;
3056 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3057 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3058 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3059 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3060
3061 return As<UInt4>(result);
3062}
3063
3064RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3065{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003066 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003067 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003068 return UShort4(0);
3069}
3070
Nicolas Capens519cf222020-05-08 15:27:19 -04003071Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003072{
3073 return T(Type_v4i16);
3074}
3075
3076RValue<Short> Extract(RValue<Short8> val, int i)
3077{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003078 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003079 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003080}
3081
3082RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3083{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003084 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003085 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003086}
3087
3088RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3089{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003090 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003091 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003092 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003093 Short8 result;
3094 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3095 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3096 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3097 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3098 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3099 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3100 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3101 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003102
Nicolas Capens157ba262019-12-10 17:49:14 -05003103 return result;
3104 }
3105 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003106 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003107 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003108 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003109}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003110
Nicolas Capens157ba262019-12-10 17:49:14 -05003111RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3112{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003113 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003114 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003115 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003116 Short8 result;
3117 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3118 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3119 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3120 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3121 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3122 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3123 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3124 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003125
Nicolas Capens157ba262019-12-10 17:49:14 -05003126 return result;
3127 }
3128 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003129 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003130 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003131 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003132}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003133
Nicolas Capens157ba262019-12-10 17:49:14 -05003134RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3135{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003136 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003137 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003138 return Int4(0);
3139}
3140
3141RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3142{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003143 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003144 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003145 return Short8(0);
3146}
3147
Nicolas Capens519cf222020-05-08 15:27:19 -04003148Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003149{
3150 return T(Ice::IceType_v8i16);
3151}
3152
3153RValue<UShort> Extract(RValue<UShort8> val, int i)
3154{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003155 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003156 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003157}
3158
3159RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3160{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003161 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003162 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003163}
3164
3165RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3166{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003167 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003168 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003169 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003170 UShort8 result;
3171 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3172 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3173 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3174 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3175 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3176 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3177 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3178 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003179
Nicolas Capens157ba262019-12-10 17:49:14 -05003180 return result;
3181 }
3182 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003183 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003184 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003185 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003186}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003187
Nicolas Capens157ba262019-12-10 17:49:14 -05003188RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003190 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003191 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003192 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003193 UShort8 result;
3194 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3195 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3196 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3197 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3198 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3199 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3200 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3201 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003202
Nicolas Capens157ba262019-12-10 17:49:14 -05003203 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003204 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003205 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003206 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003207 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003208 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003209}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003210
Nicolas Capens157ba262019-12-10 17:49:14 -05003211RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3212{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003213 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003214 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003215 return UShort8(0);
3216}
3217
Nicolas Capens519cf222020-05-08 15:27:19 -04003218Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003219{
3220 return T(Ice::IceType_v8i16);
3221}
3222
Ben Clayton713b8d32019-12-17 20:37:56 +00003223RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003224{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003225 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003226 RValue<Int> res = val;
3227 val += 1;
3228 return res;
3229}
3230
Ben Clayton713b8d32019-12-17 20:37:56 +00003231const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003232{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003233 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003234 val += 1;
3235 return val;
3236}
3237
Ben Clayton713b8d32019-12-17 20:37:56 +00003238RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003239{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003240 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003241 RValue<Int> res = val;
3242 val -= 1;
3243 return res;
3244}
3245
Ben Clayton713b8d32019-12-17 20:37:56 +00003246const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003247{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003248 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003249 val -= 1;
3250 return val;
3251}
3252
3253RValue<Int> RoundInt(RValue<Float> cast)
3254{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003255 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003256 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003257 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003258 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3259 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003260 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003261 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003262 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003263 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003264 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003265 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3266 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003267 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003268 ::basicBlock->appendInst(nearbyint);
3269
3270 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003271 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003272}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003273
Nicolas Capens519cf222020-05-08 15:27:19 -04003274Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003275{
3276 return T(Ice::IceType_i32);
3277}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003278
Nicolas Capens519cf222020-05-08 15:27:19 -04003279Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003280{
3281 return T(Ice::IceType_i64);
3282}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003283
Nicolas Capens157ba262019-12-10 17:49:14 -05003284UInt::UInt(RValue<Float> cast)
3285{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003286 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003287 // Smallest positive value representable in UInt, but not in Int
3288 const unsigned int ustart = 0x80000000u;
3289 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003290
Nicolas Capens157ba262019-12-10 17:49:14 -05003291 // If the value is negative, store 0, otherwise store the result of the conversion
3292 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003293 // Check if the value can be represented as an Int
3294 IfThenElse(cast >= ustartf,
3295 // If the value is too large, subtract ustart and re-add it after conversion.
3296 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3297 // Otherwise, just convert normally
3298 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003299 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003300}
Nicolas Capensa8086512016-11-07 17:32:17 -05003301
Ben Clayton713b8d32019-12-17 20:37:56 +00003302RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003303{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003304 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003305 RValue<UInt> res = val;
3306 val += 1;
3307 return res;
3308}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003309
Ben Clayton713b8d32019-12-17 20:37:56 +00003310const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003313 val += 1;
3314 return val;
3315}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003316
Ben Clayton713b8d32019-12-17 20:37:56 +00003317RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003318{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003319 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003320 RValue<UInt> res = val;
3321 val -= 1;
3322 return res;
3323}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003324
Ben Clayton713b8d32019-12-17 20:37:56 +00003325const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003326{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003327 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003328 val -= 1;
3329 return val;
3330}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003331
Nicolas Capens598f8d82016-09-26 15:09:10 -04003332// RValue<UInt> RoundUInt(RValue<Float> cast)
3333// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003334// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003335// }
3336
Nicolas Capens519cf222020-05-08 15:27:19 -04003337Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003338{
3339 return T(Ice::IceType_i32);
3340}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003341
3342// Int2::Int2(RValue<Int> cast)
3343// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003344// Value *extend = Nucleus::createZExt(cast.value(), Long::type());
Nicolas Capens519cf222020-05-08 15:27:19 -04003345// Value *vector = Nucleus::createBitCast(extend, Int2::type());
Nicolas Capens598f8d82016-09-26 15:09:10 -04003346//
3347// Constant *shuffle[2];
3348// shuffle[0] = Nucleus::createConstantInt(0);
3349// shuffle[1] = Nucleus::createConstantInt(0);
3350//
Nicolas Capens519cf222020-05-08 15:27:19 -04003351// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003352//
3353// storeValue(replicate);
3354// }
3355
Nicolas Capens157ba262019-12-10 17:49:14 -05003356RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3357{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003358 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003359 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003360 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003361 Int2 result;
3362 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3363 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003364
Nicolas Capens157ba262019-12-10 17:49:14 -05003365 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003366 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003367 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003368 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003369 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003370 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003371}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003372
Nicolas Capens157ba262019-12-10 17:49:14 -05003373RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3374{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003375 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003376 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003377 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003378 Int2 result;
3379 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3380 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3381
3382 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003383 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003384 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003385 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003386 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003387 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003388}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003389
Nicolas Capens519cf222020-05-08 15:27:19 -04003390Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003391{
3392 return T(Type_v2i32);
3393}
3394
3395RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3396{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003397 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003398 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003399 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003400 UInt2 result;
3401 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3402 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003403
Nicolas Capens157ba262019-12-10 17:49:14 -05003404 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003405 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003406 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003407 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003408 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003409 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003410}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003411
Nicolas Capens157ba262019-12-10 17:49:14 -05003412RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3413{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003414 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003415 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003416 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003417 UInt2 result;
3418 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3419 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003420
Nicolas Capens157ba262019-12-10 17:49:14 -05003421 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003422 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003423 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003424 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003425 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003426 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003427}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003428
Nicolas Capens519cf222020-05-08 15:27:19 -04003429Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003430{
3431 return T(Type_v2i32);
3432}
3433
Ben Clayton713b8d32019-12-17 20:37:56 +00003434Int4::Int4(RValue<Byte4> cast)
3435 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003436{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003437 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003438 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003439 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3440
3441 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003442 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003443 Value *b = Nucleus::createBitCast(a, Byte16::type());
3444 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003445
Ben Clayton713b8d32019-12-17 20:37:56 +00003446 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003447 Value *d = Nucleus::createBitCast(c, Short8::type());
3448 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003449
Nicolas Capens519cf222020-05-08 15:27:19 -04003450 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003451 storeValue(f);
3452}
3453
Ben Clayton713b8d32019-12-17 20:37:56 +00003454Int4::Int4(RValue<SByte4> cast)
3455 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003456{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003457 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003458 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003459 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3460
Ben Clayton713b8d32019-12-17 20:37:56 +00003461 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003462 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003463 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3464
Ben Clayton713b8d32019-12-17 20:37:56 +00003465 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003466 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003467 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3468
3469 *this = As<Int4>(e) >> 24;
3470}
3471
Ben Clayton713b8d32019-12-17 20:37:56 +00003472Int4::Int4(RValue<Short4> cast)
3473 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003474{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003475 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003476 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003477 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003478
3479 *this = As<Int4>(c) >> 16;
3480}
3481
Ben Clayton713b8d32019-12-17 20:37:56 +00003482Int4::Int4(RValue<UShort4> cast)
3483 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003484{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003485 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003486 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003487 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003488 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003489 storeValue(d);
3490}
3491
Ben Clayton713b8d32019-12-17 20:37:56 +00003492Int4::Int4(RValue<Int> rhs)
3493 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003494{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003495 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003496 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003497
Ben Clayton713b8d32019-12-17 20:37:56 +00003498 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003499 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3500
3501 storeValue(replicate);
3502}
3503
3504RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3505{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003506 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003507 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003508 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003509 Int4 result;
3510 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3511 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3512 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3513 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003514
Nicolas Capens157ba262019-12-10 17:49:14 -05003515 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003516 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003517 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003518 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003519 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003520 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003521}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003522
Nicolas Capens157ba262019-12-10 17:49:14 -05003523RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3524{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003525 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003526 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003527 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003528 Int4 result;
3529 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3530 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3531 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3532 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003533
Nicolas Capens157ba262019-12-10 17:49:14 -05003534 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003535 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003536 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003537 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003538 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003539 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003540}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003541
Nicolas Capens157ba262019-12-10 17:49:14 -05003542RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3543{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003544 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003545 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003546}
3547
3548RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3549{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003550 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003551 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003552}
3553
3554RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3555{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003556 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003557 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003558}
3559
3560RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3561{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003562 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003563 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003564}
3565
3566RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3567{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003568 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003569 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003570}
3571
3572RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3573{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003574 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003575 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003576}
3577
3578RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3579{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003580 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003581 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003582 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003583 ::basicBlock->appendInst(cmp);
3584
3585 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003586 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003587 ::basicBlock->appendInst(select);
3588
3589 return RValue<Int4>(V(result));
3590}
3591
3592RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3593{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003594 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003595 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003596 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003597 ::basicBlock->appendInst(cmp);
3598
3599 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003600 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003601 ::basicBlock->appendInst(select);
3602
3603 return RValue<Int4>(V(result));
3604}
3605
3606RValue<Int4> RoundInt(RValue<Float4> cast)
3607{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003608 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003609 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003610 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003611 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3612 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003613 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003614 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003615 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003616 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003617 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003618 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3619 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003620 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003621 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003622
3623 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003624 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003625}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003626
Nicolas Capenseeb81842021-01-12 17:44:40 -05003627RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3628{
3629 RR_DEBUG_INFO_UPDATE_LOC();
3630
3631 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3632 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3633 // saturate to 0x80000000.
3634 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3635
3636 if(emulateIntrinsics || CPUID::ARM)
3637 {
3638 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3639 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3640 }
3641 else
3642 {
3643 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3644 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3645 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3646 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3647 nearbyint->addArg(clamped.value());
3648 ::basicBlock->appendInst(nearbyint);
3649
3650 return RValue<Int4>(V(result));
3651 }
3652}
3653
Nicolas Capens157ba262019-12-10 17:49:14 -05003654RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3655{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003656 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003657 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003658 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003659 Short8 result;
3660 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3661 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3662 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3663 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3664 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3665 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3666 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3667 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003668
Nicolas Capens157ba262019-12-10 17:49:14 -05003669 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003670 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003671 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003672 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003673 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003674 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003675 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3676 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003677 pack->addArg(x.value());
3678 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003679 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003680
Nicolas Capens157ba262019-12-10 17:49:14 -05003681 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003682 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003683}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003684
Nicolas Capens157ba262019-12-10 17:49:14 -05003685RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3686{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003687 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003688 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003689 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003690 RValue<Int4> sx = As<Int4>(x);
3691 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003692
Nicolas Capens157ba262019-12-10 17:49:14 -05003693 RValue<Int4> sy = As<Int4>(y);
3694 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003695
Nicolas Capens157ba262019-12-10 17:49:14 -05003696 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003697 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003698 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003699 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003700 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003701 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003702 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3703 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003704 pack->addArg(x.value());
3705 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003706 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003707
Nicolas Capens157ba262019-12-10 17:49:14 -05003708 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003709 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003710}
Nicolas Capens33438a62017-09-27 11:47:35 -04003711
Nicolas Capens157ba262019-12-10 17:49:14 -05003712RValue<Int> SignMask(RValue<Int4> x)
3713{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003714 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003715 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003716 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003717 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3718 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003719 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003720 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003721 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003722 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003723 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003724 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3725 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003726 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003727 ::basicBlock->appendInst(movmsk);
3728
3729 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003730 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003731}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003732
Nicolas Capens519cf222020-05-08 15:27:19 -04003733Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003734{
3735 return T(Ice::IceType_v4i32);
3736}
3737
Ben Clayton713b8d32019-12-17 20:37:56 +00003738UInt4::UInt4(RValue<Float4> cast)
3739 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003740{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003741 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003742 // Smallest positive value representable in UInt, but not in Int
3743 const unsigned int ustart = 0x80000000u;
3744 const float ustartf = float(ustart);
3745
3746 // Check if the value can be represented as an Int
3747 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3748 // If the value is too large, subtract ustart and re-add it after conversion.
3749 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003750 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003751 (~uiValue & Int4(cast));
3752 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003753 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003754}
3755
Ben Clayton713b8d32019-12-17 20:37:56 +00003756UInt4::UInt4(RValue<UInt> rhs)
3757 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003758{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003759 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003760 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003761
Ben Clayton713b8d32019-12-17 20:37:56 +00003762 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003763 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3764
3765 storeValue(replicate);
3766}
3767
3768RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3769{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003770 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003771 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003772 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003773 UInt4 result;
3774 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3775 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3776 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3777 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003778
Nicolas Capens157ba262019-12-10 17:49:14 -05003779 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003780 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003781 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003782 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003783 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003784 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003785}
Ben Clayton88816fa2019-05-15 17:08:14 +01003786
Nicolas Capens157ba262019-12-10 17:49:14 -05003787RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3788{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003789 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003790 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003791 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003792 UInt4 result;
3793 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3794 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3795 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3796 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003797
Nicolas Capens157ba262019-12-10 17:49:14 -05003798 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003799 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003800 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003801 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003802 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003803 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003804}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003805
Nicolas Capens157ba262019-12-10 17:49:14 -05003806RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3807{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003808 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003809 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003810}
3811
3812RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3813{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003814 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003815 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003816}
3817
3818RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3819{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003820 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003821 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003822}
3823
3824RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3825{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003826 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003827 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003828}
3829
3830RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3831{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003832 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003833 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003834}
3835
3836RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3837{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003838 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003839 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003840}
3841
3842RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3843{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003844 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003845 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003846 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003847 ::basicBlock->appendInst(cmp);
3848
3849 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003850 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003851 ::basicBlock->appendInst(select);
3852
3853 return RValue<UInt4>(V(result));
3854}
3855
3856RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3857{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003858 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003859 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003860 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003861 ::basicBlock->appendInst(cmp);
3862
3863 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003864 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003865 ::basicBlock->appendInst(select);
3866
3867 return RValue<UInt4>(V(result));
3868}
3869
Nicolas Capens519cf222020-05-08 15:27:19 -04003870Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003871{
3872 return T(Ice::IceType_v4i32);
3873}
3874
Nicolas Capens519cf222020-05-08 15:27:19 -04003875Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003876{
3877 return T(Ice::IceType_i16);
3878}
3879
3880RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3881{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003882 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003883 return 1.0f / x;
3884}
3885
3886RValue<Float> RcpSqrt_pp(RValue<Float> x)
3887{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003888 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003889 return Rcp_pp(Sqrt(x));
3890}
3891
3892RValue<Float> Sqrt(RValue<Float> x)
3893{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003894 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003895 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003896 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003897 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3898 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003899 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003900 ::basicBlock->appendInst(sqrt);
3901
3902 return RValue<Float>(V(result));
3903}
3904
3905RValue<Float> Round(RValue<Float> x)
3906{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003907 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003908 return Float4(Round(Float4(x))).x;
3909}
3910
3911RValue<Float> Trunc(RValue<Float> x)
3912{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003913 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003914 return Float4(Trunc(Float4(x))).x;
3915}
3916
3917RValue<Float> Frac(RValue<Float> x)
3918{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003919 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003920 return Float4(Frac(Float4(x))).x;
3921}
3922
3923RValue<Float> Floor(RValue<Float> x)
3924{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003925 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003926 return Float4(Floor(Float4(x))).x;
3927}
3928
3929RValue<Float> Ceil(RValue<Float> x)
3930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003932 return Float4(Ceil(Float4(x))).x;
3933}
3934
Nicolas Capens519cf222020-05-08 15:27:19 -04003935Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003936{
3937 return T(Ice::IceType_f32);
3938}
3939
Nicolas Capens519cf222020-05-08 15:27:19 -04003940Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003941{
3942 return T(Type_v2f32);
3943}
3944
Ben Clayton713b8d32019-12-17 20:37:56 +00003945Float4::Float4(RValue<Float> rhs)
3946 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003947{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003948 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003949 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003950
Ben Clayton713b8d32019-12-17 20:37:56 +00003951 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003952 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3953
3954 storeValue(replicate);
3955}
3956
3957RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3958{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003959 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003960 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003961 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003962 ::basicBlock->appendInst(cmp);
3963
3964 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003965 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003966 ::basicBlock->appendInst(select);
3967
3968 return RValue<Float4>(V(result));
3969}
3970
3971RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3972{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003973 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003974 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003975 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003976 ::basicBlock->appendInst(cmp);
3977
3978 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003979 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003980 ::basicBlock->appendInst(select);
3981
3982 return RValue<Float4>(V(result));
3983}
3984
3985RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3986{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003987 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003988 return Float4(1.0f) / x;
3989}
3990
3991RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3992{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003993 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003994 return Rcp_pp(Sqrt(x));
3995}
3996
// Reports whether RcpApprox() is available in this backend. Always false for now.
// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
bool HasRcpApprox()
{
	const bool supported = false;
	return supported;
}
4002
4003RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
4004{
4005 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4006 UNREACHABLE("RValue<Float4> RcpApprox()");
4007 return { 0.0f };
4008}
4009
4010RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
4011{
4012 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4013 UNREACHABLE("RValue<Float> RcpApprox()");
4014 return { 0.0f };
4015}
4016
// Reports whether RcpSqrtApprox() is available in this backend. Always false for now.
bool HasRcpSqrtApprox()
{
	const bool supported = false;
	return supported;
}
4021
4022RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
4023{
4024 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4025 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
4026 return { 0.0f };
4027}
4028
4029RValue<Float> RcpSqrtApprox(RValue<Float> x)
4030{
4031 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4032 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
4033 return { 0.0f };
4034}
4035
Nicolas Capens157ba262019-12-10 17:49:14 -05004036RValue<Float4> Sqrt(RValue<Float4> x)
4037{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004038 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004039 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04004040 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004041 Float4 result;
4042 result.x = Sqrt(Float(Float4(x).x));
4043 result.y = Sqrt(Float(Float4(x).y));
4044 result.z = Sqrt(Float(Float4(x).z));
4045 result.w = Sqrt(Float(Float4(x).w));
4046
4047 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04004048 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004049 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04004050 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004051 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004052 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04004053 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4054 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004055 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04004056 ::basicBlock->appendInst(sqrt);
4057
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04004058 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04004059 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04004060}
Nicolas Capens157ba262019-12-10 17:49:14 -05004061
4062RValue<Int> SignMask(RValue<Float4> x)
4063{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004064 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004065 if(emulateIntrinsics || CPUID::ARM)
4066 {
4067 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
4068 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
4069 }
4070 else
4071 {
4072 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004073 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004074 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4075 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004076 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004077 ::basicBlock->appendInst(movmsk);
4078
4079 return RValue<Int>(V(result));
4080 }
4081}
4082
4083RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
4084{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004085 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004086 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004087}
4088
4089RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
4090{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004091 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004092 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004093}
4094
4095RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4096{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004097 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004098 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004099}
4100
4101RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4102{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004103 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004104 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004105}
4106
4107RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4108{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004109 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004110 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004111}
4112
4113RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4114{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004115 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004116 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004117}
4118
4119RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4120{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004121 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004122 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004123}
4124
4125RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4126{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004127 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004128 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004129}
4130
4131RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4132{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004133 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004134 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004135}
4136
4137RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4138{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004139 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004140 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004141}
4142
4143RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4144{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004145 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004146 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004147}
4148
4149RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4150{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004151 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004152 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004153}
4154
4155RValue<Float4> Round(RValue<Float4> x)
4156{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004157 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004158 if(emulateIntrinsics || CPUID::ARM)
4159 {
4160 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4161 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4162 }
4163 else if(CPUID::SSE4_1)
4164 {
4165 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004166 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004167 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4168 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004169 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004170 round->addArg(::context->getConstantInt32(0));
4171 ::basicBlock->appendInst(round);
4172
4173 return RValue<Float4>(V(result));
4174 }
4175 else
4176 {
4177 return Float4(RoundInt(x));
4178 }
4179}
4180
4181RValue<Float4> Trunc(RValue<Float4> x)
4182{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004183 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004184 if(CPUID::SSE4_1)
4185 {
4186 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004187 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004188 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4189 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004190 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004191 round->addArg(::context->getConstantInt32(3));
4192 ::basicBlock->appendInst(round);
4193
4194 return RValue<Float4>(V(result));
4195 }
4196 else
4197 {
4198 return Float4(Int4(x));
4199 }
4200}
4201
4202RValue<Float4> Frac(RValue<Float4> x)
4203{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004204 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004205 Float4 frc;
4206
4207 if(CPUID::SSE4_1)
4208 {
4209 frc = x - Floor(x);
4210 }
4211 else
4212 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004213 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004214
Ben Clayton713b8d32019-12-17 20:37:56 +00004215 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004216 }
4217
4218 // x - floor(x) can be 1.0 for very small negative x.
4219 // Clamp against the value just below 1.0.
4220 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4221}
4222
4223RValue<Float4> Floor(RValue<Float4> x)
4224{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004225 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004226 if(CPUID::SSE4_1)
4227 {
4228 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004229 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004230 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4231 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004232 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004233 round->addArg(::context->getConstantInt32(1));
4234 ::basicBlock->appendInst(round);
4235
4236 return RValue<Float4>(V(result));
4237 }
4238 else
4239 {
4240 return x - Frac(x);
4241 }
4242}
4243
4244RValue<Float4> Ceil(RValue<Float4> x)
4245{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004246 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004247 if(CPUID::SSE4_1)
4248 {
4249 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004250 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004251 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4252 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004253 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004254 round->addArg(::context->getConstantInt32(2));
4255 ::basicBlock->appendInst(round);
4256
4257 return RValue<Float4>(V(result));
4258 }
4259 else
4260 {
4261 return -Floor(-x);
4262 }
4263}
4264
Nicolas Capens519cf222020-05-08 15:27:19 -04004265Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004266{
4267 return T(Ice::IceType_v4f32);
4268}
4269
4270RValue<Long> Ticks()
4271{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004272 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004273 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004274 return Long(Int(0));
4275}
4276
Ben Clayton713b8d32019-12-17 20:37:56 +00004277RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004278{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004279 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004280 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004281}
4282
Ben Clayton713b8d32019-12-17 20:37:56 +00004283RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004284{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004285 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004286 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004287}
4288
Ben Clayton713b8d32019-12-17 20:37:56 +00004289Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004290{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004291 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004292 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004293}
4294
4295void Breakpoint()
4296{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004297 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004298 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004299 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4300 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4301 ::basicBlock->appendInst(trap);
4302}
4303
Ben Clayton713b8d32019-12-17 20:37:56 +00004304void Nucleus::createFence(std::memory_order memoryOrder)
4305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004306 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004307 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4308 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4309 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4310 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4311 inst->addArg(order);
4312 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004313}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004314
Ben Clayton713b8d32019-12-17 20:37:56 +00004315Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4316{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004317 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004318 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004319 return nullptr;
4320}
4321void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4322{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004323 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004324 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004325}
Nicolas Capens157ba262019-12-10 17:49:14 -05004326
4327RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4328{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004329 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004330 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4331}
4332
4333RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4334{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004335 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004336 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4337}
4338
4339void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4340{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004341 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004342 return emulated::Scatter(base, val, offsets, mask, alignment);
4343}
4344
4345void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4346{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004347 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004348 return emulated::Scatter(base, val, offsets, mask, alignment);
4349}
4350
4351RValue<Float> Exp2(RValue<Float> x)
4352{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004353 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004354 return emulated::Exp2(x);
4355}
4356
4357RValue<Float> Log2(RValue<Float> x)
4358{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004359 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004360 return emulated::Log2(x);
4361}
4362
4363RValue<Float4> Sin(RValue<Float4> x)
4364{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004365 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004366 return optimal::Sin(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004367}
4368
4369RValue<Float4> Cos(RValue<Float4> x)
4370{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004371 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004372 return optimal::Cos(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004373}
4374
4375RValue<Float4> Tan(RValue<Float4> x)
4376{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004377 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004378 return optimal::Tan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004379}
4380
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004381RValue<Float4> Asin(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004382{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004383 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004384 if(p == Precision::Full)
4385 {
4386 return emulated::Asin(x);
4387 }
4388 return optimal::Asin_8_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004389}
4390
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004391RValue<Float4> Acos(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004392{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004393 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004394 // Surprisingly, deqp-vk's precision.acos.highp/mediump tests pass when using the 4-term polynomial approximation
4395 // version of acos, unlike for Asin, which requires higher precision algorithms.
4396 return optimal::Acos_4_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004397}
4398
4399RValue<Float4> Atan(RValue<Float4> x)
4400{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004401 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004402 return optimal::Atan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004403}
4404
4405RValue<Float4> Sinh(RValue<Float4> x)
4406{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004407 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004408 return optimal::Sinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004409}
4410
4411RValue<Float4> Cosh(RValue<Float4> x)
4412{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004413 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004414 return optimal::Cosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004415}
4416
4417RValue<Float4> Tanh(RValue<Float4> x)
4418{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004419 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004420 return optimal::Tanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004421}
4422
4423RValue<Float4> Asinh(RValue<Float4> x)
4424{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004425 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004426 return optimal::Asinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004427}
4428
4429RValue<Float4> Acosh(RValue<Float4> x)
4430{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004431 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004432 return optimal::Acosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004433}
4434
4435RValue<Float4> Atanh(RValue<Float4> x)
4436{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004437 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004438 return optimal::Atanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004439}
4440
4441RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4442{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004443 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004444 return optimal::Atan2(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004445}
4446
4447RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4448{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004449 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004450 return optimal::Pow(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004451}
4452
4453RValue<Float4> Exp(RValue<Float4> x)
4454{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004455 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004456 return optimal::Exp(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004457}
4458
4459RValue<Float4> Log(RValue<Float4> x)
4460{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004461 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004462 return optimal::Log(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004463}
4464
4465RValue<Float4> Exp2(RValue<Float4> x)
4466{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004467 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004468 return optimal::Exp2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004469}
4470
4471RValue<Float4> Log2(RValue<Float4> x)
4472{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004473 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004474 return optimal::Log2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004475}
4476
4477RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4478{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004479 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004480 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004481 {
Ben Claytonce54c592020-02-07 11:30:51 +00004482 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004483 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004484 }
4485 else
4486 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004487 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004488 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4489 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4490 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004491 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004492 ::basicBlock->appendInst(ctlz);
4493
4494 return RValue<UInt>(V(result));
4495 }
4496}
4497
4498RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4499{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004500 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004501 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004502 {
Ben Claytonce54c592020-02-07 11:30:51 +00004503 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004504 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004505 }
4506 else
4507 {
4508 // TODO: implement vectorized version in Subzero
4509 UInt4 result;
4510 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4511 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4512 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4513 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4514 return result;
4515 }
4516}
4517
4518RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4519{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004520 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004521 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004522 {
Ben Claytonce54c592020-02-07 11:30:51 +00004523 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004524 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004525 }
4526 else
4527 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004528 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004529 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4530 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4531 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004532 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004533 ::basicBlock->appendInst(ctlz);
4534
4535 return RValue<UInt>(V(result));
4536 }
4537}
4538
4539RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4540{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004541 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004542 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004543 {
Ben Claytonce54c592020-02-07 11:30:51 +00004544 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004545 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004546 }
4547 else
4548 {
4549 // TODO: implement vectorized version in Subzero
4550 UInt4 result;
4551 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4552 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4553 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4554 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4555 return result;
4556 }
4557}
4558
Antonio Maiorano370cba52019-12-31 11:36:07 -05004559RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4560{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004561 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004562 return emulated::MinAtomic(x, y, memoryOrder);
4563}
4564
4565RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4566{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004567 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004568 return emulated::MinAtomic(x, y, memoryOrder);
4569}
4570
4571RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4572{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004573 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004574 return emulated::MaxAtomic(x, y, memoryOrder);
4575}
4576
4577RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4578{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004579 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004580 return emulated::MaxAtomic(x, y, memoryOrder);
4581}
4582
// Emits a print of the caller's backtrace location, but only in builds that
// define ENABLE_RR_DEBUG_INFO. In all other builds this function is empty.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004589void EmitDebugVariable(Value *value) {}
// Intentionally empty: there is no pending debug state to flush here.
void FlushDebug()
{
}
4591
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004592namespace {
4593namespace coro {
4594
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004595// Instance data per generated coroutine
4596// This is the "handle" type used for Coroutine functions
4597// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4598struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004599{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004600 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004601 bool done = false; // the coroutine should stop at the next yield()
4602 bool terminated = false; // the coroutine has finished.
4603 bool inRoutine = false; // is the coroutine currently executing?
4604 marl::Scheduler::Fiber *mainFiber = nullptr;
4605 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004606 void *promisePtr = nullptr;
4607};
4608
4609CoroutineData *createCoroutineData()
4610{
4611 return new CoroutineData{};
4612}
4613
4614void destroyCoroutineData(CoroutineData *coroData)
4615{
4616 delete coroData;
4617}
4618
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004619// suspend() pauses execution of the coroutine, and resumes execution from the
4620// caller's call to await().
4621// Returns true if await() is called again, or false if coroutine_destroy()
4622// is called.
4623bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004624{
Ben Claytonc3466532020-03-24 11:54:05 +00004625 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4626 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4627 ASSERT(coroData->inRoutine);
4628 coroData->inRoutine = false;
4629 coroData->mainFiber->notify();
4630 while(!coroData->inRoutine)
4631 {
4632 coroData->routineFiber->wait();
4633 }
4634 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004635}
4636
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004637// resume() is called by await(), blocking until the coroutine calls yield()
4638// or the coroutine terminates.
4639void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004640{
Ben Claytonc3466532020-03-24 11:54:05 +00004641 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4642 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4643 ASSERT(!coroData->inRoutine);
4644 coroData->inRoutine = true;
4645 coroData->routineFiber->notify();
4646 while(coroData->inRoutine)
4647 {
4648 coroData->mainFiber->wait();
4649 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004650}
4651
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004652// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4653// until the coroutine ends, and deletes the coroutine data.
4654void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004655{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004656 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004657 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4658 ASSERT(!coroData->inRoutine);
4659 if(!coroData->terminated)
4660 {
4661 coroData->done = true;
4662 coroData->inRoutine = true;
4663 coroData->routineFiber->notify();
4664 while(!coroData->terminated)
4665 {
4666 coroData->mainFiber->wait();
4667 }
4668 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004669 if(coroData->useInternalScheduler)
4670 {
4671 ::getOrCreateScheduler().unbind();
4672 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004673 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004674}
4675
4676namespace detail {
4677thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4678} // namespace detail
4679
4680void setHandleParam(Nucleus::CoroutineHandle handle)
4681{
4682 ASSERT(!detail::coroHandle);
4683 detail::coroHandle = handle;
4684}
4685
4686Nucleus::CoroutineHandle getHandleParam()
4687{
4688 ASSERT(detail::coroHandle);
4689 auto handle = detail::coroHandle;
4690 detail::coroHandle = {};
4691 return handle;
4692}
4693
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004694bool isDone(Nucleus::CoroutineHandle handle)
4695{
4696 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004697 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004698}
4699
4700void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4701{
4702 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4703 coroData->promisePtr = promisePtr;
4704}
4705
4706void *getPromisePtr(Nucleus::CoroutineHandle handle)
4707{
4708 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4709 return coroData->promisePtr;
4710}
4711
4712} // namespace coro
4713} // namespace
4714
4715// Used to generate coroutines.
4716// Lifetime: from yield to acquireCoroutine
4717class CoroutineGenerator
4718{
4719public:
4720 CoroutineGenerator()
4721 {
4722 }
4723
4724 // Inserts instructions at the top of the current function to make it a coroutine.
4725 void generateCoroutineBegin()
4726 {
4727 // Begin building the main coroutine_begin() function.
4728 // We insert these instructions at the top of the entry node,
4729 // before existing reactor-generated instructions.
4730
4731 // CoroutineHandle coroutine_begin(<Arguments>)
4732 // {
4733 // this->handle = coro::getHandleParam();
4734 //
4735 // YieldType promise;
4736 // coro::setPromisePtr(handle, &promise); // For await
4737 //
4738 // ... <REACTOR CODE> ...
4739 //
4740
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004741 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004742 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004743
4744 // YieldType promise;
4745 // coro::setPromisePtr(handle, &promise); // For await
4746 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004747 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004748 }
4749
4750 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4751 void generateYield(Value *val)
4752 {
4753 // ... <REACTOR CODE> ...
4754 //
4755 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004756 // if (!coro::suspend(handle)) {
4757 // return false; // coroutine has been stopped by the caller.
4758 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004759 //
4760 // ... <REACTOR CODE> ...
4761
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004762 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004763 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004764
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004765 // if (!coro::suspend(handle)) {
4766 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4767 auto doneBlock = Nucleus::createBasicBlock();
4768 auto resumeBlock = Nucleus::createBasicBlock();
4769 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4770
4771 // return false; // coroutine has been stopped by the caller.
4772 ::basicBlock = doneBlock;
4773 Nucleus::createRetVoid(); // coroutine return value is ignored.
4774
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004775 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004776 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004777 }
4778
4779 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4780
4781 // Generates the await function for the current coroutine.
4782 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4783 static FunctionUniquePtr generateAwaitFunction()
4784 {
4785 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4786 // {
4787 // if (coro::isDone())
4788 // {
4789 // return false;
4790 // }
4791 // else // resume
4792 // {
4793 // YieldType* promise = coro::getPromisePtr(handle);
4794 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004795 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004796 // return true;
4797 // }
4798 // }
4799
4800 // Subzero doesn't support bool types (IceType_i1) as return type
4801 const Ice::Type ReturnType = Ice::IceType_i32;
4802 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4803 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4804
4805 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4806 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4807
4808 Ice::Variable *handle = awaitFunc->getArgs()[0];
4809 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4810
4811 auto doneBlock = awaitFunc->makeNode();
4812 {
4813 // return false;
4814 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4815 doneBlock->appendInst(ret);
4816 }
4817
4818 auto resumeBlock = awaitFunc->makeNode();
4819 {
4820 // YieldType* promise = coro::getPromisePtr(handle);
4821 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4822
4823 // *out = *promise;
4824 // Load promise value
4825 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4826 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4827 resumeBlock->appendInst(load);
4828 // Then store it in output param
4829 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4830 resumeBlock->appendInst(store);
4831
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004832 // coro::resume(handle);
4833 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004834
4835 // return true;
4836 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4837 resumeBlock->appendInst(ret);
4838 }
4839
4840 // if (coro::isDone())
4841 // {
4842 // <doneBlock>
4843 // }
4844 // else // resume
4845 // {
4846 // <resumeBlock>
4847 // }
4848 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004849 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004850 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4851 bb->appendInst(br);
4852
4853 return FunctionUniquePtr{ awaitFunc };
4854 }
4855
4856 // Generates the destroy function for the current coroutine.
4857 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4858 static FunctionUniquePtr generateDestroyFunction()
4859 {
4860 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4861 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004862 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004863 // return;
4864 // }
4865
4866 const Ice::Type ReturnType = Ice::IceType_void;
4867 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4868
4869 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4870 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4871
4872 Ice::Variable *handle = destroyFunc->getArgs()[0];
4873
4874 auto *bb = destroyFunc->getEntryNode();
4875
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004876 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4877 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004878
4879 // return;
4880 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4881 bb->appendInst(ret);
4882
4883 return FunctionUniquePtr{ destroyFunc };
4884 }
4885
4886private:
4887 Ice::Variable *handle{};
4888 Ice::Variable *promise{};
4889};
4890
4891static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4892{
4893 // This doubles up as our coroutine handle
4894 auto coroData = coro::createCoroutineData();
4895
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004896 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4897 if(coroData->useInternalScheduler)
4898 {
4899 ::getOrCreateScheduler().bind();
4900 }
4901
Ben Clayton76e9e532020-03-16 20:35:04 +00004902 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004903 // Store handle in TLS so that the coroutine can grab it right away, before
4904 // any fiber switch occurs.
4905 coro::setHandleParam(coroData);
4906
Ben Claytonc3466532020-03-24 11:54:05 +00004907 ASSERT(!coroData->routineFiber);
4908 coroData->routineFiber = marl::Scheduler::Fiber::current();
4909
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004910 beginFunc();
4911
Ben Claytonc3466532020-03-24 11:54:05 +00004912 ASSERT(coroData->inRoutine);
4913 coroData->done = true; // coroutine is done.
4914 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4915 coroData->inRoutine = false;
4916 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004917 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004918
Ben Claytonc3466532020-03-24 11:54:05 +00004919 ASSERT(!coroData->mainFiber);
4920 coroData->mainFiber = marl::Scheduler::Fiber::current();
4921
4922 // block until the first yield or coroutine end
4923 ASSERT(!coroData->inRoutine);
4924 coroData->inRoutine = true;
4925 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4926 while(coroData->inRoutine)
4927 {
4928 coroData->mainFiber->wait();
4929 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004930
4931 return coroData;
4932}
4933
4934void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4935{
4936 // Start by creating a regular function
4937 createFunction(yieldType, params);
4938
4939 // Save in case yield() is called
4940 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4941 ::coroYieldType = yieldType;
4942}
4943
4944void Nucleus::yield(Value *val)
4945{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004946 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004947 Variable::materializeAll();
4948
4949 // On first yield, we start generating coroutine functions
4950 if(!::coroGen)
4951 {
4952 ::coroGen = std::make_shared<CoroutineGenerator>();
4953 ::coroGen->generateCoroutineBegin();
4954 }
4955
4956 ASSERT(::coroGen);
4957 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004958}
4959
Ben Clayton713b8d32019-12-17 20:37:56 +00004960static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4961{
4962 return false;
4963}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004964
4965static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4966{
4967}
Nicolas Capens157ba262019-12-10 17:49:14 -05004968
4969std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4970{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004971 if(::coroGen)
4972 {
4973 // Finish generating coroutine functions
4974 {
4975 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004976 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004977 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004978
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004979 auto awaitFunc = ::coroGen->generateAwaitFunction();
4980 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004981
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004982 // At this point, we no longer need the CoroutineGenerator.
4983 ::coroGen.reset();
4984 ::coroYieldType = nullptr;
4985
4986 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4987 { name, "await", "destroy" },
4988 cfgEdit);
4989
4990 return routine;
4991 }
4992 else
4993 {
4994 {
4995 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004996 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004997 }
4998
4999 ::coroYieldType = nullptr;
5000
5001 // Not an actual coroutine (no yields), so return stubs for await and destroy
5002 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
5003
5004 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
5005 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
5006 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
5007 return routine;
5008 }
Nicolas Capens157ba262019-12-10 17:49:14 -05005009}
5010
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005011Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00005012{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005013 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
5014
5015 if(isCoroutine)
5016 {
5017 return rr::invokeCoroutineBegin(func);
5018 }
5019 else
5020 {
5021 // For regular routines, just invoke the begin func directly
5022 return func();
5023 }
Ben Clayton713b8d32019-12-17 20:37:56 +00005024}
Nicolas Capens157ba262019-12-10 17:49:14 -05005025
5026} // namespace rr