blob: fe53ad7a60c0332d88d45712e40a08ce1bec5535 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maiorano9c14bda2020-09-18 16:33:36 -040016#include "EmulatedIntrinsics.hpp"
17#include "OptimalIntrinsics.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050018#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000019#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050020#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040021
Nicolas Capens1a3ce872018-10-10 10:42:36 -040022#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000023#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040024
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040026#include "src/IceCfgNode.h"
27#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceELFStreamer.h"
29#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040030#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000031#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040032
Ben Clayton713b8d32019-12-17 20:37:56 +000033#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040034#include "llvm/Support/FileSystem.h"
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -050035#include "llvm/Support/ManagedStatic.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040036#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040037
Antonio Maiorano8bce0672020-02-28 13:13:45 -050038#include "marl/event.h"
39
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000041# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040042#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040043
Nicolas Capensbd65da92017-01-05 16:31:06 -050044#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000045# ifndef WIN32_LEAN_AND_MEAN
46# define WIN32_LEAN_AND_MEAN
47# endif // !WIN32_LEAN_AND_MEAN
48# ifndef NOMINMAX
49# define NOMINMAX
50# endif // !NOMINMAX
51# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050052#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040053
Ben Clayton683bad82020-02-10 23:57:09 +000054#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040055#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000056#include <limits>
57#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040058
// Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050062namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050063
64Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
65{
66 uint32_t sequenceNumber = 0;
67 auto function = Ice::Cfg::create(context, sequenceNumber).release();
68
69 Ice::CfgLocalAllocatorScope allocScope{ function };
70
71 for(auto type : paramTypes)
72 {
73 Ice::Variable *arg = function->makeVariable(type);
74 function->addArg(arg);
75 }
76
77 Ice::CfgNode *node = function->makeNode();
78 function->setEntryNode(node);
79
80 return function;
81}
82
83Ice::Type getPointerType(Ice::Type elementType)
84{
85 if(sizeof(void *) == 8)
86 {
87 return Ice::IceType_i64;
88 }
89 else
90 {
91 return Ice::IceType_i32;
92 }
93}
94
95Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
96{
97 int typeSize = Ice::typeWidthInBytes(type);
98 int totalSize = typeSize * (arraySize ? arraySize : 1);
99
100 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
101 auto address = function->makeVariable(getPointerType(type));
102 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
103 function->getEntryNode()->getInsts().push_front(alloca);
104
105 return address;
106}
107
108Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500109{
110 if(sizeof(void *) == 8)
111 {
112 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
113 }
114 else
115 {
116 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
117 }
118}
119
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400120// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
121Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500122
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400123// Wrapper for calls on C functions with Ice types
124Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
125{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500126 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400127
128 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
129 // then truncate result to bool.
130 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
131 const bool returningBool = (retTy == Ice::IceType_i1);
132 if(returningBool)
133 {
134 ret = function->makeVariable(Ice::IceType_i32);
135 }
136 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500137 {
138 ret = function->makeVariable(retTy);
139 }
140
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400141 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500142 for(auto arg : iceArgs)
143 {
144 call->addArg(arg);
145 }
146
147 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400148
149 if(returningBool)
150 {
151 // Truncate result to bool so that if any (lsb) bits were set, result will be true
152 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
153 }
154
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500155 return ret;
156}
157
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400158Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
159{
160 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
161 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
162}
163
Antonio Maiorano62427e02020-02-13 09:18:05 -0500164// Wrapper for calls on C functions with Ice types
165template<typename Return, typename... CArgs, typename... RArgs>
166Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
167{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400168 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
169
Nicolas Capens519cf222020-05-08 15:27:19 -0400170 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500171 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500172 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500173}
174
Antonio Maiorano02a39532020-01-21 15:15:34 -0500175// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500176Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500177{
178 Ice::Variable *result = function->makeVariable(v->getType());
179 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
180 basicBlock->appendInst(cast);
181 return result;
182}
183
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400184Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
185{
186 Ice::Variable *to = function->makeVariable(toType);
187 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
188 basicBlock->appendInst(cast);
189 return to;
190}
191
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500192Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500193{
194 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
195 // absolute address. We circumvent this by casting to a non-const variable, and loading
196 // from that.
197 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
198 {
199 ptr = sz::createUnconstCast(function, basicBlock, cptr);
200 }
201
202 Ice::Variable *result = function->makeVariable(type);
203 auto load = Ice::InstLoad::create(function, result, ptr, align);
204 basicBlock->appendInst(load);
205
206 return result;
207}
208
209} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500210} // namespace
211
Ben Clayton713b8d32019-12-17 20:37:56 +0000212namespace rr {
213class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500214class CoroutineGenerator;
215} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500216
217namespace {
218
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -0500219// Used to automatically invoke llvm_shutdown() when driver is unloaded
220llvm::llvm_shutdown_obj llvmShutdownObj;
221
Nicolas Capens157ba262019-12-10 17:49:14 -0500222// Default configuration settings. Must be accessed under mutex lock.
223std::mutex defaultConfigLock;
224rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000225{
Nicolas Capens157ba262019-12-10 17:49:14 -0500226 // This uses a static in a function to avoid the cost of a global static
227 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
228 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000229 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500230 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000231}
232
Nicolas Capens157ba262019-12-10 17:49:14 -0500233Ice::GlobalContext *context = nullptr;
234Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400235Ice::CfgNode *entryBlock = nullptr;
236Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500237Ice::CfgNode *basicBlock = nullptr;
238Ice::CfgLocalAllocatorScope *allocator = nullptr;
239rr::ELFMemoryStreamer *routine = nullptr;
240
241std::mutex codegenMutex;
242
243Ice::ELFFileStreamer *elfFile = nullptr;
244Ice::Fdstream *out = nullptr;
245
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500246// Coroutine globals
247rr::Type *coroYieldType = nullptr;
248std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500249marl::Scheduler &getOrCreateScheduler()
250{
251 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100252 marl::Scheduler::Config cfg;
253 cfg.setWorkerThreadCount(8);
254 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500255 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500256
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500257 return *scheduler;
258}
Nicolas Capens157ba262019-12-10 17:49:14 -0500259} // Anonymous namespace
260
261namespace {
262
263#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +0000264# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500265#endif
266
Ben Clayton713b8d32019-12-17 20:37:56 +0000267#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
268# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500269#endif
270
Antonio Maiorano370cba52019-12-31 11:36:07 -0500271Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400272{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500273 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100274 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500275 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000276 case rr::Optimization::Level::None: return Ice::Opt_m1;
277 case rr::Optimization::Level::Less: return Ice::Opt_m1;
278 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500279 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
280 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100281 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500282 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400283}
284
Antonio Maiorano370cba52019-12-31 11:36:07 -0500285Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
286{
287 switch(memoryOrder)
288 {
289 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
290 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
291 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
292 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
293 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
294 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
295 }
296 return Ice::Intrinsics::MemoryOrderInvalid;
297}
298
// Host CPU feature flags, computed once during static initialization.
class CPUID
{
public:
	const static bool ARM;     // True when compiled for 32-bit or 64-bit ARM.
	const static bool SSE4_1;  // True when the x86 cpuid instruction reports SSE4.1.

private:
	// Executes the x86 cpuid instruction for leaf `info`, storing EAX, EBX, ECX and EDX
	// into registers[0..3]. On non-x86 builds all four entries are zeroed instead.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Decided purely at compile time from the target architecture macros.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Queries cpuid leaf 1 and tests bit 19 (0x00080000) of ECX. Always false off x86.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		const int sse41Mask = 0x00080000;
		int leaf1[4];
		cpuid(leaf1, 1);
		return (leaf1[2] & sse41Mask) != 0;
#else
		return false;
#endif
	}
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
351const bool emulateIntrinsics = false;
352const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400353
Nicolas Capens157ba262019-12-10 17:49:14 -0500354constexpr bool subzeroDumpEnabled = false;
355constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500356
357#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500358static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
359static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500360#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500361
362} // anonymous namespace
363
364namespace rr {
365
// Returns the human-readable name of this Reactor backend.
std::string BackendName()
{
	return std::string("Subzero");
}
370
Ben Clayton713b8d32019-12-17 20:37:56 +0000371const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500372 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500373};
374
375enum EmulatedType
376{
377 EmulatedShift = 16,
378 EmulatedV2 = 2 << EmulatedShift,
379 EmulatedV4 = 4 << EmulatedShift,
380 EmulatedV8 = 8 << EmulatedShift,
381 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
382
383 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
384 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
385 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000386 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
387 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500388 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
389};
390
Ben Clayton713b8d32019-12-17 20:37:56 +0000391class Value : public Ice::Operand
392{};
393class SwitchCases : public Ice::InstSwitch
394{};
395class BasicBlock : public Ice::CfgNode
396{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500397
398Ice::Type T(Type *t)
399{
400 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
401 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500402}
403
Nicolas Capens157ba262019-12-10 17:49:14 -0500404Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400405{
Ben Clayton713b8d32019-12-17 20:37:56 +0000406 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500407}
408
409Type *T(EmulatedType t)
410{
Ben Clayton713b8d32019-12-17 20:37:56 +0000411 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500412}
413
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500414std::vector<Ice::Type> T(const std::vector<Type *> &types)
415{
416 std::vector<Ice::Type> result;
417 result.reserve(types.size());
418 for(auto &t : types)
419 {
420 result.push_back(T(t));
421 }
422 return result;
423}
424
Nicolas Capens157ba262019-12-10 17:49:14 -0500425Value *V(Ice::Operand *v)
426{
Ben Clayton713b8d32019-12-17 20:37:56 +0000427 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500428}
429
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500430Ice::Operand *V(Value *v)
431{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500432 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500433}
434
Antonio Maiorano62427e02020-02-13 09:18:05 -0500435std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
436{
437 std::vector<Ice::Operand *> result;
438 result.reserve(values.size());
439 for(auto &v : values)
440 {
441 result.push_back(V(v));
442 }
443 return result;
444}
445
Nicolas Capens157ba262019-12-10 17:49:14 -0500446BasicBlock *B(Ice::CfgNode *b)
447{
Ben Clayton713b8d32019-12-17 20:37:56 +0000448 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500449}
450
451static size_t typeSize(Type *type)
452{
453 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400454 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500455 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800456 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000457 case Type_v2i32: return 8;
458 case Type_v4i16: return 8;
459 case Type_v2i16: return 4;
460 case Type_v8i8: return 8;
461 case Type_v4i8: return 4;
462 case Type_v2f32: return 8;
463 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500464 }
465 }
466
467 return Ice::typeWidthInBytes(T(type));
468}
469
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400470static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500471{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400472 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500473 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
474 {
475 Nucleus::createRetVoid();
476 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400477
478 // Connect the entry block to the top of the initial basic block
479 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
480 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500481}
482
Ben Clayton713b8d32019-12-17 20:37:56 +0000483using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
484using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500485
486inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
487{
Ben Clayton713b8d32019-12-17 20:37:56 +0000488 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500489}
490
491inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
492{
493 return &sectionHeader(elfHeader)[index];
494}
495
496static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
497{
498 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
499
500 uint32_t index = relocation.getSymbol();
501 int table = relocationTable.sh_link;
502 void *symbolValue = nullptr;
503
504 if(index != SHN_UNDEF)
505 {
506 if(table == SHN_UNDEF) return nullptr;
507 const SectionHeader *symbolTable = elfSection(elfHeader, table);
508
509 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
510 if(index >= symtab_entries)
511 {
512 ASSERT(index < symtab_entries && "Symbol Index out of range");
513 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800514 }
515
Nicolas Capens157ba262019-12-10 17:49:14 -0500516 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000517 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500518 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800519
Nicolas Capens157ba262019-12-10 17:49:14 -0500520 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400521 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500522 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000523 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400524 }
525 else
526 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500527 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400528 }
Nicolas Capens66478362016-10-13 15:36:36 -0400529 }
530
Nicolas Capens157ba262019-12-10 17:49:14 -0500531 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000532 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500533
534 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400535 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400536 switch(relocation.getType())
537 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000538 case R_ARM_NONE:
539 // No relocation
540 break;
541 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500542 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000543 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500544 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
545 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
546 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400547 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000548 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500549 {
550 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
551 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
552 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400553 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000554 default:
555 ASSERT(false && "Unsupported relocation type");
556 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400557 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500558 }
559 else
560 {
561 switch(relocation.getType())
562 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000563 case R_386_NONE:
564 // No relocation
565 break;
566 case R_386_32:
567 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
568 break;
569 case R_386_PC32:
570 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
571 break;
572 default:
573 ASSERT(false && "Unsupported relocation type");
574 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500575 }
Nicolas Capens66478362016-10-13 15:36:36 -0400576 }
577
Nicolas Capens157ba262019-12-10 17:49:14 -0500578 return symbolValue;
579}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400580
Nicolas Capens157ba262019-12-10 17:49:14 -0500581static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
582{
583 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
584
585 uint32_t index = relocation.getSymbol();
586 int table = relocationTable.sh_link;
587 void *symbolValue = nullptr;
588
589 if(index != SHN_UNDEF)
590 {
591 if(table == SHN_UNDEF) return nullptr;
592 const SectionHeader *symbolTable = elfSection(elfHeader, table);
593
594 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
595 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400596 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400598 return nullptr;
599 }
600
Nicolas Capens157ba262019-12-10 17:49:14 -0500601 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000602 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400604
Nicolas Capens157ba262019-12-10 17:49:14 -0500605 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400606 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500607 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000608 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500609 }
610 else
611 {
612 return nullptr;
613 }
614 }
Nicolas Capens66478362016-10-13 15:36:36 -0400615
Nicolas Capens157ba262019-12-10 17:49:14 -0500616 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000617 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
618 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400619
Nicolas Capens157ba262019-12-10 17:49:14 -0500620 switch(relocation.getType())
621 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000622 case R_X86_64_NONE:
623 // No relocation
624 break;
625 case R_X86_64_64:
626 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
627 break;
628 case R_X86_64_PC32:
629 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
630 break;
631 case R_X86_64_32S:
632 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
633 break;
634 default:
635 ASSERT(false && "Unsupported relocation type");
636 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500637 }
638
639 return symbolValue;
640}
641
// Location of one generated function inside a loaded ELF image.
struct EntryPoint
{
	// Address of the function's first byte. Null until loadImage() finds the matching
	// executable section, so a default-constructed EntryPoint never holds an indeterminate pointer.
	const void *entry = nullptr;

	// Size in bytes of the executable section holding the function.
	size_t codeSize = 0;
};
647
648std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
649{
650 ASSERT(functionNames.size() > 0);
651 std::vector<EntryPoint> entryPoints(functionNames.size());
652
Ben Clayton713b8d32019-12-17 20:37:56 +0000653 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500654
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400655 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500656 if(!elfHeader->checkMagic())
657 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400658 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500659 }
660
661 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000662 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
663#if defined(__i386__)
664 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
665#elif defined(__x86_64__)
666 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
667#elif defined(__arm__)
668 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
669#elif defined(__aarch64__)
670 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
671#elif defined(__mips__)
672 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
673#else
674# error "Unsupported platform"
675#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500676
Ben Clayton713b8d32019-12-17 20:37:56 +0000677 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500678
679 for(int i = 0; i < elfHeader->e_shnum; i++)
680 {
681 if(sectionHeader[i].sh_type == SHT_PROGBITS)
682 {
683 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
684 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400685 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500686 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400687 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500688
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400689 for(size_t j = 0; j < functionNames.size(); ++j)
690 {
691 if(strstr(sectionName, functionNames[j]) != nullptr)
692 {
693 return j;
694 }
695 }
696
697 UNREACHABLE("Failed to find executable section that matches input function names");
698 return static_cast<size_t>(-1);
699 };
700
701 size_t index = findSectionNameEntryIndex();
702 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
703 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400704 }
705 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500706 else if(sectionHeader[i].sh_type == SHT_REL)
707 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000708 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400709
Nicolas Capens157ba262019-12-10 17:49:14 -0500710 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
711 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000712 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500713 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
714 }
715 }
716 else if(sectionHeader[i].sh_type == SHT_RELA)
717 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000718 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500719
720 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
721 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000722 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500723 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
724 }
725 }
726 }
727
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400728 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500729}
730
731template<typename T>
732struct ExecutableAllocator
733{
734 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000735 template<class U>
736 ExecutableAllocator(const ExecutableAllocator<U> &other)
737 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500738
739 using value_type = T;
740 using size_type = std::size_t;
741
742 T *allocate(size_type n)
743 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000744 return (T *)allocateMemoryPages(
745 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500746 }
747
748 void deallocate(T *p, size_type n)
749 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800750 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500751 }
752};
753
754class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
755{
756 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
757 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
758
759public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000760 ELFMemoryStreamer()
761 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500762 {
763 position = 0;
764 buffer.reserve(0x1000);
765 }
766
767 ~ELFMemoryStreamer() override
768 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500769 }
770
771 void write8(uint8_t Value) override
772 {
773 if(position == (uint64_t)buffer.size())
774 {
775 buffer.push_back(Value);
776 position++;
777 }
778 else if(position < (uint64_t)buffer.size())
779 {
780 buffer[position] = Value;
781 position++;
782 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000783 else
784 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500785 }
786
787 void writeBytes(llvm::StringRef Bytes) override
788 {
789 std::size_t oldSize = buffer.size();
790 buffer.resize(oldSize + Bytes.size());
791 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
792 position += Bytes.size();
793 }
794
795 uint64_t tell() const override { return position; }
796
797 void seek(uint64_t Off) override { position = Off; }
798
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400799 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500800 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400801 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500802
803#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500804 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
805#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400806 for(auto &entryPoint : entryPoints)
807 {
808 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
809 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500810#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500811
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400812 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400813 }
814
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500815 void finalize()
816 {
817 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
818
819 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
820 }
821
Ben Clayton713b8d32019-12-17 20:37:56 +0000822 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400823 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500824 ASSERT(func);
825 funcs[index] = func;
826 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400827
Nicolas Capens157ba262019-12-10 17:49:14 -0500828 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400829 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500830 ASSERT(funcs[index]);
831 return funcs[index];
832 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400833
Antonio Maiorano02a39532020-01-21 15:15:34 -0500834 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500835 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500836 // TODO(b/148086935): Replace with a buffer allocator.
837 size_t space = size + alignment;
838 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
839 void *ptr = buf.get();
840 void *alignedPtr = std::align(alignment, size, ptr, space);
841 ASSERT(alignedPtr);
842 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500843 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500844 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500845 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400846
Nicolas Capens157ba262019-12-10 17:49:14 -0500847private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000848 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500849 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
850 std::size_t position;
851 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500852};
853
Antonio Maiorano62427e02020-02-13 09:18:05 -0500854#ifdef ENABLE_RR_PRINT
855void VPrintf(const std::vector<Value *> &vals)
856{
Antonio Maiorano8cbee412020-06-10 15:59:20 -0400857 sz::Call(::function, ::basicBlock, Ice::IceType_i32, reinterpret_cast<const void *>(rr::DebugPrintf), V(vals), true);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500858}
859#endif // ENABLE_RR_PRINT
860
Nicolas Capens157ba262019-12-10 17:49:14 -0500861Nucleus::Nucleus()
862{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400863 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500864
865 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
866 Ice::ClFlags::getParsedClFlags(Flags);
867
Ben Clayton713b8d32019-12-17 20:37:56 +0000868#if defined(__arm__)
869 Flags.setTargetArch(Ice::Target_ARM32);
870 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
871#elif defined(__mips__)
872 Flags.setTargetArch(Ice::Target_MIPS32);
873 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
874#else // x86
875 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
876 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
877#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500878 Flags.setOutFileType(Ice::FT_Elf);
879 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
880 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
881 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
882 Flags.setDisableHybridAssembly(true);
883
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500884 // Emit functions into separate sections in the ELF so we can find them by name
885 Flags.setFunctionSections(true);
886
Nicolas Capens157ba262019-12-10 17:49:14 -0500887 static llvm::raw_os_ostream cout(std::cout);
888 static llvm::raw_os_ostream cerr(std::cerr);
889
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500890 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500891 {
892 // Decorate text asm with liveness info
893 Flags.setDecorateAsm(true);
894 }
895
Ben Clayton713b8d32019-12-17 20:37:56 +0000896 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500897 {
898 std::error_code errorCode;
899 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
900 ::elfFile = new Ice::ELFFileStreamer(*out);
901 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
902 }
903 else
904 {
905 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
906 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
907 ::routine = elfMemory;
908 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400909
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400910#if !__has_feature(memory_sanitizer)
911 // thread_local variables in shared libraries are initialized at load-time,
912 // but this is not observed by MemorySanitizer if the loader itself was not
913 // instrumented, leading to false-positive unitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400914 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400915#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500916 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500917}
918
919Nucleus::~Nucleus()
920{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400921 delete Variable::unmaterializedVariables;
922 Variable::unmaterializedVariables = nullptr;
923
Nicolas Capens157ba262019-12-10 17:49:14 -0500924 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500925 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500926
927 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500928 ::allocator = nullptr;
929
Nicolas Capens157ba262019-12-10 17:49:14 -0500930 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500931 ::function = nullptr;
932
Nicolas Capens157ba262019-12-10 17:49:14 -0500933 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500934 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500935
936 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500937 ::elfFile = nullptr;
938
Nicolas Capens157ba262019-12-10 17:49:14 -0500939 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500940 ::out = nullptr;
941
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400942 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500943 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400944 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500945
946 ::codegenMutex.unlock();
947}
948
949void Nucleus::setDefaultConfig(const Config &cfg)
950{
951 std::unique_lock<std::mutex> lock(::defaultConfigLock);
952 ::defaultConfig() = cfg;
953}
954
955void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
956{
957 std::unique_lock<std::mutex> lock(::defaultConfigLock);
958 auto &config = ::defaultConfig();
959 config = cfgEdit.apply(config);
960}
961
962Config Nucleus::getDefaultConfig()
963{
964 std::unique_lock<std::mutex> lock(::defaultConfigLock);
965 return ::defaultConfig();
966}
967
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500968// This function lowers and produces executable binary code in memory for the input functions,
969// and returns a Routine with the entry points to these functions.
970template<size_t Count>
971static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500972{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500973 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
974 // and GlobalContext::emitItems.
975
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500976 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500977 {
978 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500979 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500980 }
981
982 ::context->emitFileHeader();
983
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500984 // Translate
985
986 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500987 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500988 Ice::Cfg *currFunc = functions[i];
989
990 // Install function allocator in TLS for Cfg-specific container allocators
991 Ice::CfgLocalAllocatorScope allocScope(currFunc);
992
993 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
994
995 rr::optimize(currFunc);
996
997 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500998 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500999
1000 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001001 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001002
1003 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1004
1005 if(subzeroEmitTextAsm)
1006 {
1007 currFunc->emit();
1008 }
1009
1010 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -05001011 }
1012
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001013 // Emit items
1014
1015 ::context->lowerGlobals("");
1016
Nicolas Capens157ba262019-12-10 17:49:14 -05001017 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001018
1019 for(size_t i = 0; i < Count; ++i)
1020 {
1021 Ice::Cfg *currFunc = functions[i];
1022
1023 // Accumulate globals from functions to emit into the "last" section at the end
1024 auto globals = currFunc->getGlobalInits();
1025 if(globals && !globals->empty())
1026 {
1027 ::context->getGlobals()->merge(globals.get());
1028 }
1029
1030 auto assembler = currFunc->releaseAssembler();
1031 assembler->alignFunction();
1032 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1033 }
1034
Nicolas Capens157ba262019-12-10 17:49:14 -05001035 ::context->lowerGlobals("last");
1036 ::context->lowerConstants();
1037 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001038
Nicolas Capens157ba262019-12-10 17:49:14 -05001039 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001040 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001041 objectWriter->writeNonUserSections();
1042
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001043 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001044 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1045 ASSERT(entryPoints.size() == Count);
1046 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001047 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001048 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001049 }
1050
1051 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001052
1053 Routine *handoffRoutine = ::routine;
1054 ::routine = nullptr;
1055
1056 return std::shared_ptr<Routine>(handoffRoutine);
1057}
1058
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001059std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1060{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001061 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001062 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1063}
1064
Nicolas Capens157ba262019-12-10 17:49:14 -05001065Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1066{
1067 Ice::Type type = T(t);
1068 int typeSize = Ice::typeWidthInBytes(type);
1069 int totalSize = typeSize * (arraySize ? arraySize : 1);
1070
1071 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1072 auto address = ::function->makeVariable(T(getPointerType(t)));
1073 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1074 ::function->getEntryNode()->getInsts().push_front(alloca);
1075
1076 return V(address);
1077}
1078
1079BasicBlock *Nucleus::createBasicBlock()
1080{
1081 return B(::function->makeNode());
1082}
1083
1084BasicBlock *Nucleus::getInsertBlock()
1085{
1086 return B(::basicBlock);
1087}
1088
1089void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1090{
Ben Clayton713b8d32019-12-17 20:37:56 +00001091 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001092
1093 Variable::materializeAll();
1094
1095 ::basicBlock = basicBlock;
1096}
1097
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001098void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001099{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001100 ASSERT(::function == nullptr);
1101 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001102 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001103 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001104 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001105
1106 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1107
1108 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1109 // becomes invalid if another one is created; for example, when creating await and destroy functions
1110 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1111 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001112 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1113
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001114 ::entryBlock = ::function->getEntryNode();
1115 ::basicBlock = ::function->makeNode();
1116 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001117}
1118
1119Value *Nucleus::getArgument(unsigned int index)
1120{
1121 return V(::function->getArgs()[index]);
1122}
1123
1124void Nucleus::createRetVoid()
1125{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001126 RR_DEBUG_INFO_UPDATE_LOC();
1127
Nicolas Capens157ba262019-12-10 17:49:14 -05001128 // Code generated after this point is unreachable, so any variables
1129 // being read can safely return an undefined value. We have to avoid
1130 // materializing variables after the terminator ret instruction.
1131 Variable::killUnmaterialized();
1132
1133 Ice::InstRet *ret = Ice::InstRet::create(::function);
1134 ::basicBlock->appendInst(ret);
1135}
1136
1137void Nucleus::createRet(Value *v)
1138{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001139 RR_DEBUG_INFO_UPDATE_LOC();
1140
Nicolas Capens157ba262019-12-10 17:49:14 -05001141 // Code generated after this point is unreachable, so any variables
1142 // being read can safely return an undefined value. We have to avoid
1143 // materializing variables after the terminator ret instruction.
1144 Variable::killUnmaterialized();
1145
1146 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1147 ::basicBlock->appendInst(ret);
1148}
1149
1150void Nucleus::createBr(BasicBlock *dest)
1151{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001152 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001153 Variable::materializeAll();
1154
1155 auto br = Ice::InstBr::create(::function, dest);
1156 ::basicBlock->appendInst(br);
1157}
1158
1159void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1160{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001161 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001162 Variable::materializeAll();
1163
1164 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1165 ::basicBlock->appendInst(br);
1166}
1167
1168static bool isCommutative(Ice::InstArithmetic::OpKind op)
1169{
1170 switch(op)
1171 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001172 case Ice::InstArithmetic::Add:
1173 case Ice::InstArithmetic::Fadd:
1174 case Ice::InstArithmetic::Mul:
1175 case Ice::InstArithmetic::Fmul:
1176 case Ice::InstArithmetic::And:
1177 case Ice::InstArithmetic::Or:
1178 case Ice::InstArithmetic::Xor:
1179 return true;
1180 default:
1181 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001182 }
1183}
1184
1185static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1186{
1187 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1188
1189 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1190
1191 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1192 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1193 ::basicBlock->appendInst(arithmetic);
1194
1195 return V(result);
1196}
1197
1198Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1199{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001200 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001201 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1202}
1203
1204Value *Nucleus::createSub(Value *lhs, Value *rhs)
1205{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001206 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001207 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1208}
1209
1210Value *Nucleus::createMul(Value *lhs, Value *rhs)
1211{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001212 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001213 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1214}
1215
1216Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1217{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001218 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001219 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1220}
1221
1222Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1223{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001224 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001225 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1226}
1227
1228Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1229{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001230 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001231 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1232}
1233
1234Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1235{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001236 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001237 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1238}
1239
1240Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1241{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001242 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001243 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1244}
1245
1246Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1247{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001248 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001249 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1250}
1251
1252Value *Nucleus::createURem(Value *lhs, Value *rhs)
1253{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001254 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001255 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1256}
1257
1258Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1259{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001260 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001261 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1262}
1263
1264Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1265{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001266 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001267 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1268 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001269 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001270 return nullptr;
1271}
1272
1273RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1274{
1275 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001276}
1277
1278Value *Nucleus::createShl(Value *lhs, Value *rhs)
1279{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001280 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001281 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1282}
1283
1284Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1285{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001286 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001287 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1288}
1289
1290Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1291{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001292 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001293 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1294}
1295
1296Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1297{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001298 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001299 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1300}
1301
1302Value *Nucleus::createOr(Value *lhs, Value *rhs)
1303{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001304 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001305 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1306}
1307
1308Value *Nucleus::createXor(Value *lhs, Value *rhs)
1309{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001310 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001311 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1312}
1313
1314Value *Nucleus::createNeg(Value *v)
1315{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001316 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001317 return createSub(createNullValue(T(v->getType())), v);
1318}
1319
1320Value *Nucleus::createFNeg(Value *v)
1321{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001322 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001323 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1324 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001325
1326 return createFSub(negativeZero, v);
1327}
1328
1329Value *Nucleus::createNot(Value *v)
1330{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001331 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001332 if(Ice::isScalarIntegerType(v->getType()))
1333 {
1334 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1335 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001336 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001337 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001338 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001339 return createXor(v, createConstantVector(c, T(v->getType())));
1340 }
1341}
1342
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001343static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1344{
1345#if defined(__i386__) || defined(__x86_64__)
1346 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1347 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1348 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1349#else
1350 if(atomic)
1351 {
1352 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1353 }
1354 if(memoryOrder != std::memory_order_relaxed)
1355 {
1356 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1357 }
1358#endif
1359
1360 // Vulkan doesn't allow sequential memory order
1361 ASSERT(memoryOrder != std::memory_order_seq_cst);
1362}
1363
Nicolas Capens157ba262019-12-10 17:49:14 -05001364Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1365{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001366 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001367 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001368
1369 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001370 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001371
Ben Clayton713b8d32019-12-17 20:37:56 +00001372 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001373 {
1374 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001375 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001376 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001377 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001378 auto pointer = RValue<Pointer<Byte>>(ptr);
1379 Int x = *Pointer<Int>(pointer);
1380
1381 Int4 vector;
1382 vector = Insert(vector, x, 0);
1383
Antonio Maiorano02a39532020-01-21 15:15:34 -05001384 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001385 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1386 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001387 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001388 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001389 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001390 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001391 auto pointer = RValue<Pointer<Byte>>(ptr);
1392 Int x = *Pointer<Int>(pointer);
1393 Int y = *Pointer<Int>(pointer + 4);
1394
1395 Int4 vector;
1396 vector = Insert(vector, x, 0);
1397 vector = Insert(vector, y, 1);
1398
Antonio Maiorano02a39532020-01-21 15:15:34 -05001399 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001400 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1401 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001402 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001403 else
1404 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001405 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001406 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001407 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001408 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001409 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001410 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001411 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1412 load->addArg(ptr);
1413 load->addArg(::context->getConstantInt32(typeSize(type)));
1414 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001415 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001416 }
1417 else
1418 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001419 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001420 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001421
Antonio Maiorano02a39532020-01-21 15:15:34 -05001422 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001423 return V(result);
1424}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001425
Nicolas Capens157ba262019-12-10 17:49:14 -05001426Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1427{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001428 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001429 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001430
Ben Clayton713b8d32019-12-17 20:37:56 +00001431#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001432 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001433 if(align != 0)
1434 {
1435 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1436 call->addArg(ptr);
1437 call->addArg(::context->getConstantInt64(typeSize(type)));
1438 ::basicBlock->appendInst(call);
1439 }
1440#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001441
Nicolas Capens157ba262019-12-10 17:49:14 -05001442 int valueType = (int)reinterpret_cast<intptr_t>(type);
1443
Ben Clayton713b8d32019-12-17 20:37:56 +00001444 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001445 {
1446 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001447 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001448 if(typeSize(type) == 4)
1449 {
1450 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1451 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1452 ::basicBlock->appendInst(bitcast);
1453
1454 RValue<Int4> v(V(vector));
1455
1456 auto pointer = RValue<Pointer<Byte>>(ptr);
1457 Int x = Extract(v, 0);
1458 *Pointer<Int>(pointer) = x;
1459 }
1460 else if(typeSize(type) == 8)
1461 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001462 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001463 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1464 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1465 ::basicBlock->appendInst(bitcast);
1466
1467 RValue<Int4> v(V(vector));
1468
1469 auto pointer = RValue<Pointer<Byte>>(ptr);
1470 Int x = Extract(v, 0);
1471 *Pointer<Int>(pointer) = x;
1472 Int y = Extract(v, 1);
1473 *Pointer<Int>(pointer + 4) = y;
1474 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001475 else
1476 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001477 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001478 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001479 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001480 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001481 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1482 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1483 store->addArg(value);
1484 store->addArg(ptr);
1485 store->addArg(::context->getConstantInt32(typeSize(type)));
1486 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001487 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001488 }
1489 else
1490 {
1491 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001492
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001493 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001494 ::basicBlock->appendInst(store);
1495 }
1496
1497 return value;
1498}
1499
1500Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1501{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001502 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001503 ASSERT(index->getType() == Ice::IceType_i32);
1504
1505 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1506 {
1507 int32_t offset = constant->getValue() * (int)typeSize(type);
1508
1509 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001510 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001511 return ptr;
1512 }
1513
Nicolas Capens157ba262019-12-10 17:49:14 -05001514 return createAdd(ptr, createConstantInt(offset));
1515 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001516
Nicolas Capens157ba262019-12-10 17:49:14 -05001517 if(!Ice::isByteSizedType(T(type)))
1518 {
1519 index = createMul(index, createConstantInt((int)typeSize(type)));
1520 }
1521
Ben Clayton713b8d32019-12-17 20:37:56 +00001522 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001523 {
1524 if(unsignedIndex)
1525 {
1526 index = createZExt(index, T(Ice::IceType_i64));
1527 }
1528 else
1529 {
1530 index = createSExt(index, T(Ice::IceType_i64));
1531 }
1532 }
1533
1534 return createAdd(ptr, index);
1535}
1536
Antonio Maiorano370cba52019-12-31 11:36:07 -05001537static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1538{
1539 Ice::Variable *result = ::function->makeVariable(value->getType());
1540
1541 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1542 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1543 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1544 auto op = ::context->getConstantInt32(rmwOp);
1545 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1546 inst->addArg(op);
1547 inst->addArg(ptr);
1548 inst->addArg(value);
1549 inst->addArg(order);
1550 ::basicBlock->appendInst(inst);
1551
1552 return V(result);
1553}
1554
Nicolas Capens157ba262019-12-10 17:49:14 -05001555Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1556{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001557 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001558 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001559}
1560
1561Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1562{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001563 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001564 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001565}
1566
1567Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1568{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001569 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001570 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001571}
1572
1573Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1574{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001575 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001576 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001577}
1578
1579Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1580{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001581 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001582 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001583}
1584
1585Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1586{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001587 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001588 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001589}
1590
1591Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1592{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001593 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001594 Ice::Variable *result = ::function->makeVariable(value->getType());
1595
1596 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1597 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1598 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1599 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1600 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1601 inst->addArg(ptr);
1602 inst->addArg(compare);
1603 inst->addArg(value);
1604 inst->addArg(orderEq);
1605 inst->addArg(orderNeq);
1606 ::basicBlock->appendInst(inst);
1607
1608 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001609}
1610
1611static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1612{
1613 if(v->getType() == T(destType))
1614 {
1615 return v;
1616 }
1617
1618 Ice::Variable *result = ::function->makeVariable(T(destType));
1619 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1620 ::basicBlock->appendInst(cast);
1621
1622 return V(result);
1623}
1624
1625Value *Nucleus::createTrunc(Value *v, Type *destType)
1626{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001627 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001628 return createCast(Ice::InstCast::Trunc, v, destType);
1629}
1630
1631Value *Nucleus::createZExt(Value *v, Type *destType)
1632{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001633 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001634 return createCast(Ice::InstCast::Zext, v, destType);
1635}
1636
1637Value *Nucleus::createSExt(Value *v, Type *destType)
1638{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001639 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001640 return createCast(Ice::InstCast::Sext, v, destType);
1641}
1642
1643Value *Nucleus::createFPToUI(Value *v, Type *destType)
1644{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001645 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001646 return createCast(Ice::InstCast::Fptoui, v, destType);
1647}
1648
1649Value *Nucleus::createFPToSI(Value *v, Type *destType)
1650{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001651 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001652 return createCast(Ice::InstCast::Fptosi, v, destType);
1653}
1654
1655Value *Nucleus::createSIToFP(Value *v, Type *destType)
1656{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001657 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001658 return createCast(Ice::InstCast::Sitofp, v, destType);
1659}
1660
1661Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1662{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001663 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001664 return createCast(Ice::InstCast::Fptrunc, v, destType);
1665}
1666
1667Value *Nucleus::createFPExt(Value *v, Type *destType)
1668{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001669 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001670 return createCast(Ice::InstCast::Fpext, v, destType);
1671}
1672
1673Value *Nucleus::createBitCast(Value *v, Type *destType)
1674{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001675 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001676 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1677 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1678 // emulate them by writing to the stack and reading back as the destination type.
1679 if(emulateMismatchedBitCast)
1680 {
1681 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1682 {
1683 Value *address = allocateStackVariable(destType);
1684 createStore(v, address, T(v->getType()));
1685 return createLoad(address, destType);
1686 }
1687 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1688 {
1689 Value *address = allocateStackVariable(T(v->getType()));
1690 createStore(v, address, T(v->getType()));
1691 return createLoad(address, destType);
1692 }
1693 }
1694
1695 return createCast(Ice::InstCast::Bitcast, v, destType);
1696}
1697
1698static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1699{
1700 ASSERT(lhs->getType() == rhs->getType());
1701
1702 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1703 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1704 ::basicBlock->appendInst(cmp);
1705
1706 return V(result);
1707}
1708
1709Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1710{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001711 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001712 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1713}
1714
1715Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1716{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001717 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001718 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1719}
1720
1721Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1722{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001723 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001724 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1725}
1726
1727Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1728{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001729 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001730 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1731}
1732
1733Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1734{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001735 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001736 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1737}
1738
1739Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1740{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001741 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001742 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1743}
1744
1745Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1746{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001747 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001748 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1749}
1750
1751Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1752{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001753 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001754 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1755}
1756
1757Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1758{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001759 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001760 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1761}
1762
1763Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1764{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001765 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001766 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1767}
1768
1769Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1770{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001771 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001772 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1773}
1774
1775static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1776{
1777 ASSERT(lhs->getType() == rhs->getType());
1778 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1779
1780 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1781 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1782 ::basicBlock->appendInst(cmp);
1783
1784 return V(result);
1785}
1786
1787Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1788{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001789 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001790 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1791}
1792
1793Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1794{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001795 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001796 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1797}
1798
1799Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1800{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001801 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001802 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1803}
1804
1805Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1806{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001807 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001808 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1809}
1810
1811Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1812{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001813 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001814 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1815}
1816
1817Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1818{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001819 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001820 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1821}
1822
1823Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1824{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001825 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001826 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1827}
1828
1829Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1830{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001831 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001832 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1833}
1834
1835Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1836{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001837 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001838 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1839}
1840
1841Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1842{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001843 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001844 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1845}
1846
1847Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1848{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001849 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001850 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1851}
1852
1853Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1854{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001855 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001856 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1857}
1858
1859Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1860{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001861 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001862 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1863}
1864
1865Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1866{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001867 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001868 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1869}
1870
1871Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1872{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001873 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001874 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001875 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001876 ::basicBlock->appendInst(extract);
1877
1878 return V(result);
1879}
1880
1881Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1882{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001883 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001884 auto result = ::function->makeVariable(vector->getType());
1885 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1886 ::basicBlock->appendInst(insert);
1887
1888 return V(result);
1889}
1890
1891Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1892{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001893 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001894 ASSERT(V1->getType() == V2->getType());
1895
1896 int size = Ice::typeNumElements(V1->getType());
1897 auto result = ::function->makeVariable(V1->getType());
1898 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1899
1900 for(int i = 0; i < size; i++)
1901 {
1902 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1903 }
1904
1905 ::basicBlock->appendInst(shuffle);
1906
1907 return V(result);
1908}
1909
1910Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1911{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001912 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001913 ASSERT(ifTrue->getType() == ifFalse->getType());
1914
1915 auto result = ::function->makeVariable(ifTrue->getType());
1916 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1917 ::basicBlock->appendInst(select);
1918
1919 return V(result);
1920}
1921
1922SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1923{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001924 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001925 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1926 ::basicBlock->appendInst(switchInst);
1927
Ben Clayton713b8d32019-12-17 20:37:56 +00001928 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001929}
1930
1931void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1932{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001933 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001934 switchCases->addBranch(label, label, branch);
1935}
1936
1937void Nucleus::createUnreachable()
1938{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001939 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001940 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1941 ::basicBlock->appendInst(unreachable);
1942}
1943
Antonio Maiorano62427e02020-02-13 09:18:05 -05001944Type *Nucleus::getType(Value *value)
1945{
1946 return T(V(value)->getType());
1947}
1948
1949Type *Nucleus::getContainedType(Type *vectorType)
1950{
1951 Ice::Type vecTy = T(vectorType);
1952 switch(vecTy)
1953 {
1954 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1955 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1956 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1957 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1958 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1959 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1960 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1961 default:
1962 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1963 return {};
1964 }
1965}
1966
Nicolas Capens157ba262019-12-10 17:49:14 -05001967Type *Nucleus::getPointerType(Type *ElementType)
1968{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001969 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001970}
1971
Antonio Maiorano62427e02020-02-13 09:18:05 -05001972static constexpr Ice::Type getNaturalIntType()
1973{
1974 constexpr size_t intSize = sizeof(int);
1975 static_assert(intSize == 4 || intSize == 8, "");
1976 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1977}
1978
1979Type *Nucleus::getPrintfStorageType(Type *valueType)
1980{
1981 Ice::Type valueTy = T(valueType);
1982 switch(valueTy)
1983 {
1984 case Ice::IceType_i32:
1985 return T(getNaturalIntType());
1986
1987 case Ice::IceType_f32:
1988 return T(Ice::IceType_f64);
1989
1990 default:
1991 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1992 return {};
1993 }
1994}
1995
Nicolas Capens157ba262019-12-10 17:49:14 -05001996Value *Nucleus::createNullValue(Type *Ty)
1997{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001998 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001999 if(Ice::isVectorType(T(Ty)))
2000 {
2001 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002002 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002003 return createConstantVector(c, Ty);
2004 }
2005 else
2006 {
2007 return V(::context->getConstantZero(T(Ty)));
2008 }
2009}
2010
2011Value *Nucleus::createConstantLong(int64_t i)
2012{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002013 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002014 return V(::context->getConstantInt64(i));
2015}
2016
2017Value *Nucleus::createConstantInt(int i)
2018{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002019 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002020 return V(::context->getConstantInt32(i));
2021}
2022
2023Value *Nucleus::createConstantInt(unsigned int i)
2024{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002025 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002026 return V(::context->getConstantInt32(i));
2027}
2028
2029Value *Nucleus::createConstantBool(bool b)
2030{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002031 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002032 return V(::context->getConstantInt1(b));
2033}
2034
2035Value *Nucleus::createConstantByte(signed char i)
2036{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002037 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002038 return V(::context->getConstantInt8(i));
2039}
2040
2041Value *Nucleus::createConstantByte(unsigned char i)
2042{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002043 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002044 return V(::context->getConstantInt8(i));
2045}
2046
2047Value *Nucleus::createConstantShort(short i)
2048{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002049 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002050 return V(::context->getConstantInt16(i));
2051}
2052
2053Value *Nucleus::createConstantShort(unsigned short i)
2054{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002055 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002056 return V(::context->getConstantInt16(i));
2057}
2058
2059Value *Nucleus::createConstantFloat(float x)
2060{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002061 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002062 return V(::context->getConstantFloat(x));
2063}
2064
2065Value *Nucleus::createNullPointer(Type *Ty)
2066{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002067 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002068 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002069}
2070
Antonio Maiorano02a39532020-01-21 15:15:34 -05002071static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2072{
2073 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2074}
2075
Nicolas Capens157ba262019-12-10 17:49:14 -05002076Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2077{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002078 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002079 const int vectorSize = 16;
2080 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2081 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002082
2083 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002084 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002085
Antonio Maioranoa0957112020-03-04 15:06:19 -05002086 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002087 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002088
2089 switch((int)reinterpret_cast<intptr_t>(type))
2090 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002091 case Ice::IceType_v4i32:
2092 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002093 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002094 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002095 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002096 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002097 }
2098 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002099 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002100 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002101 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002102 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002103 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002104 }
2105 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002106 case Ice::IceType_v8i16:
2107 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002108 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002109 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002110 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002111 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002112 }
2113 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002114 case Ice::IceType_v16i8:
2115 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002116 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002117 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002118 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002119 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002120 }
2121 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002122 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002123 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002124 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002125 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002126 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002127 }
2128 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002129 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002130 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002131 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002132 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002133 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002134 }
2135 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002136 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002137 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002138 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002139 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002140 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002141 }
2142 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002143 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002144 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002145 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002146 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002147 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002148 }
2149 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002150 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002151 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002152 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002153 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002154 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002155 }
2156 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002157 default:
2158 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002159 }
2160
Antonio Maiorano02a39532020-01-21 15:15:34 -05002161 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002162
Antonio Maiorano02a39532020-01-21 15:15:34 -05002163 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002164 return V(result);
2165}
2166
2167Value *Nucleus::createConstantVector(const double *constants, Type *type)
2168{
Ben Clayton713b8d32019-12-17 20:37:56 +00002169 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002170}
2171
Antonio Maiorano62427e02020-02-13 09:18:05 -05002172Value *Nucleus::createConstantString(const char *v)
2173{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002174 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002175 return V(IceConstantData(v, strlen(v) + 1));
2176}
2177
Nicolas Capens519cf222020-05-08 15:27:19 -04002178Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002179{
2180 return T(Ice::IceType_void);
2181}
2182
Nicolas Capens519cf222020-05-08 15:27:19 -04002183Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002184{
2185 return T(Ice::IceType_i1);
2186}
2187
Nicolas Capens519cf222020-05-08 15:27:19 -04002188Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002189{
2190 return T(Ice::IceType_i8);
2191}
2192
Nicolas Capens519cf222020-05-08 15:27:19 -04002193Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002194{
2195 return T(Ice::IceType_i8);
2196}
2197
Nicolas Capens519cf222020-05-08 15:27:19 -04002198Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002199{
2200 return T(Ice::IceType_i16);
2201}
2202
Nicolas Capens519cf222020-05-08 15:27:19 -04002203Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002204{
2205 return T(Ice::IceType_i16);
2206}
2207
Nicolas Capens519cf222020-05-08 15:27:19 -04002208Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002209{
2210 return T(Type_v4i8);
2211}
2212
Nicolas Capens519cf222020-05-08 15:27:19 -04002213Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002214{
2215 return T(Type_v4i8);
2216}
2217
Ben Clayton713b8d32019-12-17 20:37:56 +00002218namespace {
2219RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002220{
Ben Clayton713b8d32019-12-17 20:37:56 +00002221 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002222}
2223
Ben Clayton713b8d32019-12-17 20:37:56 +00002224RValue<Byte> Extract(RValue<Byte8> val, int i)
2225{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002226 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002227}
2228
2229RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2230{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002231 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002232}
2233} // namespace
2234
Nicolas Capens157ba262019-12-10 17:49:14 -05002235RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2236{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002237 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002238 if(emulateIntrinsics)
2239 {
2240 Byte8 result;
2241 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2242 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2243 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2244 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2245 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2246 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2247 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2248 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2249
2250 return result;
2251 }
2252 else
2253 {
2254 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002255 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002256 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2257 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002258 paddusb->addArg(x.value());
2259 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002260 ::basicBlock->appendInst(paddusb);
2261
2262 return RValue<Byte8>(V(result));
2263 }
2264}
2265
2266RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002268 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002269 if(emulateIntrinsics)
2270 {
2271 Byte8 result;
2272 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2273 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2274 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2275 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2276 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2277 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2278 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2279 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2280
2281 return result;
2282 }
2283 else
2284 {
2285 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002286 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002287 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2288 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002289 psubusw->addArg(x.value());
2290 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002291 ::basicBlock->appendInst(psubusw);
2292
2293 return RValue<Byte8>(V(result));
2294 }
2295}
2296
2297RValue<SByte> Extract(RValue<SByte8> val, int i)
2298{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002299 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002300 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002301}
2302
2303RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2304{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002305 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002306 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002307}
2308
2309RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2310{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002311 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002312 if(emulateIntrinsics)
2313 {
2314 SByte8 result;
2315 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2316 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2317 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2318 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2319 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2320 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2321 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2322 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2323
2324 return result;
2325 }
2326 else
2327 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002328#if defined(__i386__) || defined(__x86_64__)
2329 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2330 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2331 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002332
Ben Clayton713b8d32019-12-17 20:37:56 +00002333 return As<SByte8>(hi | lo);
2334#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002335 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002336#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002337 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002338}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002339
Nicolas Capens157ba262019-12-10 17:49:14 -05002340RValue<Int> SignMask(RValue<Byte8> x)
2341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002343 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002344 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002345 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2346 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002347 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002348 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002349 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002350 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002351 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002352 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2353 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002354 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002355 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002356
Nicolas Capens157ba262019-12-10 17:49:14 -05002357 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002358 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002359}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002360
2361// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2362// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002363// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002364// }
2365
Nicolas Capens157ba262019-12-10 17:49:14 -05002366RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2367{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002368 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002369 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002370}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002371
Nicolas Capens519cf222020-05-08 15:27:19 -04002372Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002373{
2374 return T(Type_v8i8);
2375}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002376
Nicolas Capens598f8d82016-09-26 15:09:10 -04002377// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2378// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002379// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002380// }
2381
2382// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2383// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002384// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002385// }
2386
Nicolas Capens157ba262019-12-10 17:49:14 -05002387RValue<SByte> SaturateSigned(RValue<Short> x)
2388{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002389 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002390 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2391}
2392
2393RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2394{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002395 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002396 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002397 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002398 SByte8 result;
2399 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2400 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2401 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2402 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2403 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2404 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2405 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2406 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002407
Nicolas Capens157ba262019-12-10 17:49:14 -05002408 return result;
2409 }
2410 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002411 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002412 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002413 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002414 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2415 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002416 paddsb->addArg(x.value());
2417 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002418 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002419
Nicolas Capens157ba262019-12-10 17:49:14 -05002420 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002421 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002422}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002423
Nicolas Capens157ba262019-12-10 17:49:14 -05002424RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2425{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002426 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002427 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002428 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002429 SByte8 result;
2430 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2431 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2432 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2433 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2434 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2435 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2436 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2437 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002438
Nicolas Capens157ba262019-12-10 17:49:14 -05002439 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002440 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002441 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002442 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002443 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002444 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002445 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2446 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002447 psubsb->addArg(x.value());
2448 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002449 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002450
Nicolas Capens157ba262019-12-10 17:49:14 -05002451 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002452 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002453}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002454
Nicolas Capens157ba262019-12-10 17:49:14 -05002455RValue<Int> SignMask(RValue<SByte8> x)
2456{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002457 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002458 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002459 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002460 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2461 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002462 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002463 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002464 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002465 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002466 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002467 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2468 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002469 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002470 ::basicBlock->appendInst(movmsk);
2471
2472 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002473 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002474}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002475
Nicolas Capens157ba262019-12-10 17:49:14 -05002476RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2477{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002478 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002479 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002480}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002481
Nicolas Capens157ba262019-12-10 17:49:14 -05002482RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2483{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002484 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002485 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002486}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002487
Nicolas Capens519cf222020-05-08 15:27:19 -04002488Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002489{
2490 return T(Type_v8i8);
2491}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002492
Nicolas Capens519cf222020-05-08 15:27:19 -04002493Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002494{
2495 return T(Ice::IceType_v16i8);
2496}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002497
Nicolas Capens519cf222020-05-08 15:27:19 -04002498Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002499{
2500 return T(Ice::IceType_v16i8);
2501}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002502
Nicolas Capens519cf222020-05-08 15:27:19 -04002503Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002504{
2505 return T(Type_v2i16);
2506}
Nicolas Capensd4227962016-11-09 14:24:25 -05002507
Nicolas Capens519cf222020-05-08 15:27:19 -04002508Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002509{
2510 return T(Type_v2i16);
2511}
Nicolas Capensd4227962016-11-09 14:24:25 -05002512
Nicolas Capens157ba262019-12-10 17:49:14 -05002513Short4::Short4(RValue<Int4> cast)
2514{
Ben Clayton713b8d32019-12-17 20:37:56 +00002515 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002516 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002517 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2518
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002519 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002520 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002521
2522 storeValue(short4);
2523}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002524
2525// Short4::Short4(RValue<Float> cast)
2526// {
2527// }
2528
Nicolas Capens157ba262019-12-10 17:49:14 -05002529Short4::Short4(RValue<Float4> cast)
2530{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002531 // TODO(b/150791192): Generalize and optimize
2532 auto smin = std::numeric_limits<short>::min();
2533 auto smax = std::numeric_limits<short>::max();
2534 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002535}
2536
2537RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2538{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002539 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002540 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002541 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002542 Short4 result;
2543 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2544 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2545 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2546 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002547
2548 return result;
2549 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002550 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002551 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002552 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002553 }
2554}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002555
Nicolas Capens157ba262019-12-10 17:49:14 -05002556RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2557{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002558 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002559 if(emulateIntrinsics)
2560 {
2561 Short4 result;
2562 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2563 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2564 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2565 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2566
2567 return result;
2568 }
2569 else
2570 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002571 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002572 }
2573}
2574
2575RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2576{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002577 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002578 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002579 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002580 ::basicBlock->appendInst(cmp);
2581
2582 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002583 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002584 ::basicBlock->appendInst(select);
2585
2586 return RValue<Short4>(V(result));
2587}
2588
2589RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2590{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002591 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002592 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002593 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002594 ::basicBlock->appendInst(cmp);
2595
2596 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002597 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002598 ::basicBlock->appendInst(select);
2599
2600 return RValue<Short4>(V(result));
2601}
2602
2603RValue<Short> SaturateSigned(RValue<Int> x)
2604{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002605 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002606 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2607}
2608
2609RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2610{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002611 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002612 if(emulateIntrinsics)
2613 {
2614 Short4 result;
2615 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2616 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2617 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2618 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2619
2620 return result;
2621 }
2622 else
2623 {
2624 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002625 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002626 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2627 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002628 paddsw->addArg(x.value());
2629 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002630 ::basicBlock->appendInst(paddsw);
2631
2632 return RValue<Short4>(V(result));
2633 }
2634}
2635
2636RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2637{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002638 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002639 if(emulateIntrinsics)
2640 {
2641 Short4 result;
2642 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2643 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2644 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2645 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2646
2647 return result;
2648 }
2649 else
2650 {
2651 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002652 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002653 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2654 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002655 psubsw->addArg(x.value());
2656 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002657 ::basicBlock->appendInst(psubsw);
2658
2659 return RValue<Short4>(V(result));
2660 }
2661}
2662
2663RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2664{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002665 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002666 if(emulateIntrinsics)
2667 {
2668 Short4 result;
2669 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2670 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2671 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2672 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2673
2674 return result;
2675 }
2676 else
2677 {
2678 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002679 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002680 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2681 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002682 pmulhw->addArg(x.value());
2683 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002684 ::basicBlock->appendInst(pmulhw);
2685
2686 return RValue<Short4>(V(result));
2687 }
2688}
2689
2690RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2691{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002692 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002693 if(emulateIntrinsics)
2694 {
2695 Int2 result;
2696 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2697 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2698
2699 return result;
2700 }
2701 else
2702 {
2703 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002704 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002705 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2706 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002707 pmaddwd->addArg(x.value());
2708 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002709 ::basicBlock->appendInst(pmaddwd);
2710
2711 return As<Int2>(V(result));
2712 }
2713}
2714
2715RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2716{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002717 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002718 if(emulateIntrinsics)
2719 {
2720 SByte8 result;
2721 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2722 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2723 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2724 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2725 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2726 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2727 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2728 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2729
2730 return result;
2731 }
2732 else
2733 {
2734 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002735 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002736 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2737 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002738 pack->addArg(x.value());
2739 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002740 ::basicBlock->appendInst(pack);
2741
2742 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2743 }
2744}
2745
2746RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2747{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002748 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002749 if(emulateIntrinsics)
2750 {
2751 Byte8 result;
2752 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2753 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2754 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2755 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2756 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2757 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2758 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2759 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2760
2761 return result;
2762 }
2763 else
2764 {
2765 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002766 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002767 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2768 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002769 pack->addArg(x.value());
2770 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002771 ::basicBlock->appendInst(pack);
2772
2773 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2774 }
2775}
2776
2777RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002780 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002781}
2782
2783RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2784{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002785 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002786 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002787}
2788
Nicolas Capens519cf222020-05-08 15:27:19 -04002789Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002790{
2791 return T(Type_v4i16);
2792}
2793
2794UShort4::UShort4(RValue<Float4> cast, bool saturate)
2795{
2796 if(saturate)
2797 {
2798 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002799 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002800 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2801 // PackUnsigned takes care of 0x0000 saturation.
2802 Int4 int4(Min(cast, Float4(0xFFFF)));
2803 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002804 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002805 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002806 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002807 // ARM saturates the 32-bit integer result on overflow/undeflow.
2808 Int4 int4(cast);
2809 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002810 }
2811 else
2812 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002813 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002814 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002815 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002816 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002817 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002818 *this = Short4(Int4(cast));
2819 }
2820}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002821
Nicolas Capens157ba262019-12-10 17:49:14 -05002822RValue<UShort> Extract(RValue<UShort4> val, int i)
2823{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002824 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002825}
2826
2827RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2828{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002829 return RValue<UShort4>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002830}
2831
2832RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2833{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002834 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002835 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002836
Nicolas Capens157ba262019-12-10 17:49:14 -05002837 {
2838 UShort4 result;
2839 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2840 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2841 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2842 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2843
2844 return result;
2845 }
2846 else
2847 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002848 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002849 }
2850}
2851
2852RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2853{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002854 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002855 if(emulateIntrinsics)
2856 {
2857 UShort4 result;
2858 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2859 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2860 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2861 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2862
2863 return result;
2864 }
2865 else
2866 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002867 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002868 }
2869}
2870
2871RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2872{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002873 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002874 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002875 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002876 ::basicBlock->appendInst(cmp);
2877
2878 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002879 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002880 ::basicBlock->appendInst(select);
2881
2882 return RValue<UShort4>(V(result));
2883}
2884
2885RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2886{
2887 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002888 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002889 ::basicBlock->appendInst(cmp);
2890
2891 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002892 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002893 ::basicBlock->appendInst(select);
2894
2895 return RValue<UShort4>(V(result));
2896}
2897
2898RValue<UShort> SaturateUnsigned(RValue<Int> x)
2899{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002900 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002901 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2902}
2903
2904RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2905{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002906 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002907 if(emulateIntrinsics)
2908 {
2909 UShort4 result;
2910 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2911 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2912 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2913 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2914
2915 return result;
2916 }
2917 else
2918 {
2919 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002920 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002921 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2922 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002923 paddusw->addArg(x.value());
2924 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002925 ::basicBlock->appendInst(paddusw);
2926
2927 return RValue<UShort4>(V(result));
2928 }
2929}
2930
2931RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2932{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002933 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002934 if(emulateIntrinsics)
2935 {
2936 UShort4 result;
2937 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2938 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2939 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2940 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2941
2942 return result;
2943 }
2944 else
2945 {
2946 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002947 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002948 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2949 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002950 psubusw->addArg(x.value());
2951 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002952 ::basicBlock->appendInst(psubusw);
2953
2954 return RValue<UShort4>(V(result));
2955 }
2956}
2957
2958RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2959{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002960 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002961 if(emulateIntrinsics)
2962 {
2963 UShort4 result;
2964 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2965 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2966 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2967 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2968
2969 return result;
2970 }
2971 else
2972 {
2973 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002974 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002975 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2976 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002977 pmulhuw->addArg(x.value());
2978 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002979 ::basicBlock->appendInst(pmulhuw);
2980
2981 return RValue<UShort4>(V(result));
2982 }
2983}
2984
2985RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2986{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002987 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002988 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2989
2990 // Scalarized implementation.
2991 Int4 result;
2992 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2993 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2994 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2995 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2996
2997 return result;
2998}
2999
3000RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3001{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003002 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003003 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3004
3005 if(false) // Partial product based implementation.
3006 {
3007 auto xh = x >> 16;
3008 auto yh = y >> 16;
3009 auto xl = x & UInt4(0x0000FFFF);
3010 auto yl = y & UInt4(0x0000FFFF);
3011 auto xlyh = xl * yh;
3012 auto xhyl = xh * yl;
3013 auto xlyhh = xlyh >> 16;
3014 auto xhylh = xhyl >> 16;
3015 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3016 auto xhyll = xhyl & UInt4(0x0000FFFF);
3017 auto xlylh = (xl * yl) >> 16;
3018 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3019
3020 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003021 }
3022
Nicolas Capens157ba262019-12-10 17:49:14 -05003023 // Scalarized implementation.
3024 Int4 result;
3025 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3026 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3027 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3028 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3029
3030 return As<UInt4>(result);
3031}
3032
3033RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3034{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003035 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003036 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003037 return UShort4(0);
3038}
3039
Nicolas Capens519cf222020-05-08 15:27:19 -04003040Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003041{
3042 return T(Type_v4i16);
3043}
3044
3045RValue<Short> Extract(RValue<Short8> val, int i)
3046{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003047 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003048 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003049}
3050
3051RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3052{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003053 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003054 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003055}
3056
3057RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3058{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003059 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003060 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003061 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003062 Short8 result;
3063 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3064 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3065 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3066 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3067 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3068 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3069 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3070 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003071
Nicolas Capens157ba262019-12-10 17:49:14 -05003072 return result;
3073 }
3074 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003075 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003076 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003077 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003078}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003079
Nicolas Capens157ba262019-12-10 17:49:14 -05003080RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3081{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003082 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003083 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003084 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003085 Short8 result;
3086 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3087 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3088 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3089 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3090 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3091 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3092 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3093 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003094
Nicolas Capens157ba262019-12-10 17:49:14 -05003095 return result;
3096 }
3097 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003098 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003099 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003100 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003101}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003102
Nicolas Capens157ba262019-12-10 17:49:14 -05003103RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3104{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003105 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003106 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003107 return Int4(0);
3108}
3109
3110RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3111{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003112 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003113 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003114 return Short8(0);
3115}
3116
Nicolas Capens519cf222020-05-08 15:27:19 -04003117Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003118{
3119 return T(Ice::IceType_v8i16);
3120}
3121
3122RValue<UShort> Extract(RValue<UShort8> val, int i)
3123{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003124 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003125 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003126}
3127
3128RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3129{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003130 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003131 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003132}
3133
3134RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3135{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003136 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003137 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003138 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003139 UShort8 result;
3140 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3141 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3142 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3143 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3144 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3145 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3146 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3147 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003148
Nicolas Capens157ba262019-12-10 17:49:14 -05003149 return result;
3150 }
3151 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003152 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003153 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003154 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003155}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003156
Nicolas Capens157ba262019-12-10 17:49:14 -05003157RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3158{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003159 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003160 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003161 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003162 UShort8 result;
3163 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3164 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3165 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3166 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3167 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3168 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3169 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3170 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003171
Nicolas Capens157ba262019-12-10 17:49:14 -05003172 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003173 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003174 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003175 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003176 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003177 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003178}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003179
Nicolas Capens157ba262019-12-10 17:49:14 -05003180RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3181{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003182 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003183 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003184 return UShort8(0);
3185}
3186
Nicolas Capens519cf222020-05-08 15:27:19 -04003187Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003188{
3189 return T(Ice::IceType_v8i16);
3190}
3191
Ben Clayton713b8d32019-12-17 20:37:56 +00003192RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003193{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003194 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003195 RValue<Int> res = val;
3196 val += 1;
3197 return res;
3198}
3199
Ben Clayton713b8d32019-12-17 20:37:56 +00003200const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003201{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003202 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003203 val += 1;
3204 return val;
3205}
3206
Ben Clayton713b8d32019-12-17 20:37:56 +00003207RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003208{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003209 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003210 RValue<Int> res = val;
3211 val -= 1;
3212 return res;
3213}
3214
Ben Clayton713b8d32019-12-17 20:37:56 +00003215const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003216{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003217 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003218 val -= 1;
3219 return val;
3220}
3221
3222RValue<Int> RoundInt(RValue<Float> cast)
3223{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003224 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003225 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003226 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003227 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3228 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003229 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003230 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003231 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003232 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003233 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003234 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3235 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003236 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003237 ::basicBlock->appendInst(nearbyint);
3238
3239 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003240 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003241}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003242
Nicolas Capens519cf222020-05-08 15:27:19 -04003243Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003244{
3245 return T(Ice::IceType_i32);
3246}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003247
Nicolas Capens519cf222020-05-08 15:27:19 -04003248Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003249{
3250 return T(Ice::IceType_i64);
3251}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003252
Nicolas Capens157ba262019-12-10 17:49:14 -05003253UInt::UInt(RValue<Float> cast)
3254{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003255 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003256 // Smallest positive value representable in UInt, but not in Int
3257 const unsigned int ustart = 0x80000000u;
3258 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003259
Nicolas Capens157ba262019-12-10 17:49:14 -05003260 // If the value is negative, store 0, otherwise store the result of the conversion
3261 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003262 // Check if the value can be represented as an Int
3263 IfThenElse(cast >= ustartf,
3264 // If the value is too large, subtract ustart and re-add it after conversion.
3265 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3266 // Otherwise, just convert normally
3267 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003268 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003269}
Nicolas Capensa8086512016-11-07 17:32:17 -05003270
Ben Clayton713b8d32019-12-17 20:37:56 +00003271RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003272{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003273 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003274 RValue<UInt> res = val;
3275 val += 1;
3276 return res;
3277}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003278
Ben Clayton713b8d32019-12-17 20:37:56 +00003279const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003280{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003281 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003282 val += 1;
3283 return val;
3284}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003285
Ben Clayton713b8d32019-12-17 20:37:56 +00003286RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003289 RValue<UInt> res = val;
3290 val -= 1;
3291 return res;
3292}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003293
Ben Clayton713b8d32019-12-17 20:37:56 +00003294const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003295{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003296 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003297 val -= 1;
3298 return val;
3299}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003300
Nicolas Capens598f8d82016-09-26 15:09:10 -04003301// RValue<UInt> RoundUInt(RValue<Float> cast)
3302// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003303// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003304// }
3305
Nicolas Capens519cf222020-05-08 15:27:19 -04003306Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003307{
3308 return T(Ice::IceType_i32);
3309}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003310
3311// Int2::Int2(RValue<Int> cast)
3312// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003313// Value *extend = Nucleus::createZExt(cast.value(), Long::type());
Nicolas Capens519cf222020-05-08 15:27:19 -04003314// Value *vector = Nucleus::createBitCast(extend, Int2::type());
Nicolas Capens598f8d82016-09-26 15:09:10 -04003315//
3316// Constant *shuffle[2];
3317// shuffle[0] = Nucleus::createConstantInt(0);
3318// shuffle[1] = Nucleus::createConstantInt(0);
3319//
Nicolas Capens519cf222020-05-08 15:27:19 -04003320// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003321//
3322// storeValue(replicate);
3323// }
3324
Nicolas Capens157ba262019-12-10 17:49:14 -05003325RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3326{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003327 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003328 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003329 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003330 Int2 result;
3331 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3332 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003333
Nicolas Capens157ba262019-12-10 17:49:14 -05003334 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003335 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003336 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003337 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003338 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003339 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003340}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003341
Nicolas Capens157ba262019-12-10 17:49:14 -05003342RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3343{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003344 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003345 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003346 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003347 Int2 result;
3348 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3349 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3350
3351 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003352 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003353 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003354 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003355 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003356 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003357}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003358
Nicolas Capens519cf222020-05-08 15:27:19 -04003359Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003360{
3361 return T(Type_v2i32);
3362}
3363
3364RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3365{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003366 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003367 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003368 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003369 UInt2 result;
3370 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3371 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003372
Nicolas Capens157ba262019-12-10 17:49:14 -05003373 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003374 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003375 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003376 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003377 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003378 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003379}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003380
Nicolas Capens157ba262019-12-10 17:49:14 -05003381RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3382{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003383 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003384 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003385 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003386 UInt2 result;
3387 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3388 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003389
Nicolas Capens157ba262019-12-10 17:49:14 -05003390 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003391 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003392 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003393 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003394 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003395 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003396}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003397
Nicolas Capens519cf222020-05-08 15:27:19 -04003398Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003399{
3400 return T(Type_v2i32);
3401}
3402
Ben Clayton713b8d32019-12-17 20:37:56 +00003403Int4::Int4(RValue<Byte4> cast)
3404 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003405{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003406 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003407 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003408 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3409
3410 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003411 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003412 Value *b = Nucleus::createBitCast(a, Byte16::type());
3413 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003414
Ben Clayton713b8d32019-12-17 20:37:56 +00003415 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003416 Value *d = Nucleus::createBitCast(c, Short8::type());
3417 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003418
Nicolas Capens519cf222020-05-08 15:27:19 -04003419 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003420 storeValue(f);
3421}
3422
Ben Clayton713b8d32019-12-17 20:37:56 +00003423Int4::Int4(RValue<SByte4> cast)
3424 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003425{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003426 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003427 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003428 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3429
Ben Clayton713b8d32019-12-17 20:37:56 +00003430 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003431 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003432 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3433
Ben Clayton713b8d32019-12-17 20:37:56 +00003434 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003435 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003436 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3437
3438 *this = As<Int4>(e) >> 24;
3439}
3440
Ben Clayton713b8d32019-12-17 20:37:56 +00003441Int4::Int4(RValue<Short4> cast)
3442 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003443{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003444 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003445 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003446 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003447
3448 *this = As<Int4>(c) >> 16;
3449}
3450
Ben Clayton713b8d32019-12-17 20:37:56 +00003451Int4::Int4(RValue<UShort4> cast)
3452 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003453{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003454 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003455 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003456 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003457 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003458 storeValue(d);
3459}
3460
Ben Clayton713b8d32019-12-17 20:37:56 +00003461Int4::Int4(RValue<Int> rhs)
3462 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003463{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003464 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003465 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003466
Ben Clayton713b8d32019-12-17 20:37:56 +00003467 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003468 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3469
3470 storeValue(replicate);
3471}
3472
3473RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3474{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003475 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003476 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003477 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003478 Int4 result;
3479 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3480 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3481 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3482 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003483
Nicolas Capens157ba262019-12-10 17:49:14 -05003484 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003485 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003486 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003487 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003488 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003489 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003490}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003491
Nicolas Capens157ba262019-12-10 17:49:14 -05003492RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3493{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003494 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003495 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003496 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003497 Int4 result;
3498 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3499 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3500 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3501 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003502
Nicolas Capens157ba262019-12-10 17:49:14 -05003503 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003504 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003505 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003506 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003507 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003508 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003509}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003510
Nicolas Capens157ba262019-12-10 17:49:14 -05003511RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3512{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003513 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003514 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003515}
3516
3517RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3518{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003519 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003520 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003521}
3522
3523RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3524{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003525 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003526 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003527}
3528
3529RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3530{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003531 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003532 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003533}
3534
3535RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3536{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003537 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003538 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003539}
3540
3541RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3542{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003543 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003544 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003545}
3546
3547RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3548{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003549 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003550 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003551 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003552 ::basicBlock->appendInst(cmp);
3553
3554 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003555 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003556 ::basicBlock->appendInst(select);
3557
3558 return RValue<Int4>(V(result));
3559}
3560
3561RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3562{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003563 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003564 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003565 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003566 ::basicBlock->appendInst(cmp);
3567
3568 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003569 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003570 ::basicBlock->appendInst(select);
3571
3572 return RValue<Int4>(V(result));
3573}
3574
3575RValue<Int4> RoundInt(RValue<Float4> cast)
3576{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003577 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003578 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003579 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003580 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3581 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003582 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003583 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003584 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003585 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003586 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003587 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3588 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003589 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003590 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003591
3592 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003593 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003594}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003595
Nicolas Capenseeb81842021-01-12 17:44:40 -05003596RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3597{
3598 RR_DEBUG_INFO_UPDATE_LOC();
3599
3600 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3601 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3602 // saturate to 0x80000000.
3603 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3604
3605 if(emulateIntrinsics || CPUID::ARM)
3606 {
3607 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3608 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3609 }
3610 else
3611 {
3612 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3613 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3614 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3615 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3616 nearbyint->addArg(clamped.value());
3617 ::basicBlock->appendInst(nearbyint);
3618
3619 return RValue<Int4>(V(result));
3620 }
3621}
3622
Nicolas Capens157ba262019-12-10 17:49:14 -05003623RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3624{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003625 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003626 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003627 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003628 Short8 result;
3629 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3630 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3631 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3632 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3633 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3634 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3635 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3636 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003637
Nicolas Capens157ba262019-12-10 17:49:14 -05003638 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003639 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003640 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003641 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003642 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003643 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003644 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3645 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003646 pack->addArg(x.value());
3647 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003648 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003649
Nicolas Capens157ba262019-12-10 17:49:14 -05003650 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003651 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003652}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003653
Nicolas Capens157ba262019-12-10 17:49:14 -05003654RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3655{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003656 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003657 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003658 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003659 RValue<Int4> sx = As<Int4>(x);
3660 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003661
Nicolas Capens157ba262019-12-10 17:49:14 -05003662 RValue<Int4> sy = As<Int4>(y);
3663 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003664
Nicolas Capens157ba262019-12-10 17:49:14 -05003665 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003666 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003667 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003668 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003669 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003670 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003671 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3672 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003673 pack->addArg(x.value());
3674 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003675 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003676
Nicolas Capens157ba262019-12-10 17:49:14 -05003677 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003678 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003679}
Nicolas Capens33438a62017-09-27 11:47:35 -04003680
Nicolas Capens157ba262019-12-10 17:49:14 -05003681RValue<Int> SignMask(RValue<Int4> x)
3682{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003683 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003684 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003685 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003686 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3687 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003688 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003689 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003690 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003691 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003692 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003693 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3694 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003695 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003696 ::basicBlock->appendInst(movmsk);
3697
3698 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003699 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003700}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003701
Nicolas Capens519cf222020-05-08 15:27:19 -04003702Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003703{
3704 return T(Ice::IceType_v4i32);
3705}
3706
Ben Clayton713b8d32019-12-17 20:37:56 +00003707UInt4::UInt4(RValue<Float4> cast)
3708 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003709{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003710 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003711 // Smallest positive value representable in UInt, but not in Int
3712 const unsigned int ustart = 0x80000000u;
3713 const float ustartf = float(ustart);
3714
3715 // Check if the value can be represented as an Int
3716 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3717 // If the value is too large, subtract ustart and re-add it after conversion.
3718 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003719 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003720 (~uiValue & Int4(cast));
3721 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003722 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003723}
3724
Ben Clayton713b8d32019-12-17 20:37:56 +00003725UInt4::UInt4(RValue<UInt> rhs)
3726 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003727{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003728 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003729 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003730
Ben Clayton713b8d32019-12-17 20:37:56 +00003731 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003732 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3733
3734 storeValue(replicate);
3735}
3736
3737RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3738{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003739 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003740 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003741 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003742 UInt4 result;
3743 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3744 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3745 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3746 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003747
Nicolas Capens157ba262019-12-10 17:49:14 -05003748 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003749 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003750 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003751 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003752 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003753 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003754}
Ben Clayton88816fa2019-05-15 17:08:14 +01003755
Nicolas Capens157ba262019-12-10 17:49:14 -05003756RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3757{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003758 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003759 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003760 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003761 UInt4 result;
3762 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3763 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3764 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3765 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003766
Nicolas Capens157ba262019-12-10 17:49:14 -05003767 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003768 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003769 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003770 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003771 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003772 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003773}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003774
Nicolas Capens157ba262019-12-10 17:49:14 -05003775RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3776{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003777 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003778 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003779}
3780
3781RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3782{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003783 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003784 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003785}
3786
3787RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3788{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003789 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003790 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003791}
3792
3793RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3794{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003795 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003796 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003797}
3798
3799RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3800{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003801 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003802 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003803}
3804
3805RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3806{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003807 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003808 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003809}
3810
3811RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3812{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003813 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003814 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003815 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003816 ::basicBlock->appendInst(cmp);
3817
3818 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003819 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003820 ::basicBlock->appendInst(select);
3821
3822 return RValue<UInt4>(V(result));
3823}
3824
3825RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3826{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003827 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003828 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003829 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003830 ::basicBlock->appendInst(cmp);
3831
3832 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003833 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003834 ::basicBlock->appendInst(select);
3835
3836 return RValue<UInt4>(V(result));
3837}
3838
Nicolas Capens519cf222020-05-08 15:27:19 -04003839Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003840{
3841 return T(Ice::IceType_v4i32);
3842}
3843
Nicolas Capens519cf222020-05-08 15:27:19 -04003844Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003845{
3846 return T(Ice::IceType_i16);
3847}
3848
3849RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003852 return 1.0f / x;
3853}
3854
3855RValue<Float> RcpSqrt_pp(RValue<Float> x)
3856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003858 return Rcp_pp(Sqrt(x));
3859}
3860
3861RValue<Float> Sqrt(RValue<Float> x)
3862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003864 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003865 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003866 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3867 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003868 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003869 ::basicBlock->appendInst(sqrt);
3870
3871 return RValue<Float>(V(result));
3872}
3873
3874RValue<Float> Round(RValue<Float> x)
3875{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003876 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003877 return Float4(Round(Float4(x))).x;
3878}
3879
3880RValue<Float> Trunc(RValue<Float> x)
3881{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003882 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003883 return Float4(Trunc(Float4(x))).x;
3884}
3885
3886RValue<Float> Frac(RValue<Float> x)
3887{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003888 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003889 return Float4(Frac(Float4(x))).x;
3890}
3891
3892RValue<Float> Floor(RValue<Float> x)
3893{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003894 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003895 return Float4(Floor(Float4(x))).x;
3896}
3897
3898RValue<Float> Ceil(RValue<Float> x)
3899{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003900 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003901 return Float4(Ceil(Float4(x))).x;
3902}
3903
Nicolas Capens519cf222020-05-08 15:27:19 -04003904Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003905{
3906 return T(Ice::IceType_f32);
3907}
3908
Nicolas Capens519cf222020-05-08 15:27:19 -04003909Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003910{
3911 return T(Type_v2f32);
3912}
3913
Ben Clayton713b8d32019-12-17 20:37:56 +00003914Float4::Float4(RValue<Float> rhs)
3915 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003916{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003917 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003918 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003919
Ben Clayton713b8d32019-12-17 20:37:56 +00003920 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003921 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3922
3923 storeValue(replicate);
3924}
3925
3926RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3927{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003928 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003929 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003930 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003931 ::basicBlock->appendInst(cmp);
3932
3933 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003934 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003935 ::basicBlock->appendInst(select);
3936
3937 return RValue<Float4>(V(result));
3938}
3939
3940RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3941{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003942 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003943 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003944 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003945 ::basicBlock->appendInst(cmp);
3946
3947 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003948 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003949 ::basicBlock->appendInst(select);
3950
3951 return RValue<Float4>(V(result));
3952}
3953
3954RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3955{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003956 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003957 return Float4(1.0f) / x;
3958}
3959
3960RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3961{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003962 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003963 return Rcp_pp(Sqrt(x));
3964}
3965
// Reports whether RcpApprox() is available in this backend. Always false.
bool HasRcpApprox()
{
	// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
	constexpr bool supported = false;
	return supported;
}
3971
3972RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
3973{
3974 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3975 UNREACHABLE("RValue<Float4> RcpApprox()");
3976 return { 0.0f };
3977}
3978
3979RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
3980{
3981 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3982 UNREACHABLE("RValue<Float> RcpApprox()");
3983 return { 0.0f };
3984}
3985
// Reports whether RcpSqrtApprox() is available in this backend. Always false.
bool HasRcpSqrtApprox()
{
	constexpr bool supported = false;
	return supported;
}
3990
3991RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
3992{
3993 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
3994 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
3995 return { 0.0f };
3996}
3997
3998RValue<Float> RcpSqrtApprox(RValue<Float> x)
3999{
4000 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4001 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
4002 return { 0.0f };
4003}
4004
Nicolas Capens157ba262019-12-10 17:49:14 -05004005RValue<Float4> Sqrt(RValue<Float4> x)
4006{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004007 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004008 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04004009 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004010 Float4 result;
4011 result.x = Sqrt(Float(Float4(x).x));
4012 result.y = Sqrt(Float(Float4(x).y));
4013 result.z = Sqrt(Float(Float4(x).z));
4014 result.w = Sqrt(Float(Float4(x).w));
4015
4016 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04004017 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004018 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04004019 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004020 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004021 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04004022 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4023 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004024 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04004025 ::basicBlock->appendInst(sqrt);
4026
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04004027 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04004028 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04004029}
Nicolas Capens157ba262019-12-10 17:49:14 -05004030
4031RValue<Int> SignMask(RValue<Float4> x)
4032{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004033 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004034 if(emulateIntrinsics || CPUID::ARM)
4035 {
4036 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
4037 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
4038 }
4039 else
4040 {
4041 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004042 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004043 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4044 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004045 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004046 ::basicBlock->appendInst(movmsk);
4047
4048 return RValue<Int>(V(result));
4049 }
4050}
4051
4052RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
4053{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004054 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004055 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004056}
4057
4058RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
4059{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004060 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004061 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004062}
4063
4064RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4065{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004066 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004067 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004068}
4069
4070RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4071{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004072 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004073 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004074}
4075
4076RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4077{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004078 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004079 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004080}
4081
4082RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4083{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004084 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004085 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004086}
4087
4088RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4089{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004090 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004091 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004092}
4093
4094RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4095{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004096 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004097 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004098}
4099
4100RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4101{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004102 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004103 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004104}
4105
4106RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4107{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004108 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004109 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004110}
4111
4112RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4113{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004114 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004115 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004116}
4117
4118RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4119{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004120 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004121 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004122}
4123
4124RValue<Float4> Round(RValue<Float4> x)
4125{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004126 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004127 if(emulateIntrinsics || CPUID::ARM)
4128 {
4129 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4130 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4131 }
4132 else if(CPUID::SSE4_1)
4133 {
4134 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004135 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004136 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4137 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004138 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004139 round->addArg(::context->getConstantInt32(0));
4140 ::basicBlock->appendInst(round);
4141
4142 return RValue<Float4>(V(result));
4143 }
4144 else
4145 {
4146 return Float4(RoundInt(x));
4147 }
4148}
4149
4150RValue<Float4> Trunc(RValue<Float4> x)
4151{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004152 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004153 if(CPUID::SSE4_1)
4154 {
4155 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004156 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004157 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4158 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004159 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004160 round->addArg(::context->getConstantInt32(3));
4161 ::basicBlock->appendInst(round);
4162
4163 return RValue<Float4>(V(result));
4164 }
4165 else
4166 {
4167 return Float4(Int4(x));
4168 }
4169}
4170
4171RValue<Float4> Frac(RValue<Float4> x)
4172{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004173 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004174 Float4 frc;
4175
4176 if(CPUID::SSE4_1)
4177 {
4178 frc = x - Floor(x);
4179 }
4180 else
4181 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004182 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004183
Ben Clayton713b8d32019-12-17 20:37:56 +00004184 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004185 }
4186
4187 // x - floor(x) can be 1.0 for very small negative x.
4188 // Clamp against the value just below 1.0.
4189 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4190}
4191
4192RValue<Float4> Floor(RValue<Float4> x)
4193{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004194 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004195 if(CPUID::SSE4_1)
4196 {
4197 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004198 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004199 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4200 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004201 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004202 round->addArg(::context->getConstantInt32(1));
4203 ::basicBlock->appendInst(round);
4204
4205 return RValue<Float4>(V(result));
4206 }
4207 else
4208 {
4209 return x - Frac(x);
4210 }
4211}
4212
4213RValue<Float4> Ceil(RValue<Float4> x)
4214{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004215 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004216 if(CPUID::SSE4_1)
4217 {
4218 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004219 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004220 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4221 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004222 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004223 round->addArg(::context->getConstantInt32(2));
4224 ::basicBlock->appendInst(round);
4225
4226 return RValue<Float4>(V(result));
4227 }
4228 else
4229 {
4230 return -Floor(-x);
4231 }
4232}
4233
Nicolas Capens519cf222020-05-08 15:27:19 -04004234Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004235{
4236 return T(Ice::IceType_v4f32);
4237}
4238
4239RValue<Long> Ticks()
4240{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004241 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004242 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004243 return Long(Int(0));
4244}
4245
Ben Clayton713b8d32019-12-17 20:37:56 +00004246RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004247{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004248 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004249 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004250}
4251
Ben Clayton713b8d32019-12-17 20:37:56 +00004252RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004253{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004254 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004255 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004256}
4257
Ben Clayton713b8d32019-12-17 20:37:56 +00004258Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004259{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004260 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004261 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004262}
4263
4264void Breakpoint()
4265{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004266 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004267 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004268 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4269 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4270 ::basicBlock->appendInst(trap);
4271}
4272
Ben Clayton713b8d32019-12-17 20:37:56 +00004273void Nucleus::createFence(std::memory_order memoryOrder)
4274{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004275 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004276 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4277 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4278 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4279 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4280 inst->addArg(order);
4281 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004282}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004283
Ben Clayton713b8d32019-12-17 20:37:56 +00004284Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4285{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004286 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004287 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004288 return nullptr;
4289}
4290void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4291{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004292 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004293 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004294}
Nicolas Capens157ba262019-12-10 17:49:14 -05004295
4296RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4297{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004298 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004299 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4300}
4301
4302RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4303{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004304 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004305 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4306}
4307
4308void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4309{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004310 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004311 return emulated::Scatter(base, val, offsets, mask, alignment);
4312}
4313
4314void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4315{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004316 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004317 return emulated::Scatter(base, val, offsets, mask, alignment);
4318}
4319
4320RValue<Float> Exp2(RValue<Float> x)
4321{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004322 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004323 return emulated::Exp2(x);
4324}
4325
4326RValue<Float> Log2(RValue<Float> x)
4327{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004328 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004329 return emulated::Log2(x);
4330}
4331
4332RValue<Float4> Sin(RValue<Float4> x)
4333{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004334 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004335 return optimal::Sin(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004336}
4337
4338RValue<Float4> Cos(RValue<Float4> x)
4339{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004340 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004341 return optimal::Cos(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004342}
4343
4344RValue<Float4> Tan(RValue<Float4> x)
4345{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004346 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004347 return optimal::Tan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004348}
4349
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004350RValue<Float4> Asin(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004351{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004352 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004353 if(p == Precision::Full)
4354 {
4355 return emulated::Asin(x);
4356 }
4357 return optimal::Asin_8_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004358}
4359
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004360RValue<Float4> Acos(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004361{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004362 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004363 // Surprisingly, deqp-vk's precision.acos.highp/mediump tests pass when using the 4-term polynomial approximation
4364 // version of acos, unlike for Asin, which requires higher precision algorithms.
4365 return optimal::Acos_4_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004366}
4367
4368RValue<Float4> Atan(RValue<Float4> x)
4369{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004370 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004371 return optimal::Atan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004372}
4373
4374RValue<Float4> Sinh(RValue<Float4> x)
4375{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004376 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004377 return optimal::Sinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004378}
4379
4380RValue<Float4> Cosh(RValue<Float4> x)
4381{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004382 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004383 return optimal::Cosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004384}
4385
4386RValue<Float4> Tanh(RValue<Float4> x)
4387{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004388 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004389 return optimal::Tanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004390}
4391
4392RValue<Float4> Asinh(RValue<Float4> x)
4393{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004394 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004395 return optimal::Asinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004396}
4397
4398RValue<Float4> Acosh(RValue<Float4> x)
4399{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004400 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004401 return optimal::Acosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004402}
4403
4404RValue<Float4> Atanh(RValue<Float4> x)
4405{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004406 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004407 return optimal::Atanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004408}
4409
4410RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4411{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004412 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004413 return optimal::Atan2(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004414}
4415
4416RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004418 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004419 return optimal::Pow(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004420}
4421
4422RValue<Float4> Exp(RValue<Float4> x)
4423{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004424 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004425 return optimal::Exp(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004426}
4427
4428RValue<Float4> Log(RValue<Float4> x)
4429{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004430 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004431 return optimal::Log(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004432}
4433
4434RValue<Float4> Exp2(RValue<Float4> x)
4435{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004436 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004437 return optimal::Exp2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004438}
4439
4440RValue<Float4> Log2(RValue<Float4> x)
4441{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004442 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004443 return optimal::Log2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004444}
4445
4446RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4447{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004448 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004449 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004450 {
Ben Claytonce54c592020-02-07 11:30:51 +00004451 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004452 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004453 }
4454 else
4455 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004456 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004457 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4458 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4459 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004460 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004461 ::basicBlock->appendInst(ctlz);
4462
4463 return RValue<UInt>(V(result));
4464 }
4465}
4466
4467RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4468{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004469 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004470 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004471 {
Ben Claytonce54c592020-02-07 11:30:51 +00004472 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004473 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004474 }
4475 else
4476 {
4477 // TODO: implement vectorized version in Subzero
4478 UInt4 result;
4479 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4480 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4481 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4482 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4483 return result;
4484 }
4485}
4486
4487RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4488{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004489 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004490 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004491 {
Ben Claytonce54c592020-02-07 11:30:51 +00004492 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004493 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004494 }
4495 else
4496 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004497 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004498 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4499 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4500 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004501 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004502 ::basicBlock->appendInst(ctlz);
4503
4504 return RValue<UInt>(V(result));
4505 }
4506}
4507
4508RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4509{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004510 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004511 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004512 {
Ben Claytonce54c592020-02-07 11:30:51 +00004513 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004514 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004515 }
4516 else
4517 {
4518 // TODO: implement vectorized version in Subzero
4519 UInt4 result;
4520 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4521 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4522 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4523 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4524 return result;
4525 }
4526}
4527
Antonio Maiorano370cba52019-12-31 11:36:07 -05004528RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4529{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004530 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004531 return emulated::MinAtomic(x, y, memoryOrder);
4532}
4533
4534RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4535{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004536 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004537 return emulated::MinAtomic(x, y, memoryOrder);
4538}
4539
4540RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4541{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004542 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004543 return emulated::MaxAtomic(x, y, memoryOrder);
4544}
4545
4546RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4547{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004548 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004549 return emulated::MaxAtomic(x, y, memoryOrder);
4550}
4551
// Debug hook. When ENABLE_RR_DEBUG_INFO is defined, passes the caller's
// backtrace to emitPrintLocation(); otherwise compiles to an empty function.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO)
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004558void EmitDebugVariable(Value *value) {}
// Debug flush hook; intentionally a no-op in this backend.
void FlushDebug()
{
}
4560
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004561namespace {
4562namespace coro {
4563
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004564// Instance data per generated coroutine
4565// This is the "handle" type used for Coroutine functions
4566// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4567struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004568{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004569 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004570 bool done = false; // the coroutine should stop at the next yield()
4571 bool terminated = false; // the coroutine has finished.
4572 bool inRoutine = false; // is the coroutine currently executing?
4573 marl::Scheduler::Fiber *mainFiber = nullptr;
4574 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004575 void *promisePtr = nullptr;
4576};
4577
4578CoroutineData *createCoroutineData()
4579{
4580 return new CoroutineData{};
4581}
4582
4583void destroyCoroutineData(CoroutineData *coroData)
4584{
4585 delete coroData;
4586}
4587
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004588// suspend() pauses execution of the coroutine, and resumes execution from the
4589// caller's call to await().
4590// Returns true if await() is called again, or false if coroutine_destroy()
4591// is called.
4592bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004593{
Ben Claytonc3466532020-03-24 11:54:05 +00004594 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4595 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4596 ASSERT(coroData->inRoutine);
4597 coroData->inRoutine = false;
4598 coroData->mainFiber->notify();
4599 while(!coroData->inRoutine)
4600 {
4601 coroData->routineFiber->wait();
4602 }
4603 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004604}
4605
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004606// resume() is called by await(), blocking until the coroutine calls yield()
4607// or the coroutine terminates.
4608void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004609{
Ben Claytonc3466532020-03-24 11:54:05 +00004610 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4611 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4612 ASSERT(!coroData->inRoutine);
4613 coroData->inRoutine = true;
4614 coroData->routineFiber->notify();
4615 while(coroData->inRoutine)
4616 {
4617 coroData->mainFiber->wait();
4618 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004619}
4620
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004621// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4622// until the coroutine ends, and deletes the coroutine data.
4623void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004624{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004625 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004626 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4627 ASSERT(!coroData->inRoutine);
4628 if(!coroData->terminated)
4629 {
4630 coroData->done = true;
4631 coroData->inRoutine = true;
4632 coroData->routineFiber->notify();
4633 while(!coroData->terminated)
4634 {
4635 coroData->mainFiber->wait();
4636 }
4637 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004638 if(coroData->useInternalScheduler)
4639 {
4640 ::getOrCreateScheduler().unbind();
4641 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004642 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004643}
4644
4645namespace detail {
4646thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4647} // namespace detail
4648
4649void setHandleParam(Nucleus::CoroutineHandle handle)
4650{
4651 ASSERT(!detail::coroHandle);
4652 detail::coroHandle = handle;
4653}
4654
4655Nucleus::CoroutineHandle getHandleParam()
4656{
4657 ASSERT(detail::coroHandle);
4658 auto handle = detail::coroHandle;
4659 detail::coroHandle = {};
4660 return handle;
4661}
4662
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004663bool isDone(Nucleus::CoroutineHandle handle)
4664{
4665 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004666 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004667}
4668
4669void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4670{
4671 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4672 coroData->promisePtr = promisePtr;
4673}
4674
4675void *getPromisePtr(Nucleus::CoroutineHandle handle)
4676{
4677 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4678 return coroData->promisePtr;
4679}
4680
4681} // namespace coro
4682} // namespace
4683
4684// Used to generate coroutines.
4685// Lifetime: from yield to acquireCoroutine
4686class CoroutineGenerator
4687{
4688public:
4689 CoroutineGenerator()
4690 {
4691 }
4692
4693 // Inserts instructions at the top of the current function to make it a coroutine.
4694 void generateCoroutineBegin()
4695 {
4696 // Begin building the main coroutine_begin() function.
4697 // We insert these instructions at the top of the entry node,
4698 // before existing reactor-generated instructions.
4699
4700 // CoroutineHandle coroutine_begin(<Arguments>)
4701 // {
4702 // this->handle = coro::getHandleParam();
4703 //
4704 // YieldType promise;
4705 // coro::setPromisePtr(handle, &promise); // For await
4706 //
4707 // ... <REACTOR CODE> ...
4708 //
4709
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004710 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004711 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004712
4713 // YieldType promise;
4714 // coro::setPromisePtr(handle, &promise); // For await
4715 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004716 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004717 }
4718
4719 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4720 void generateYield(Value *val)
4721 {
4722 // ... <REACTOR CODE> ...
4723 //
4724 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004725 // if (!coro::suspend(handle)) {
4726 // return false; // coroutine has been stopped by the caller.
4727 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004728 //
4729 // ... <REACTOR CODE> ...
4730
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004731 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004732 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004733
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004734 // if (!coro::suspend(handle)) {
4735 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4736 auto doneBlock = Nucleus::createBasicBlock();
4737 auto resumeBlock = Nucleus::createBasicBlock();
4738 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4739
4740 // return false; // coroutine has been stopped by the caller.
4741 ::basicBlock = doneBlock;
4742 Nucleus::createRetVoid(); // coroutine return value is ignored.
4743
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004744 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004745 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004746 }
4747
4748 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4749
4750 // Generates the await function for the current coroutine.
4751 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4752 static FunctionUniquePtr generateAwaitFunction()
4753 {
4754 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4755 // {
4756 // if (coro::isDone())
4757 // {
4758 // return false;
4759 // }
4760 // else // resume
4761 // {
4762 // YieldType* promise = coro::getPromisePtr(handle);
4763 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004764 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004765 // return true;
4766 // }
4767 // }
4768
4769 // Subzero doesn't support bool types (IceType_i1) as return type
4770 const Ice::Type ReturnType = Ice::IceType_i32;
4771 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4772 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4773
4774 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4775 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4776
4777 Ice::Variable *handle = awaitFunc->getArgs()[0];
4778 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4779
4780 auto doneBlock = awaitFunc->makeNode();
4781 {
4782 // return false;
4783 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4784 doneBlock->appendInst(ret);
4785 }
4786
4787 auto resumeBlock = awaitFunc->makeNode();
4788 {
4789 // YieldType* promise = coro::getPromisePtr(handle);
4790 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4791
4792 // *out = *promise;
4793 // Load promise value
4794 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4795 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4796 resumeBlock->appendInst(load);
4797 // Then store it in output param
4798 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4799 resumeBlock->appendInst(store);
4800
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004801 // coro::resume(handle);
4802 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004803
4804 // return true;
4805 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4806 resumeBlock->appendInst(ret);
4807 }
4808
4809 // if (coro::isDone())
4810 // {
4811 // <doneBlock>
4812 // }
4813 // else // resume
4814 // {
4815 // <resumeBlock>
4816 // }
4817 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004818 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004819 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4820 bb->appendInst(br);
4821
4822 return FunctionUniquePtr{ awaitFunc };
4823 }
4824
4825 // Generates the destroy function for the current coroutine.
4826 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4827 static FunctionUniquePtr generateDestroyFunction()
4828 {
4829 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4830 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004831 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004832 // return;
4833 // }
4834
4835 const Ice::Type ReturnType = Ice::IceType_void;
4836 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4837
4838 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4839 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4840
4841 Ice::Variable *handle = destroyFunc->getArgs()[0];
4842
4843 auto *bb = destroyFunc->getEntryNode();
4844
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004845 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4846 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004847
4848 // return;
4849 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4850 bb->appendInst(ret);
4851
4852 return FunctionUniquePtr{ destroyFunc };
4853 }
4854
4855private:
4856 Ice::Variable *handle{};
4857 Ice::Variable *promise{};
4858};
4859
4860static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4861{
4862 // This doubles up as our coroutine handle
4863 auto coroData = coro::createCoroutineData();
4864
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004865 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4866 if(coroData->useInternalScheduler)
4867 {
4868 ::getOrCreateScheduler().bind();
4869 }
4870
Ben Clayton76e9e532020-03-16 20:35:04 +00004871 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004872 // Store handle in TLS so that the coroutine can grab it right away, before
4873 // any fiber switch occurs.
4874 coro::setHandleParam(coroData);
4875
Ben Claytonc3466532020-03-24 11:54:05 +00004876 ASSERT(!coroData->routineFiber);
4877 coroData->routineFiber = marl::Scheduler::Fiber::current();
4878
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004879 beginFunc();
4880
Ben Claytonc3466532020-03-24 11:54:05 +00004881 ASSERT(coroData->inRoutine);
4882 coroData->done = true; // coroutine is done.
4883 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4884 coroData->inRoutine = false;
4885 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004886 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004887
Ben Claytonc3466532020-03-24 11:54:05 +00004888 ASSERT(!coroData->mainFiber);
4889 coroData->mainFiber = marl::Scheduler::Fiber::current();
4890
4891 // block until the first yield or coroutine end
4892 ASSERT(!coroData->inRoutine);
4893 coroData->inRoutine = true;
4894 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4895 while(coroData->inRoutine)
4896 {
4897 coroData->mainFiber->wait();
4898 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004899
4900 return coroData;
4901}
4902
4903void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4904{
4905 // Start by creating a regular function
4906 createFunction(yieldType, params);
4907
4908 // Save in case yield() is called
4909 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4910 ::coroYieldType = yieldType;
4911}
4912
4913void Nucleus::yield(Value *val)
4914{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004915 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004916 Variable::materializeAll();
4917
4918 // On first yield, we start generating coroutine functions
4919 if(!::coroGen)
4920 {
4921 ::coroGen = std::make_shared<CoroutineGenerator>();
4922 ::coroGen->generateCoroutineBegin();
4923 }
4924
4925 ASSERT(::coroGen);
4926 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004927}
4928
Ben Clayton713b8d32019-12-17 20:37:56 +00004929static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4930{
4931 return false;
4932}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004933
4934static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4935{
4936}
Nicolas Capens157ba262019-12-10 17:49:14 -05004937
4938std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4939{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004940 if(::coroGen)
4941 {
4942 // Finish generating coroutine functions
4943 {
4944 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004945 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004946 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004947
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004948 auto awaitFunc = ::coroGen->generateAwaitFunction();
4949 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004950
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004951 // At this point, we no longer need the CoroutineGenerator.
4952 ::coroGen.reset();
4953 ::coroYieldType = nullptr;
4954
4955 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4956 { name, "await", "destroy" },
4957 cfgEdit);
4958
4959 return routine;
4960 }
4961 else
4962 {
4963 {
4964 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004965 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004966 }
4967
4968 ::coroYieldType = nullptr;
4969
4970 // Not an actual coroutine (no yields), so return stubs for await and destroy
4971 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4972
4973 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4974 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4975 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4976 return routine;
4977 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004978}
4979
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004980Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004981{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004982 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4983
4984 if(isCoroutine)
4985 {
4986 return rr::invokeCoroutineBegin(func);
4987 }
4988 else
4989 {
4990 // For regular routines, just invoke the begin func directly
4991 return func();
4992 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004993}
Nicolas Capens157ba262019-12-10 17:49:14 -05004994
4995} // namespace rr