blob: f17574b16a6e26c46cf4110161695fea3dd2e39b [file] [log] [blame]
Nicolas Capens598f8d82016-09-26 15:09:10 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maiorano9c14bda2020-09-18 16:33:36 -040016#include "EmulatedIntrinsics.hpp"
17#include "OptimalIntrinsics.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050018#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000019#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050020#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040021
Nicolas Capens1a3ce872018-10-10 10:42:36 -040022#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000023#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040024
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040026#include "src/IceCfgNode.h"
27#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceELFStreamer.h"
29#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040030#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000031#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040032
Ben Clayton713b8d32019-12-17 20:37:56 +000033#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040034#include "llvm/Support/FileSystem.h"
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -050035#include "llvm/Support/ManagedStatic.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040036#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040037
Antonio Maiorano8bce0672020-02-28 13:13:45 -050038#include "marl/event.h"
39
Nicolas Capens6a990f82018-07-06 15:54:07 -040040#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000041# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040042#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040043
Nicolas Capensbd65da92017-01-05 16:31:06 -050044#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000045# ifndef WIN32_LEAN_AND_MEAN
46# define WIN32_LEAN_AND_MEAN
47# endif // !WIN32_LEAN_AND_MEAN
48# ifndef NOMINMAX
49# define NOMINMAX
50# endif // !NOMINMAX
51# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050052#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040053
Ben Clayton683bad82020-02-10 23:57:09 +000054#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040055#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000056#include <limits>
57#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040058
Antonio Maiorano02a39532020-01-21 15:15:34 -050059// Subzero utility functions
60// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050061namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050062namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050063
64Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
65{
66 uint32_t sequenceNumber = 0;
Nicolas Capensff010f92021-02-01 12:22:53 -050067 auto *function = Ice::Cfg::create(context, sequenceNumber).release();
68
69 function->setStackSizeLimit(512 * 1024); // 512 KiB
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050070
71 Ice::CfgLocalAllocatorScope allocScope{ function };
72
73 for(auto type : paramTypes)
74 {
75 Ice::Variable *arg = function->makeVariable(type);
76 function->addArg(arg);
77 }
78
79 Ice::CfgNode *node = function->makeNode();
80 function->setEntryNode(node);
81
82 return function;
83}
84
85Ice::Type getPointerType(Ice::Type elementType)
86{
87 if(sizeof(void *) == 8)
88 {
89 return Ice::IceType_i64;
90 }
91 else
92 {
93 return Ice::IceType_i32;
94 }
95}
96
97Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
98{
99 int typeSize = Ice::typeWidthInBytes(type);
100 int totalSize = typeSize * (arraySize ? arraySize : 1);
101
102 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
103 auto address = function->makeVariable(getPointerType(type));
104 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
105 function->getEntryNode()->getInsts().push_front(alloca);
106
107 return address;
108}
109
110Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500111{
112 if(sizeof(void *) == 8)
113 {
114 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
115 }
116 else
117 {
118 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
119 }
120}
121
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400122// TODO(amaiorano): remove this prototype once these are moved to separate header/cpp
123Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500124
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400125// Wrapper for calls on C functions with Ice types
126Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, Ice::Operand *callTarget, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
127{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500128 Ice::Variable *ret = nullptr;
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400129
130 // Subzero doesn't support boolean return values. Replace with an i32 temporarily,
131 // then truncate result to bool.
132 // TODO(b/151158858): Add support to Subzero's InstCall for bool-returning functions
133 const bool returningBool = (retTy == Ice::IceType_i1);
134 if(returningBool)
135 {
136 ret = function->makeVariable(Ice::IceType_i32);
137 }
138 else if(retTy != Ice::IceType_void)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500139 {
140 ret = function->makeVariable(retTy);
141 }
142
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400143 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, callTarget, false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500144 for(auto arg : iceArgs)
145 {
146 call->addArg(arg);
147 }
148
149 basicBlock->appendInst(call);
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400150
151 if(returningBool)
152 {
153 // Truncate result to bool so that if any (lsb) bits were set, result will be true
154 ret = createTruncate(function, basicBlock, ret, Ice::IceType_i1);
155 }
156
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500157 return ret;
158}
159
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400160Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
161{
162 Ice::Operand *callTarget = getConstantPointer(function->getContext(), fptr);
163 return Call(function, basicBlock, retTy, callTarget, iceArgs, isVariadic);
164}
165
Antonio Maiorano62427e02020-02-13 09:18:05 -0500166// Wrapper for calls on C functions with Ice types
167template<typename Return, typename... CArgs, typename... RArgs>
168Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
169{
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400170 static_assert(sizeof...(CArgs) == sizeof...(RArgs), "Expected number of args don't match");
171
Nicolas Capens519cf222020-05-08 15:27:19 -0400172 Ice::Type retTy = T(rr::CToReactorT<Return>::type());
Antonio Maiorano62427e02020-02-13 09:18:05 -0500173 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500174 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500175}
176
Antonio Maiorano02a39532020-01-21 15:15:34 -0500177// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500178Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500179{
180 Ice::Variable *result = function->makeVariable(v->getType());
181 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
182 basicBlock->appendInst(cast);
183 return result;
184}
185
Antonio Maiorano16ae92a2020-03-10 10:53:24 -0400186Ice::Variable *createTruncate(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *from, Ice::Type toType)
187{
188 Ice::Variable *to = function->makeVariable(toType);
189 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Trunc, to, from);
190 basicBlock->appendInst(cast);
191 return to;
192}
193
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500194Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500195{
196 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
197 // absolute address. We circumvent this by casting to a non-const variable, and loading
198 // from that.
199 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
200 {
201 ptr = sz::createUnconstCast(function, basicBlock, cptr);
202 }
203
204 Ice::Variable *result = function->makeVariable(type);
205 auto load = Ice::InstLoad::create(function, result, ptr, align);
206 basicBlock->appendInst(load);
207
208 return result;
209}
210
211} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500212} // namespace
213
// Forward declarations of rr types that file-level globals point to.
namespace rr {
class CoroutineGenerator;
class ELFMemoryStreamer;
}  // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500218
219namespace {
220
Antonio Maiorano8b4cf1c2021-01-26 14:40:03 -0500221// Used to automatically invoke llvm_shutdown() when driver is unloaded
222llvm::llvm_shutdown_obj llvmShutdownObj;
223
Nicolas Capens157ba262019-12-10 17:49:14 -0500224// Default configuration settings. Must be accessed under mutex lock.
225std::mutex defaultConfigLock;
226rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000227{
Nicolas Capens157ba262019-12-10 17:49:14 -0500228 // This uses a static in a function to avoid the cost of a global static
229 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
230 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000231 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500232 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000233}
234
Nicolas Capens157ba262019-12-10 17:49:14 -0500235Ice::GlobalContext *context = nullptr;
236Ice::Cfg *function = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400237Ice::CfgNode *entryBlock = nullptr;
238Ice::CfgNode *basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500239Ice::CfgNode *basicBlock = nullptr;
240Ice::CfgLocalAllocatorScope *allocator = nullptr;
241rr::ELFMemoryStreamer *routine = nullptr;
242
243std::mutex codegenMutex;
244
245Ice::ELFFileStreamer *elfFile = nullptr;
246Ice::Fdstream *out = nullptr;
247
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500248// Coroutine globals
249rr::Type *coroYieldType = nullptr;
250std::shared_ptr<rr::CoroutineGenerator> coroGen;
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500251marl::Scheduler &getOrCreateScheduler()
252{
253 static auto scheduler = [] {
Ben Claytonef3914c2020-06-15 22:17:46 +0100254 marl::Scheduler::Config cfg;
255 cfg.setWorkerThreadCount(8);
256 return std::make_unique<marl::Scheduler>(cfg);
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500257 }();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500258
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -0500259 return *scheduler;
260}
Nicolas Capens157ba262019-12-10 17:49:14 -0500261} // Anonymous namespace
262
263namespace {
264
// Normalize architecture macros: when only the _M_* spellings are defined,
// also define the __i386__ / __x86_64__ spellings tested in this file.
#if defined(_M_IX86) && !defined(__i386__)
#	define __i386__ 1
#endif

#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
#	define __x86_64__ 1
#endif
272
Antonio Maiorano370cba52019-12-31 11:36:07 -0500273Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400274{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500275 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100276 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500277 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000278 case rr::Optimization::Level::None: return Ice::Opt_m1;
279 case rr::Optimization::Level::Less: return Ice::Opt_m1;
280 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500281 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
282 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100283 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500284 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400285}
286
Antonio Maiorano370cba52019-12-31 11:36:07 -0500287Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
288{
289 switch(memoryOrder)
290 {
291 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
292 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
293 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
294 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
295 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
296 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
297 }
298 return Ice::Intrinsics::MemoryOrderInvalid;
299}
300
// Host CPU detection. The public flags are static members declared here and
// defined outside the class.
class CPUID
{
public:
	const static bool ARM;
	const static bool SSE4_1;

private:
	// Executes the CPUID instruction for leaf `info` on x86 targets and stores
	// EAX, EBX, ECX, EDX into registers[0..3]. On other targets all four
	// entries are set to zero.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Compile-time architecture check: true only when building for ARM/AArch64.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__)
		return false;
#elif defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Tests bit 19 (mask 0x00080000) of ECX from CPUID leaf 1 on x86 targets;
	// always false elsewhere.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		int registers[4];
		cpuid(registers, 1);
		const int ecx = registers[2];
		return (ecx & 0x00080000) != 0;
#else
		return false;
#endif
	}
};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500350
Nicolas Capens157ba262019-12-10 17:49:14 -0500351const bool CPUID::ARM = CPUID::detectARM();
352const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
353const bool emulateIntrinsics = false;
354const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400355
Nicolas Capens157ba262019-12-10 17:49:14 -0500356constexpr bool subzeroDumpEnabled = false;
357constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500358
359#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500360static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
361static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500362#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500363
364} // anonymous namespace
365
366namespace rr {
367
// Returns the name of this Reactor backend.
std::string BackendName()
{
	return std::string("Subzero");
}
372
Ben Clayton713b8d32019-12-17 20:37:56 +0000373const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500374 true, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500375};
376
377enum EmulatedType
378{
379 EmulatedShift = 16,
380 EmulatedV2 = 2 << EmulatedShift,
381 EmulatedV4 = 4 << EmulatedShift,
382 EmulatedV8 = 8 << EmulatedShift,
383 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
384
385 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
386 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
387 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000388 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
389 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500390 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
391};
392
Ben Clayton713b8d32019-12-17 20:37:56 +0000393class Value : public Ice::Operand
394{};
395class SwitchCases : public Ice::InstSwitch
396{};
397class BasicBlock : public Ice::CfgNode
398{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500399
400Ice::Type T(Type *t)
401{
402 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
403 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500404}
405
Nicolas Capens157ba262019-12-10 17:49:14 -0500406Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400407{
Ben Clayton713b8d32019-12-17 20:37:56 +0000408 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500409}
410
411Type *T(EmulatedType t)
412{
Ben Clayton713b8d32019-12-17 20:37:56 +0000413 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500414}
415
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500416std::vector<Ice::Type> T(const std::vector<Type *> &types)
417{
418 std::vector<Ice::Type> result;
419 result.reserve(types.size());
420 for(auto &t : types)
421 {
422 result.push_back(T(t));
423 }
424 return result;
425}
426
Nicolas Capens157ba262019-12-10 17:49:14 -0500427Value *V(Ice::Operand *v)
428{
Ben Clayton713b8d32019-12-17 20:37:56 +0000429 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500430}
431
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500432Ice::Operand *V(Value *v)
433{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500434 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500435}
436
Antonio Maiorano62427e02020-02-13 09:18:05 -0500437std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
438{
439 std::vector<Ice::Operand *> result;
440 result.reserve(values.size());
441 for(auto &v : values)
442 {
443 result.push_back(V(v));
444 }
445 return result;
446}
447
Nicolas Capens157ba262019-12-10 17:49:14 -0500448BasicBlock *B(Ice::CfgNode *b)
449{
Ben Clayton713b8d32019-12-17 20:37:56 +0000450 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500451}
452
453static size_t typeSize(Type *type)
454{
455 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400456 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500457 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800458 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000459 case Type_v2i32: return 8;
460 case Type_v4i16: return 8;
461 case Type_v2i16: return 4;
462 case Type_v8i8: return 8;
463 case Type_v4i8: return 4;
464 case Type_v2f32: return 8;
465 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500466 }
467 }
468
469 return Ice::typeWidthInBytes(T(type));
470}
471
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400472static void finalizeFunction()
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500473{
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400474 // Create a return if none was added
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500475 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
476 {
477 Nucleus::createRetVoid();
478 }
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400479
480 // Connect the entry block to the top of the initial basic block
481 auto br = Ice::InstBr::create(::function, ::basicBlockTop);
482 ::entryBlock->appendInst(br);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500483}
484
Ben Clayton713b8d32019-12-17 20:37:56 +0000485using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
486using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500487
488inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
489{
Ben Clayton713b8d32019-12-17 20:37:56 +0000490 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500491}
492
493inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
494{
495 return &sectionHeader(elfHeader)[index];
496}
497
498static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
499{
500 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
501
502 uint32_t index = relocation.getSymbol();
503 int table = relocationTable.sh_link;
504 void *symbolValue = nullptr;
505
506 if(index != SHN_UNDEF)
507 {
508 if(table == SHN_UNDEF) return nullptr;
509 const SectionHeader *symbolTable = elfSection(elfHeader, table);
510
511 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
512 if(index >= symtab_entries)
513 {
514 ASSERT(index < symtab_entries && "Symbol Index out of range");
515 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800516 }
517
Nicolas Capens157ba262019-12-10 17:49:14 -0500518 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000519 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500520 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800521
Nicolas Capens157ba262019-12-10 17:49:14 -0500522 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400523 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500524 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000525 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400526 }
527 else
528 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500529 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400530 }
Nicolas Capens66478362016-10-13 15:36:36 -0400531 }
532
Nicolas Capens157ba262019-12-10 17:49:14 -0500533 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000534 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500535
536 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400537 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400538 switch(relocation.getType())
539 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000540 case R_ARM_NONE:
541 // No relocation
542 break;
543 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500544 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000545 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500546 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
547 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
548 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400549 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000550 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500551 {
552 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
553 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
554 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400555 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000556 default:
557 ASSERT(false && "Unsupported relocation type");
558 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400559 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500560 }
561 else
562 {
563 switch(relocation.getType())
564 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000565 case R_386_NONE:
566 // No relocation
567 break;
568 case R_386_32:
569 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
570 break;
571 case R_386_PC32:
572 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
573 break;
574 default:
575 ASSERT(false && "Unsupported relocation type");
576 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500577 }
Nicolas Capens66478362016-10-13 15:36:36 -0400578 }
579
Nicolas Capens157ba262019-12-10 17:49:14 -0500580 return symbolValue;
581}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400582
Nicolas Capens157ba262019-12-10 17:49:14 -0500583static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
584{
585 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
586
587 uint32_t index = relocation.getSymbol();
588 int table = relocationTable.sh_link;
589 void *symbolValue = nullptr;
590
591 if(index != SHN_UNDEF)
592 {
593 if(table == SHN_UNDEF) return nullptr;
594 const SectionHeader *symbolTable = elfSection(elfHeader, table);
595
596 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
597 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400598 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500599 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400600 return nullptr;
601 }
602
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000604 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500605 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400606
Nicolas Capens157ba262019-12-10 17:49:14 -0500607 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400608 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500609 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000610 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500611 }
612 else
613 {
614 return nullptr;
615 }
616 }
Nicolas Capens66478362016-10-13 15:36:36 -0400617
Nicolas Capens157ba262019-12-10 17:49:14 -0500618 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000619 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
620 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400621
Nicolas Capens157ba262019-12-10 17:49:14 -0500622 switch(relocation.getType())
623 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000624 case R_X86_64_NONE:
625 // No relocation
626 break;
627 case R_X86_64_64:
628 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
629 break;
630 case R_X86_64_PC32:
631 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
632 break;
633 case R_X86_64_32S:
634 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
635 break;
636 default:
637 ASSERT(false && "Unsupported relocation type");
638 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500639 }
640
641 return symbolValue;
642}
643
// Location of one generated function inside a loaded ELF image.
struct EntryPoint
{
	// Start of the function's code; nullptr until assigned. Initialized so a
	// default-constructed EntryPoint never carries an indeterminate pointer.
	const void *entry = nullptr;
	// Size of the code in bytes.
	size_t codeSize = 0;
};
649
650std::vector<EntryPoint> loadImage(uint8_t *const elfImage, const std::vector<const char *> &functionNames)
651{
652 ASSERT(functionNames.size() > 0);
653 std::vector<EntryPoint> entryPoints(functionNames.size());
654
Ben Clayton713b8d32019-12-17 20:37:56 +0000655 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500656
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400657 // TODO: assert?
Nicolas Capens157ba262019-12-10 17:49:14 -0500658 if(!elfHeader->checkMagic())
659 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400660 return {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500661 }
662
663 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000664 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
665#if defined(__i386__)
666 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
667#elif defined(__x86_64__)
668 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
669#elif defined(__arm__)
670 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
671#elif defined(__aarch64__)
672 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
673#elif defined(__mips__)
674 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
675#else
676# error "Unsupported platform"
677#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500678
Ben Clayton713b8d32019-12-17 20:37:56 +0000679 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500680
681 for(int i = 0; i < elfHeader->e_shnum; i++)
682 {
683 if(sectionHeader[i].sh_type == SHT_PROGBITS)
684 {
685 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
686 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400687 auto findSectionNameEntryIndex = [&]() -> size_t {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500688 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400689 const char *sectionName = reinterpret_cast<const char *>(elfImage + sectionNameOffset);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500690
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400691 for(size_t j = 0; j < functionNames.size(); ++j)
692 {
693 if(strstr(sectionName, functionNames[j]) != nullptr)
694 {
695 return j;
696 }
697 }
698
699 UNREACHABLE("Failed to find executable section that matches input function names");
700 return static_cast<size_t>(-1);
701 };
702
703 size_t index = findSectionNameEntryIndex();
704 entryPoints[index].entry = elfImage + sectionHeader[i].sh_offset;
705 entryPoints[index].codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400706 }
707 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500708 else if(sectionHeader[i].sh_type == SHT_REL)
709 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000710 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400711
Nicolas Capens157ba262019-12-10 17:49:14 -0500712 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
713 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000714 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500715 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
716 }
717 }
718 else if(sectionHeader[i].sh_type == SHT_RELA)
719 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000720 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500721
722 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
723 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000724 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500725 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
726 }
727 }
728 }
729
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400730 return entryPoints;
Nicolas Capens157ba262019-12-10 17:49:14 -0500731}
732
733template<typename T>
734struct ExecutableAllocator
735{
736 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000737 template<class U>
738 ExecutableAllocator(const ExecutableAllocator<U> &other)
739 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500740
741 using value_type = T;
742 using size_type = std::size_t;
743
744 T *allocate(size_type n)
745 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000746 return (T *)allocateMemoryPages(
747 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500748 }
749
750 void deallocate(T *p, size_type n)
751 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800752 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500753 }
754};
755
756class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
757{
758 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
759 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
760
761public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000762 ELFMemoryStreamer()
763 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500764 {
765 position = 0;
766 buffer.reserve(0x1000);
767 }
768
769 ~ELFMemoryStreamer() override
770 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500771 }
772
773 void write8(uint8_t Value) override
774 {
775 if(position == (uint64_t)buffer.size())
776 {
777 buffer.push_back(Value);
778 position++;
779 }
780 else if(position < (uint64_t)buffer.size())
781 {
782 buffer[position] = Value;
783 position++;
784 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000785 else
786 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500787 }
788
789 void writeBytes(llvm::StringRef Bytes) override
790 {
791 std::size_t oldSize = buffer.size();
792 buffer.resize(oldSize + Bytes.size());
793 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
794 position += Bytes.size();
795 }
796
797 uint64_t tell() const override { return position; }
798
799 void seek(uint64_t Off) override { position = Off; }
800
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400801 std::vector<EntryPoint> loadImageAndGetEntryPoints(const std::vector<const char *> &functionNames)
Nicolas Capens157ba262019-12-10 17:49:14 -0500802 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400803 auto entryPoints = loadImage(&buffer[0], functionNames);
Nicolas Capens157ba262019-12-10 17:49:14 -0500804
805#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500806 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
807#else
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400808 for(auto &entryPoint : entryPoints)
809 {
810 __builtin___clear_cache((char *)entryPoint.entry, (char *)entryPoint.entry + entryPoint.codeSize);
811 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500812#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500813
Antonio Maioranobc98fbe2020-03-17 15:46:22 -0400814 return entryPoints;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400815 }
816
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500817 void finalize()
818 {
819 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
820
821 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
822 }
823
Ben Clayton713b8d32019-12-17 20:37:56 +0000824 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400825 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500826 ASSERT(func);
827 funcs[index] = func;
828 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400829
Nicolas Capens157ba262019-12-10 17:49:14 -0500830 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400831 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500832 ASSERT(funcs[index]);
833 return funcs[index];
834 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400835
Antonio Maiorano02a39532020-01-21 15:15:34 -0500836 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500837 {
Nicolas Capens4e75f452021-01-28 01:52:56 -0500838 // Check if we already have a suitable constant.
839 for(const auto &c : constantsPool)
840 {
841 void *ptr = c.data.get();
842 size_t space = c.space;
843
844 void *alignedPtr = std::align(alignment, size, ptr, space);
845
846 if(space < size)
847 {
848 continue;
849 }
850
851 if(memcmp(data, alignedPtr, size) == 0)
852 {
853 return alignedPtr;
854 }
855 }
856
Antonio Maiorano02a39532020-01-21 15:15:34 -0500857 // TODO(b/148086935): Replace with a buffer allocator.
858 size_t space = size + alignment;
859 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
860 void *ptr = buf.get();
861 void *alignedPtr = std::align(alignment, size, ptr, space);
862 ASSERT(alignedPtr);
863 memcpy(alignedPtr, data, size);
Nicolas Capens4e75f452021-01-28 01:52:56 -0500864 constantsPool.emplace_back(std::move(buf), space);
865
Antonio Maiorano02a39532020-01-21 15:15:34 -0500866 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500867 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400868
Nicolas Capens157ba262019-12-10 17:49:14 -0500869private:
Nicolas Capens4e75f452021-01-28 01:52:56 -0500870 struct Constant
871 {
872 Constant(std::unique_ptr<uint8_t[]> data, size_t space)
873 : data(std::move(data))
874 , space(space)
875 {}
876
877 std::unique_ptr<uint8_t[]> data;
878 size_t space;
879 };
880
Ben Clayton713b8d32019-12-17 20:37:56 +0000881 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500882 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
883 std::size_t position;
Nicolas Capens4e75f452021-01-28 01:52:56 -0500884 std::vector<Constant> constantsPool;
Nicolas Capens157ba262019-12-10 17:49:14 -0500885};
886
#ifdef ENABLE_RR_PRINT
// Emits a call to rr::DebugPrintf, passing `vals` as the call arguments.
void VPrintf(const std::vector<Value *> &vals)
{
	const void *printfAddress = reinterpret_cast<const void *>(rr::DebugPrintf);

	sz::Call(::function, ::basicBlock, Ice::IceType_i32, printfAddress, V(vals), true);
}
#endif  // ENABLE_RR_PRINT
893
Nicolas Capens157ba262019-12-10 17:49:14 -0500894Nucleus::Nucleus()
895{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400896 ::codegenMutex.lock(); // SubzeroReactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500897
898 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
899 Ice::ClFlags::getParsedClFlags(Flags);
900
Ben Clayton713b8d32019-12-17 20:37:56 +0000901#if defined(__arm__)
902 Flags.setTargetArch(Ice::Target_ARM32);
903 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
904#elif defined(__mips__)
905 Flags.setTargetArch(Ice::Target_MIPS32);
906 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
907#else // x86
908 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
909 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
910#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500911 Flags.setOutFileType(Ice::FT_Elf);
912 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
913 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
914 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
915 Flags.setDisableHybridAssembly(true);
916
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500917 // Emit functions into separate sections in the ELF so we can find them by name
918 Flags.setFunctionSections(true);
919
Nicolas Capens157ba262019-12-10 17:49:14 -0500920 static llvm::raw_os_ostream cout(std::cout);
921 static llvm::raw_os_ostream cerr(std::cerr);
922
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500923 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500924 {
925 // Decorate text asm with liveness info
926 Flags.setDecorateAsm(true);
927 }
928
Ben Clayton713b8d32019-12-17 20:37:56 +0000929 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500930 {
931 std::error_code errorCode;
932 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
933 ::elfFile = new Ice::ELFFileStreamer(*out);
934 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
935 }
936 else
937 {
938 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
939 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
940 ::routine = elfMemory;
941 }
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400942
Nicolas Capens00c30ce2020-10-29 09:17:25 -0400943#if !__has_feature(memory_sanitizer)
944 // thread_local variables in shared libraries are initialized at load-time,
945 // but this is not observed by MemorySanitizer if the loader itself was not
946 // instrumented, leading to false-positive unitialized variable errors.
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400947 ASSERT(Variable::unmaterializedVariables == nullptr);
Nicolas Capens46485a02020-06-17 01:31:10 -0400948#endif
Antonio Maioranof14f6c42020-11-03 16:34:35 -0500949 Variable::unmaterializedVariables = new Variable::UnmaterializedVariables{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500950}
951
952Nucleus::~Nucleus()
953{
Nicolas Capens7d6b5912020-04-28 15:57:57 -0400954 delete Variable::unmaterializedVariables;
955 Variable::unmaterializedVariables = nullptr;
956
Nicolas Capens157ba262019-12-10 17:49:14 -0500957 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500958 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500959
960 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500961 ::allocator = nullptr;
962
Nicolas Capens157ba262019-12-10 17:49:14 -0500963 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500964 ::function = nullptr;
965
Nicolas Capens157ba262019-12-10 17:49:14 -0500966 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500967 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500968
969 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500970 ::elfFile = nullptr;
971
Nicolas Capens157ba262019-12-10 17:49:14 -0500972 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500973 ::out = nullptr;
974
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400975 ::entryBlock = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500976 ::basicBlock = nullptr;
Antonio Maiorano22d73d12020-03-20 00:13:28 -0400977 ::basicBlockTop = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500978
979 ::codegenMutex.unlock();
980}
981
982void Nucleus::setDefaultConfig(const Config &cfg)
983{
984 std::unique_lock<std::mutex> lock(::defaultConfigLock);
985 ::defaultConfig() = cfg;
986}
987
988void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
989{
990 std::unique_lock<std::mutex> lock(::defaultConfigLock);
991 auto &config = ::defaultConfig();
992 config = cfgEdit.apply(config);
993}
994
995Config Nucleus::getDefaultConfig()
996{
997 std::unique_lock<std::mutex> lock(::defaultConfigLock);
998 return ::defaultConfig();
999}
1000
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001001// This function lowers and produces executable binary code in memory for the input functions,
1002// and returns a Routine with the entry points to these functions.
1003template<size_t Count>
1004static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -05001005{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001006 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
1007 // and GlobalContext::emitItems.
1008
Nicolas Capens81bc9d92019-12-16 15:05:57 -05001009 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -05001010 {
1011 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001012 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -05001013 }
1014
1015 ::context->emitFileHeader();
1016
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001017 // Translate
1018
1019 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -05001020 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001021 Ice::Cfg *currFunc = functions[i];
1022
1023 // Install function allocator in TLS for Cfg-specific container allocators
1024 Ice::CfgLocalAllocatorScope allocScope(currFunc);
1025
1026 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
1027
1028 rr::optimize(currFunc);
1029
1030 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001031 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001032
1033 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -05001034 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001035
1036 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
1037
1038 if(subzeroEmitTextAsm)
1039 {
1040 currFunc->emit();
1041 }
1042
1043 currFunc->emitIAS();
Nicolas Capensff010f92021-02-01 12:22:53 -05001044
1045 if(currFunc->hasError())
1046 {
1047 return nullptr;
1048 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001049 }
1050
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001051 // Emit items
1052
1053 ::context->lowerGlobals("");
1054
Nicolas Capens157ba262019-12-10 17:49:14 -05001055 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001056
1057 for(size_t i = 0; i < Count; ++i)
1058 {
1059 Ice::Cfg *currFunc = functions[i];
1060
1061 // Accumulate globals from functions to emit into the "last" section at the end
1062 auto globals = currFunc->getGlobalInits();
1063 if(globals && !globals->empty())
1064 {
1065 ::context->getGlobals()->merge(globals.get());
1066 }
1067
1068 auto assembler = currFunc->releaseAssembler();
1069 assembler->alignFunction();
1070 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
1071 }
1072
Nicolas Capens157ba262019-12-10 17:49:14 -05001073 ::context->lowerGlobals("last");
1074 ::context->lowerConstants();
1075 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001076
Nicolas Capens157ba262019-12-10 17:49:14 -05001077 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001078 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -05001079 objectWriter->writeNonUserSections();
1080
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001081 // Done compiling functions, get entry pointers to each of them
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001082 auto entryPoints = ::routine->loadImageAndGetEntryPoints({ names, names + Count });
1083 ASSERT(entryPoints.size() == Count);
1084 for(size_t i = 0; i < entryPoints.size(); ++i)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001085 {
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04001086 ::routine->setEntry(i, entryPoints[i].entry);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001087 }
1088
1089 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001090
1091 Routine *handoffRoutine = ::routine;
1092 ::routine = nullptr;
1093
1094 return std::shared_ptr<Routine>(handoffRoutine);
1095}
1096
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001097std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1098{
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001099 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001100 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1101}
1102
Nicolas Capens157ba262019-12-10 17:49:14 -05001103Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1104{
1105 Ice::Type type = T(t);
1106 int typeSize = Ice::typeWidthInBytes(type);
1107 int totalSize = typeSize * (arraySize ? arraySize : 1);
1108
1109 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1110 auto address = ::function->makeVariable(T(getPointerType(t)));
1111 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1112 ::function->getEntryNode()->getInsts().push_front(alloca);
1113
1114 return V(address);
1115}
1116
1117BasicBlock *Nucleus::createBasicBlock()
1118{
1119 return B(::function->makeNode());
1120}
1121
1122BasicBlock *Nucleus::getInsertBlock()
1123{
1124 return B(::basicBlock);
1125}
1126
1127void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1128{
Ben Clayton713b8d32019-12-17 20:37:56 +00001129 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001130
1131 Variable::materializeAll();
1132
1133 ::basicBlock = basicBlock;
1134}
1135
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001136void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001137{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001138 ASSERT(::function == nullptr);
1139 ASSERT(::allocator == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001140 ASSERT(::entryBlock == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001141 ASSERT(::basicBlock == nullptr);
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001142 ASSERT(::basicBlockTop == nullptr);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001143
1144 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1145
1146 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1147 // becomes invalid if another one is created; for example, when creating await and destroy functions
1148 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1149 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001150 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1151
Antonio Maiorano22d73d12020-03-20 00:13:28 -04001152 ::entryBlock = ::function->getEntryNode();
1153 ::basicBlock = ::function->makeNode();
1154 ::basicBlockTop = ::basicBlock;
Nicolas Capens157ba262019-12-10 17:49:14 -05001155}
1156
1157Value *Nucleus::getArgument(unsigned int index)
1158{
1159 return V(::function->getArgs()[index]);
1160}
1161
1162void Nucleus::createRetVoid()
1163{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001164 RR_DEBUG_INFO_UPDATE_LOC();
1165
Nicolas Capens157ba262019-12-10 17:49:14 -05001166 // Code generated after this point is unreachable, so any variables
1167 // being read can safely return an undefined value. We have to avoid
1168 // materializing variables after the terminator ret instruction.
1169 Variable::killUnmaterialized();
1170
1171 Ice::InstRet *ret = Ice::InstRet::create(::function);
1172 ::basicBlock->appendInst(ret);
1173}
1174
1175void Nucleus::createRet(Value *v)
1176{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001177 RR_DEBUG_INFO_UPDATE_LOC();
1178
Nicolas Capens157ba262019-12-10 17:49:14 -05001179 // Code generated after this point is unreachable, so any variables
1180 // being read can safely return an undefined value. We have to avoid
1181 // materializing variables after the terminator ret instruction.
1182 Variable::killUnmaterialized();
1183
1184 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1185 ::basicBlock->appendInst(ret);
1186}
1187
1188void Nucleus::createBr(BasicBlock *dest)
1189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001190 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001191 Variable::materializeAll();
1192
1193 auto br = Ice::InstBr::create(::function, dest);
1194 ::basicBlock->appendInst(br);
1195}
1196
1197void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1198{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001199 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001200 Variable::materializeAll();
1201
1202 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1203 ::basicBlock->appendInst(br);
1204}
1205
1206static bool isCommutative(Ice::InstArithmetic::OpKind op)
1207{
1208 switch(op)
1209 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001210 case Ice::InstArithmetic::Add:
1211 case Ice::InstArithmetic::Fadd:
1212 case Ice::InstArithmetic::Mul:
1213 case Ice::InstArithmetic::Fmul:
1214 case Ice::InstArithmetic::And:
1215 case Ice::InstArithmetic::Or:
1216 case Ice::InstArithmetic::Xor:
1217 return true;
1218 default:
1219 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001220 }
1221}
1222
1223static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1224{
1225 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1226
1227 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1228
1229 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1230 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1231 ::basicBlock->appendInst(arithmetic);
1232
1233 return V(result);
1234}
1235
1236Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1237{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001238 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001239 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1240}
1241
1242Value *Nucleus::createSub(Value *lhs, Value *rhs)
1243{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001244 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001245 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1246}
1247
1248Value *Nucleus::createMul(Value *lhs, Value *rhs)
1249{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001250 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001251 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1252}
1253
1254Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1255{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001256 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001257 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1258}
1259
1260Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1261{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001262 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001263 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1264}
1265
1266Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001268 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001269 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1270}
1271
1272Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1273{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001274 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001275 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1276}
1277
1278Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1279{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001280 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001281 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1282}
1283
1284Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1285{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001286 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001287 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1288}
1289
1290Value *Nucleus::createURem(Value *lhs, Value *rhs)
1291{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001292 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001293 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1294}
1295
1296Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1297{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001298 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001299 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1300}
1301
1302Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1303{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001304 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001305 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1306 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001307 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001308 return nullptr;
1309}
1310
1311RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1312{
1313 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001314}
1315
1316Value *Nucleus::createShl(Value *lhs, Value *rhs)
1317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001319 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1320}
1321
1322Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001325 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1326}
1327
1328Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001331 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1332}
1333
1334Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001337 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1338}
1339
1340Value *Nucleus::createOr(Value *lhs, Value *rhs)
1341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001343 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1344}
1345
1346Value *Nucleus::createXor(Value *lhs, Value *rhs)
1347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001348 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001349 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1350}
1351
1352Value *Nucleus::createNeg(Value *v)
1353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001354 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001355 return createSub(createNullValue(T(v->getType())), v);
1356}
1357
1358Value *Nucleus::createFNeg(Value *v)
1359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001360 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001361 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1362 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001363
1364 return createFSub(negativeZero, v);
1365}
1366
1367Value *Nucleus::createNot(Value *v)
1368{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001369 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001370 if(Ice::isScalarIntegerType(v->getType()))
1371 {
1372 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1373 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001374 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001375 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001376 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001377 return createXor(v, createConstantVector(c, T(v->getType())));
1378 }
1379}
1380
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001381static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1382{
1383#if defined(__i386__) || defined(__x86_64__)
1384 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1385 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1386 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1387#else
1388 if(atomic)
1389 {
1390 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1391 }
1392 if(memoryOrder != std::memory_order_relaxed)
1393 {
1394 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1395 }
1396#endif
1397
1398 // Vulkan doesn't allow sequential memory order
1399 ASSERT(memoryOrder != std::memory_order_seq_cst);
1400}
1401
Nicolas Capens157ba262019-12-10 17:49:14 -05001402Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1403{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001404 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001405 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001406
1407 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001408 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001409
Ben Clayton713b8d32019-12-17 20:37:56 +00001410 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001411 {
1412 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001413 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001414 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001415 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001416 auto pointer = RValue<Pointer<Byte>>(ptr);
1417 Int x = *Pointer<Int>(pointer);
1418
1419 Int4 vector;
1420 vector = Insert(vector, x, 0);
1421
Antonio Maiorano02a39532020-01-21 15:15:34 -05001422 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001423 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1424 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001425 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001426 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001427 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001428 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001429 auto pointer = RValue<Pointer<Byte>>(ptr);
1430 Int x = *Pointer<Int>(pointer);
1431 Int y = *Pointer<Int>(pointer + 4);
1432
1433 Int4 vector;
1434 vector = Insert(vector, x, 0);
1435 vector = Insert(vector, y, 1);
1436
Antonio Maiorano02a39532020-01-21 15:15:34 -05001437 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001438 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1439 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001440 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001441 else
1442 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001443 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001444 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001445 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001446 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001447 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001448 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001449 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1450 load->addArg(ptr);
1451 load->addArg(::context->getConstantInt32(typeSize(type)));
1452 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001453 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001454 }
1455 else
1456 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001457 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001458 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001459
Antonio Maiorano02a39532020-01-21 15:15:34 -05001460 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001461 return V(result);
1462}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001463
Nicolas Capens157ba262019-12-10 17:49:14 -05001464Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1465{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001466 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001467 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001468
Ben Clayton713b8d32019-12-17 20:37:56 +00001469#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001470 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001471 if(align != 0)
1472 {
1473 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1474 call->addArg(ptr);
1475 call->addArg(::context->getConstantInt64(typeSize(type)));
1476 ::basicBlock->appendInst(call);
1477 }
1478#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001479
Nicolas Capens157ba262019-12-10 17:49:14 -05001480 int valueType = (int)reinterpret_cast<intptr_t>(type);
1481
Ben Clayton713b8d32019-12-17 20:37:56 +00001482 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001483 {
1484 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001485 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001486 if(typeSize(type) == 4)
1487 {
1488 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1489 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1490 ::basicBlock->appendInst(bitcast);
1491
1492 RValue<Int4> v(V(vector));
1493
1494 auto pointer = RValue<Pointer<Byte>>(ptr);
1495 Int x = Extract(v, 0);
1496 *Pointer<Int>(pointer) = x;
1497 }
1498 else if(typeSize(type) == 8)
1499 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001500 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001501 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1502 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1503 ::basicBlock->appendInst(bitcast);
1504
1505 RValue<Int4> v(V(vector));
1506
1507 auto pointer = RValue<Pointer<Byte>>(ptr);
1508 Int x = Extract(v, 0);
1509 *Pointer<Int>(pointer) = x;
1510 Int y = Extract(v, 1);
1511 *Pointer<Int>(pointer + 4) = y;
1512 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001513 else
1514 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001515 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001516 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001517 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001518 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001519 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1520 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1521 store->addArg(value);
1522 store->addArg(ptr);
1523 store->addArg(::context->getConstantInt32(typeSize(type)));
1524 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001525 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001526 }
1527 else
1528 {
1529 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001530
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001531 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001532 ::basicBlock->appendInst(store);
1533 }
1534
1535 return value;
1536}
1537
1538Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1539{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001540 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001541 ASSERT(index->getType() == Ice::IceType_i32);
1542
1543 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1544 {
1545 int32_t offset = constant->getValue() * (int)typeSize(type);
1546
1547 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001548 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001549 return ptr;
1550 }
1551
Nicolas Capens157ba262019-12-10 17:49:14 -05001552 return createAdd(ptr, createConstantInt(offset));
1553 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001554
Nicolas Capens157ba262019-12-10 17:49:14 -05001555 if(!Ice::isByteSizedType(T(type)))
1556 {
1557 index = createMul(index, createConstantInt((int)typeSize(type)));
1558 }
1559
Ben Clayton713b8d32019-12-17 20:37:56 +00001560 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001561 {
1562 if(unsignedIndex)
1563 {
1564 index = createZExt(index, T(Ice::IceType_i64));
1565 }
1566 else
1567 {
1568 index = createSExt(index, T(Ice::IceType_i64));
1569 }
1570 }
1571
1572 return createAdd(ptr, index);
1573}
1574
Antonio Maiorano370cba52019-12-31 11:36:07 -05001575static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1576{
1577 Ice::Variable *result = ::function->makeVariable(value->getType());
1578
1579 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1580 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1581 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1582 auto op = ::context->getConstantInt32(rmwOp);
1583 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1584 inst->addArg(op);
1585 inst->addArg(ptr);
1586 inst->addArg(value);
1587 inst->addArg(order);
1588 ::basicBlock->appendInst(inst);
1589
1590 return V(result);
1591}
1592
Nicolas Capens157ba262019-12-10 17:49:14 -05001593Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1594{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001595 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001596 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001597}
1598
1599Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1600{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001601 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001602 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001603}
1604
1605Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1606{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001607 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001608 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001609}
1610
1611Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1612{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001613 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001614 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001615}
1616
1617Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1618{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001619 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001620 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001621}
1622
1623Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1624{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001625 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001626 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001627}
1628
1629Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1630{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001631 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001632 Ice::Variable *result = ::function->makeVariable(value->getType());
1633
1634 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1635 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1636 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1637 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1638 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1639 inst->addArg(ptr);
1640 inst->addArg(compare);
1641 inst->addArg(value);
1642 inst->addArg(orderEq);
1643 inst->addArg(orderNeq);
1644 ::basicBlock->appendInst(inst);
1645
1646 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001647}
1648
1649static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1650{
1651 if(v->getType() == T(destType))
1652 {
1653 return v;
1654 }
1655
1656 Ice::Variable *result = ::function->makeVariable(T(destType));
1657 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1658 ::basicBlock->appendInst(cast);
1659
1660 return V(result);
1661}
1662
1663Value *Nucleus::createTrunc(Value *v, Type *destType)
1664{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001665 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001666 return createCast(Ice::InstCast::Trunc, v, destType);
1667}
1668
1669Value *Nucleus::createZExt(Value *v, Type *destType)
1670{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001671 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001672 return createCast(Ice::InstCast::Zext, v, destType);
1673}
1674
1675Value *Nucleus::createSExt(Value *v, Type *destType)
1676{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001677 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001678 return createCast(Ice::InstCast::Sext, v, destType);
1679}
1680
1681Value *Nucleus::createFPToUI(Value *v, Type *destType)
1682{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001683 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001684 return createCast(Ice::InstCast::Fptoui, v, destType);
1685}
1686
1687Value *Nucleus::createFPToSI(Value *v, Type *destType)
1688{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001689 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001690 return createCast(Ice::InstCast::Fptosi, v, destType);
1691}
1692
1693Value *Nucleus::createSIToFP(Value *v, Type *destType)
1694{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001695 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001696 return createCast(Ice::InstCast::Sitofp, v, destType);
1697}
1698
1699Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1700{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001701 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001702 return createCast(Ice::InstCast::Fptrunc, v, destType);
1703}
1704
1705Value *Nucleus::createFPExt(Value *v, Type *destType)
1706{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001707 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001708 return createCast(Ice::InstCast::Fpext, v, destType);
1709}
1710
1711Value *Nucleus::createBitCast(Value *v, Type *destType)
1712{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001713 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001714 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1715 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1716 // emulate them by writing to the stack and reading back as the destination type.
1717 if(emulateMismatchedBitCast)
1718 {
1719 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1720 {
1721 Value *address = allocateStackVariable(destType);
1722 createStore(v, address, T(v->getType()));
1723 return createLoad(address, destType);
1724 }
1725 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1726 {
1727 Value *address = allocateStackVariable(T(v->getType()));
1728 createStore(v, address, T(v->getType()));
1729 return createLoad(address, destType);
1730 }
1731 }
1732
1733 return createCast(Ice::InstCast::Bitcast, v, destType);
1734}
1735
1736static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1737{
1738 ASSERT(lhs->getType() == rhs->getType());
1739
1740 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1741 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1742 ::basicBlock->appendInst(cmp);
1743
1744 return V(result);
1745}
1746
1747Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1748{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001749 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001750 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1751}
1752
1753Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1754{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001755 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001756 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1757}
1758
1759Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1760{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001761 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001762 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1763}
1764
1765Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1766{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001767 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001768 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1769}
1770
1771Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1772{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001773 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001774 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1775}
1776
1777Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001780 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1781}
1782
1783Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1784{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001785 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001786 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1787}
1788
1789Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1790{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001791 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001792 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1793}
1794
1795Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001798 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1799}
1800
1801Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1802{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001803 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001804 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1805}
1806
1807Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1808{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001809 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001810 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1811}
1812
1813static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1814{
1815 ASSERT(lhs->getType() == rhs->getType());
1816 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1817
1818 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1819 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1820 ::basicBlock->appendInst(cmp);
1821
1822 return V(result);
1823}
1824
1825Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1826{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001827 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001828 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1829}
1830
1831Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1832{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001833 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001834 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1835}
1836
1837Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001840 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1841}
1842
1843Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001846 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1847}
1848
1849Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001852 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1853}
1854
1855Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1856{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001857 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001858 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1859}
1860
1861Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1862{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001863 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001864 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1865}
1866
1867Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1868{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001869 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001870 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1871}
1872
1873Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1874{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001875 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001876 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1877}
1878
1879Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1880{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001881 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001882 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1883}
1884
1885Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1886{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001887 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001888 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1889}
1890
1891Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1892{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001893 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001894 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1895}
1896
1897Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1898{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001899 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001900 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1901}
1902
1903Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1904{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001905 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001906 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1907}
1908
1909Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1910{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001911 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001912 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001913 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001914 ::basicBlock->appendInst(extract);
1915
1916 return V(result);
1917}
1918
1919Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1920{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001921 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001922 auto result = ::function->makeVariable(vector->getType());
1923 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1924 ::basicBlock->appendInst(insert);
1925
1926 return V(result);
1927}
1928
1929Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001932 ASSERT(V1->getType() == V2->getType());
1933
1934 int size = Ice::typeNumElements(V1->getType());
1935 auto result = ::function->makeVariable(V1->getType());
1936 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1937
1938 for(int i = 0; i < size; i++)
1939 {
1940 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1941 }
1942
1943 ::basicBlock->appendInst(shuffle);
1944
1945 return V(result);
1946}
1947
1948Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1949{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001950 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001951 ASSERT(ifTrue->getType() == ifFalse->getType());
1952
1953 auto result = ::function->makeVariable(ifTrue->getType());
1954 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1955 ::basicBlock->appendInst(select);
1956
1957 return V(result);
1958}
1959
1960SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1961{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001962 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001963 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1964 ::basicBlock->appendInst(switchInst);
1965
Ben Clayton713b8d32019-12-17 20:37:56 +00001966 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001967}
1968
1969void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1970{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001971 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001972 switchCases->addBranch(label, label, branch);
1973}
1974
1975void Nucleus::createUnreachable()
1976{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001977 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001978 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1979 ::basicBlock->appendInst(unreachable);
1980}
1981
Antonio Maiorano62427e02020-02-13 09:18:05 -05001982Type *Nucleus::getType(Value *value)
1983{
1984 return T(V(value)->getType());
1985}
1986
1987Type *Nucleus::getContainedType(Type *vectorType)
1988{
1989 Ice::Type vecTy = T(vectorType);
1990 switch(vecTy)
1991 {
1992 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1993 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1994 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1995 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1996 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1997 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1998 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1999 default:
2000 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
2001 return {};
2002 }
2003}
2004
Nicolas Capens157ba262019-12-10 17:49:14 -05002005Type *Nucleus::getPointerType(Type *ElementType)
2006{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05002007 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05002008}
2009
Antonio Maiorano62427e02020-02-13 09:18:05 -05002010static constexpr Ice::Type getNaturalIntType()
2011{
2012 constexpr size_t intSize = sizeof(int);
2013 static_assert(intSize == 4 || intSize == 8, "");
2014 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
2015}
2016
2017Type *Nucleus::getPrintfStorageType(Type *valueType)
2018{
2019 Ice::Type valueTy = T(valueType);
2020 switch(valueTy)
2021 {
2022 case Ice::IceType_i32:
2023 return T(getNaturalIntType());
2024
2025 case Ice::IceType_f32:
2026 return T(Ice::IceType_f64);
2027
2028 default:
2029 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
2030 return {};
2031 }
2032}
2033
Nicolas Capens157ba262019-12-10 17:49:14 -05002034Value *Nucleus::createNullValue(Type *Ty)
2035{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002036 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002037 if(Ice::isVectorType(T(Ty)))
2038 {
2039 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002040 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002041 return createConstantVector(c, Ty);
2042 }
2043 else
2044 {
2045 return V(::context->getConstantZero(T(Ty)));
2046 }
2047}
2048
2049Value *Nucleus::createConstantLong(int64_t i)
2050{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002051 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002052 return V(::context->getConstantInt64(i));
2053}
2054
2055Value *Nucleus::createConstantInt(int i)
2056{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002057 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002058 return V(::context->getConstantInt32(i));
2059}
2060
2061Value *Nucleus::createConstantInt(unsigned int i)
2062{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002063 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002064 return V(::context->getConstantInt32(i));
2065}
2066
2067Value *Nucleus::createConstantBool(bool b)
2068{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002069 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002070 return V(::context->getConstantInt1(b));
2071}
2072
2073Value *Nucleus::createConstantByte(signed char i)
2074{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002075 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002076 return V(::context->getConstantInt8(i));
2077}
2078
2079Value *Nucleus::createConstantByte(unsigned char i)
2080{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002081 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002082 return V(::context->getConstantInt8(i));
2083}
2084
2085Value *Nucleus::createConstantShort(short i)
2086{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002087 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002088 return V(::context->getConstantInt16(i));
2089}
2090
2091Value *Nucleus::createConstantShort(unsigned short i)
2092{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002093 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002094 return V(::context->getConstantInt16(i));
2095}
2096
2097Value *Nucleus::createConstantFloat(float x)
2098{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002099 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002100 return V(::context->getConstantFloat(x));
2101}
2102
2103Value *Nucleus::createNullPointer(Type *Ty)
2104{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002105 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002106 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002107}
2108
Antonio Maiorano02a39532020-01-21 15:15:34 -05002109static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2110{
2111 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2112}
2113
Nicolas Capens157ba262019-12-10 17:49:14 -05002114Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2115{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002116 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002117 const int vectorSize = 16;
2118 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2119 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002120
2121 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002122 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002123
Antonio Maioranoa0957112020-03-04 15:06:19 -05002124 // TODO(b/148082873): Fix global variable constants when generating multiple functions
Antonio Maiorano02a39532020-01-21 15:15:34 -05002125 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002126
2127 switch((int)reinterpret_cast<intptr_t>(type))
2128 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002129 case Ice::IceType_v4i32:
2130 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002131 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002132 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002133 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002134 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002135 }
2136 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002137 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002138 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002139 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002140 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002141 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002142 }
2143 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002144 case Ice::IceType_v8i16:
2145 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002146 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002147 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002148 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002149 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002150 }
2151 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002152 case Ice::IceType_v16i8:
2153 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002154 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002155 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002156 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002157 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002158 }
2159 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002160 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002161 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002162 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002163 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002164 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002165 }
2166 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002167 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002168 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002169 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002170 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002171 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002172 }
2173 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002174 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002175 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002176 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002177 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002178 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002179 }
2180 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002181 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002182 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002183 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002184 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002185 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002186 }
2187 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002188 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002189 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002190 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002191 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002192 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002193 }
2194 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002195 default:
2196 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002197 }
2198
Antonio Maiorano02a39532020-01-21 15:15:34 -05002199 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002200
Antonio Maiorano02a39532020-01-21 15:15:34 -05002201 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002202 return V(result);
2203}
2204
2205Value *Nucleus::createConstantVector(const double *constants, Type *type)
2206{
Ben Clayton713b8d32019-12-17 20:37:56 +00002207 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002208}
2209
Antonio Maiorano62427e02020-02-13 09:18:05 -05002210Value *Nucleus::createConstantString(const char *v)
2211{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002212 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002213 return V(IceConstantData(v, strlen(v) + 1));
2214}
2215
Nicolas Capens519cf222020-05-08 15:27:19 -04002216Type *Void::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002217{
2218 return T(Ice::IceType_void);
2219}
2220
Nicolas Capens519cf222020-05-08 15:27:19 -04002221Type *Bool::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002222{
2223 return T(Ice::IceType_i1);
2224}
2225
Nicolas Capens519cf222020-05-08 15:27:19 -04002226Type *Byte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002227{
2228 return T(Ice::IceType_i8);
2229}
2230
Nicolas Capens519cf222020-05-08 15:27:19 -04002231Type *SByte::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002232{
2233 return T(Ice::IceType_i8);
2234}
2235
Nicolas Capens519cf222020-05-08 15:27:19 -04002236Type *Short::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002237{
2238 return T(Ice::IceType_i16);
2239}
2240
Nicolas Capens519cf222020-05-08 15:27:19 -04002241Type *UShort::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002242{
2243 return T(Ice::IceType_i16);
2244}
2245
Nicolas Capens519cf222020-05-08 15:27:19 -04002246Type *Byte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002247{
2248 return T(Type_v4i8);
2249}
2250
Nicolas Capens519cf222020-05-08 15:27:19 -04002251Type *SByte4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002252{
2253 return T(Type_v4i8);
2254}
2255
Ben Clayton713b8d32019-12-17 20:37:56 +00002256namespace {
2257RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002258{
Ben Clayton713b8d32019-12-17 20:37:56 +00002259 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002260}
2261
Ben Clayton713b8d32019-12-17 20:37:56 +00002262RValue<Byte> Extract(RValue<Byte8> val, int i)
2263{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002264 return RValue<Byte>(Nucleus::createExtractElement(val.value(), Byte::type(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002265}
2266
2267RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2268{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002269 return RValue<Byte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Ben Clayton713b8d32019-12-17 20:37:56 +00002270}
2271} // namespace
2272
Nicolas Capens157ba262019-12-10 17:49:14 -05002273RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2274{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002275 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002276 if(emulateIntrinsics)
2277 {
2278 Byte8 result;
2279 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2280 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2281 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2282 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2283 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2284 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2285 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2286 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2287
2288 return result;
2289 }
2290 else
2291 {
2292 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002293 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002294 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2295 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002296 paddusb->addArg(x.value());
2297 paddusb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002298 ::basicBlock->appendInst(paddusb);
2299
2300 return RValue<Byte8>(V(result));
2301 }
2302}
2303
2304RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002307 if(emulateIntrinsics)
2308 {
2309 Byte8 result;
2310 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2311 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2312 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2313 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2314 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2315 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2316 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2317 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2318
2319 return result;
2320 }
2321 else
2322 {
2323 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002324 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002325 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2326 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002327 psubusw->addArg(x.value());
2328 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002329 ::basicBlock->appendInst(psubusw);
2330
2331 return RValue<Byte8>(V(result));
2332 }
2333}
2334
2335RValue<SByte> Extract(RValue<SByte8> val, int i)
2336{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002337 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002338 return RValue<SByte>(Nucleus::createExtractElement(val.value(), SByte::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002339}
2340
2341RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2342{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002343 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002344 return RValue<SByte8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002345}
2346
2347RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2348{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002349 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002350 if(emulateIntrinsics)
2351 {
2352 SByte8 result;
2353 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2354 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2355 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2356 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2357 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2358 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2359 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2360 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2361
2362 return result;
2363 }
2364 else
2365 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002366#if defined(__i386__) || defined(__x86_64__)
2367 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2368 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2369 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002370
Ben Clayton713b8d32019-12-17 20:37:56 +00002371 return As<SByte8>(hi | lo);
2372#else
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002373 return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton713b8d32019-12-17 20:37:56 +00002374#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002375 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002376}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002377
Nicolas Capens157ba262019-12-10 17:49:14 -05002378RValue<Int> SignMask(RValue<Byte8> x)
2379{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002380 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002381 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002382 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002383 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2384 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002385 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002386 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002387 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002388 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002389 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002390 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2391 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002392 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002393 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002394
Nicolas Capens157ba262019-12-10 17:49:14 -05002395 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002396 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002397}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002398
2399// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2400// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002401// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value(), y.value()));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002402// }
2403
Nicolas Capens157ba262019-12-10 17:49:14 -05002404RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2405{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002406 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002407 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002408}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002409
Nicolas Capens519cf222020-05-08 15:27:19 -04002410Type *Byte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002411{
2412 return T(Type_v8i8);
2413}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002414
Nicolas Capens598f8d82016-09-26 15:09:10 -04002415// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2416// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002417// return RValue<SByte8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002418// }
2419
2420// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2421// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002422// return RValue<SByte8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002423// }
2424
Nicolas Capens157ba262019-12-10 17:49:14 -05002425RValue<SByte> SaturateSigned(RValue<Short> x)
2426{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002427 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002428 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2429}
2430
2431RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2432{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002433 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002434 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002435 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002436 SByte8 result;
2437 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2438 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2439 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2440 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2441 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2442 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2443 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2444 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002445
Nicolas Capens157ba262019-12-10 17:49:14 -05002446 return result;
2447 }
2448 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002449 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002450 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002451 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002452 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2453 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002454 paddsb->addArg(x.value());
2455 paddsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002456 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002457
Nicolas Capens157ba262019-12-10 17:49:14 -05002458 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002459 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002460}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002461
Nicolas Capens157ba262019-12-10 17:49:14 -05002462RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2463{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002464 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002465 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002466 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002467 SByte8 result;
2468 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2469 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2470 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2471 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2472 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2473 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2474 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2475 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002476
Nicolas Capens157ba262019-12-10 17:49:14 -05002477 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002478 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002479 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002480 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002481 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002482 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002483 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2484 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002485 psubsb->addArg(x.value());
2486 psubsb->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002487 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002488
Nicolas Capens157ba262019-12-10 17:49:14 -05002489 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002490 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002491}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002492
Nicolas Capens157ba262019-12-10 17:49:14 -05002493RValue<Int> SignMask(RValue<SByte8> x)
2494{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002495 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002496 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002497 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002498 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2499 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002500 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002501 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002502 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002503 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002504 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002505 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2506 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002507 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002508 ::basicBlock->appendInst(movmsk);
2509
2510 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002511 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002512}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002513
Nicolas Capens157ba262019-12-10 17:49:14 -05002514RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2515{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002516 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002517 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002518}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002519
Nicolas Capens157ba262019-12-10 17:49:14 -05002520RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2521{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002522 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002523 return RValue<Byte8>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002524}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002525
Nicolas Capens519cf222020-05-08 15:27:19 -04002526Type *SByte8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002527{
2528 return T(Type_v8i8);
2529}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002530
Nicolas Capens519cf222020-05-08 15:27:19 -04002531Type *Byte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002532{
2533 return T(Ice::IceType_v16i8);
2534}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002535
Nicolas Capens519cf222020-05-08 15:27:19 -04002536Type *SByte16::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002537{
2538 return T(Ice::IceType_v16i8);
2539}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002540
Nicolas Capens519cf222020-05-08 15:27:19 -04002541Type *Short2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002542{
2543 return T(Type_v2i16);
2544}
Nicolas Capensd4227962016-11-09 14:24:25 -05002545
Nicolas Capens519cf222020-05-08 15:27:19 -04002546Type *UShort2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002547{
2548 return T(Type_v2i16);
2549}
Nicolas Capensd4227962016-11-09 14:24:25 -05002550
Nicolas Capens157ba262019-12-10 17:49:14 -05002551Short4::Short4(RValue<Int4> cast)
2552{
Ben Clayton713b8d32019-12-17 20:37:56 +00002553 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002554 Value *short8 = Nucleus::createBitCast(cast.value(), Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002555 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2556
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002557 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value();
Nicolas Capens519cf222020-05-08 15:27:19 -04002558 Value *short4 = Nucleus::createBitCast(int2, Short4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05002559
2560 storeValue(short4);
2561}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002562
2563// Short4::Short4(RValue<Float> cast)
2564// {
2565// }
2566
Nicolas Capens157ba262019-12-10 17:49:14 -05002567Short4::Short4(RValue<Float4> cast)
2568{
Antonio Maioranoa0957112020-03-04 15:06:19 -05002569 // TODO(b/150791192): Generalize and optimize
2570 auto smin = std::numeric_limits<short>::min();
2571 auto smax = std::numeric_limits<short>::max();
2572 *this = Short4(Int4(Max(Min(cast, Float4(smax)), Float4(smin))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002573}
2574
2575RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2576{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002577 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002578 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002579 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002580 Short4 result;
2581 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2582 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2583 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2584 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002585
2586 return result;
2587 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002588 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002589 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002590 return RValue<Short4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002591 }
2592}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002593
Nicolas Capens157ba262019-12-10 17:49:14 -05002594RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2595{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002596 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002597 if(emulateIntrinsics)
2598 {
2599 Short4 result;
2600 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2601 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2602 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2603 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2604
2605 return result;
2606 }
2607 else
2608 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002609 return RValue<Short4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002610 }
2611}
2612
2613RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2614{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002615 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002616 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002617 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002618 ::basicBlock->appendInst(cmp);
2619
2620 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002621 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002622 ::basicBlock->appendInst(select);
2623
2624 return RValue<Short4>(V(result));
2625}
2626
2627RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2628{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002629 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002630 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002631 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002632 ::basicBlock->appendInst(cmp);
2633
2634 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002635 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002636 ::basicBlock->appendInst(select);
2637
2638 return RValue<Short4>(V(result));
2639}
2640
2641RValue<Short> SaturateSigned(RValue<Int> x)
2642{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002643 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002644 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2645}
2646
2647RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2648{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002649 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002650 if(emulateIntrinsics)
2651 {
2652 Short4 result;
2653 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2654 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2655 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2656 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2657
2658 return result;
2659 }
2660 else
2661 {
2662 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002663 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002664 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2665 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002666 paddsw->addArg(x.value());
2667 paddsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002668 ::basicBlock->appendInst(paddsw);
2669
2670 return RValue<Short4>(V(result));
2671 }
2672}
2673
2674RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2675{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002676 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002677 if(emulateIntrinsics)
2678 {
2679 Short4 result;
2680 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2681 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2682 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2683 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2684
2685 return result;
2686 }
2687 else
2688 {
2689 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002690 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002691 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2692 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002693 psubsw->addArg(x.value());
2694 psubsw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002695 ::basicBlock->appendInst(psubsw);
2696
2697 return RValue<Short4>(V(result));
2698 }
2699}
2700
2701RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2702{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002703 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002704 if(emulateIntrinsics)
2705 {
2706 Short4 result;
2707 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2708 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2709 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2710 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2711
2712 return result;
2713 }
2714 else
2715 {
2716 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002717 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002718 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2719 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002720 pmulhw->addArg(x.value());
2721 pmulhw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002722 ::basicBlock->appendInst(pmulhw);
2723
2724 return RValue<Short4>(V(result));
2725 }
2726}
2727
2728RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2729{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002730 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002731 if(emulateIntrinsics)
2732 {
2733 Int2 result;
2734 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2735 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2736
2737 return result;
2738 }
2739 else
2740 {
2741 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002742 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002743 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2744 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002745 pmaddwd->addArg(x.value());
2746 pmaddwd->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002747 ::basicBlock->appendInst(pmaddwd);
2748
2749 return As<Int2>(V(result));
2750 }
2751}
2752
2753RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2754{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002755 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002756 if(emulateIntrinsics)
2757 {
2758 SByte8 result;
2759 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2760 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2761 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2762 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2763 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2764 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2765 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2766 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2767
2768 return result;
2769 }
2770 else
2771 {
2772 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002773 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002774 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2775 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002776 pack->addArg(x.value());
2777 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002778 ::basicBlock->appendInst(pack);
2779
2780 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2781 }
2782}
2783
2784RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2785{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002786 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002787 if(emulateIntrinsics)
2788 {
2789 Byte8 result;
2790 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2791 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2792 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2793 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2794 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2795 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2796 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2797 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2798
2799 return result;
2800 }
2801 else
2802 {
2803 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002804 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002805 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2806 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002807 pack->addArg(x.value());
2808 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002809 ::basicBlock->appendInst(pack);
2810
2811 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2812 }
2813}
2814
2815RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2816{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002817 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002818 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002819}
2820
2821RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2822{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002823 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002824 return RValue<Short4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05002825}
2826
Nicolas Capens519cf222020-05-08 15:27:19 -04002827Type *Short4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05002828{
2829 return T(Type_v4i16);
2830}
2831
2832UShort4::UShort4(RValue<Float4> cast, bool saturate)
2833{
2834 if(saturate)
2835 {
2836 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002837 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002838 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2839 // PackUnsigned takes care of 0x0000 saturation.
2840 Int4 int4(Min(cast, Float4(0xFFFF)));
2841 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002842 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002843 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002844 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002845 // ARM saturates the 32-bit integer result on overflow/undeflow.
2846 Int4 int4(cast);
2847 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002848 }
2849 else
2850 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002851 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002852 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002853 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002854 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002855 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002856 *this = Short4(Int4(cast));
2857 }
2858}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002859
Nicolas Capens157ba262019-12-10 17:49:14 -05002860RValue<UShort> Extract(RValue<UShort4> val, int i)
2861{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002862 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002863}
2864
2865RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2866{
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002867 return RValue<UShort4>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05002868}
2869
2870RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2871{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002872 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002873 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002874
Nicolas Capens157ba262019-12-10 17:49:14 -05002875 {
2876 UShort4 result;
2877 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2878 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2879 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2880 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2881
2882 return result;
2883 }
2884 else
2885 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002886 return RValue<UShort4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002887 }
2888}
2889
2890RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2891{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002892 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002893 if(emulateIntrinsics)
2894 {
2895 UShort4 result;
2896 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2897 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2898 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2899 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2900
2901 return result;
2902 }
2903 else
2904 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002905 return RValue<UShort4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002906 }
2907}
2908
2909RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2910{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002911 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002912 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002913 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002914 ::basicBlock->appendInst(cmp);
2915
2916 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002917 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002918 ::basicBlock->appendInst(select);
2919
2920 return RValue<UShort4>(V(result));
2921}
2922
2923RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2924{
2925 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002926 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002927 ::basicBlock->appendInst(cmp);
2928
2929 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002930 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002931 ::basicBlock->appendInst(select);
2932
2933 return RValue<UShort4>(V(result));
2934}
2935
2936RValue<UShort> SaturateUnsigned(RValue<Int> x)
2937{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002938 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002939 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2940}
2941
2942RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2943{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002944 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002945 if(emulateIntrinsics)
2946 {
2947 UShort4 result;
2948 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2949 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2950 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2951 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2952
2953 return result;
2954 }
2955 else
2956 {
2957 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002958 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002959 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2960 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002961 paddusw->addArg(x.value());
2962 paddusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002963 ::basicBlock->appendInst(paddusw);
2964
2965 return RValue<UShort4>(V(result));
2966 }
2967}
2968
2969RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2970{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002971 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002972 if(emulateIntrinsics)
2973 {
2974 UShort4 result;
2975 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2976 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2977 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2978 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2979
2980 return result;
2981 }
2982 else
2983 {
2984 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002985 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002986 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2987 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04002988 psubusw->addArg(x.value());
2989 psubusw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05002990 ::basicBlock->appendInst(psubusw);
2991
2992 return RValue<UShort4>(V(result));
2993 }
2994}
2995
2996RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2997{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002998 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002999 if(emulateIntrinsics)
3000 {
3001 UShort4 result;
3002 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
3003 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
3004 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
3005 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
3006
3007 return result;
3008 }
3009 else
3010 {
3011 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003012 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003013 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3014 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003015 pmulhuw->addArg(x.value());
3016 pmulhuw->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003017 ::basicBlock->appendInst(pmulhuw);
3018
3019 return RValue<UShort4>(V(result));
3020 }
3021}
3022
3023RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
3024{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003025 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003026 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3027
3028 // Scalarized implementation.
3029 Int4 result;
3030 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
3031 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
3032 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
3033 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
3034
3035 return result;
3036}
3037
3038RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
3039{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003040 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003041 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
3042
3043 if(false) // Partial product based implementation.
3044 {
3045 auto xh = x >> 16;
3046 auto yh = y >> 16;
3047 auto xl = x & UInt4(0x0000FFFF);
3048 auto yl = y & UInt4(0x0000FFFF);
3049 auto xlyh = xl * yh;
3050 auto xhyl = xh * yl;
3051 auto xlyhh = xlyh >> 16;
3052 auto xhylh = xhyl >> 16;
3053 auto xlyhl = xlyh & UInt4(0x0000FFFF);
3054 auto xhyll = xhyl & UInt4(0x0000FFFF);
3055 auto xlylh = (xl * yl) >> 16;
3056 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
3057
3058 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003059 }
3060
Nicolas Capens157ba262019-12-10 17:49:14 -05003061 // Scalarized implementation.
3062 Int4 result;
3063 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
3064 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
3065 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
3066 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
3067
3068 return As<UInt4>(result);
3069}
3070
3071RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3072{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003073 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003074 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003075 return UShort4(0);
3076}
3077
Nicolas Capens519cf222020-05-08 15:27:19 -04003078Type *UShort4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003079{
3080 return T(Type_v4i16);
3081}
3082
3083RValue<Short> Extract(RValue<Short8> val, int i)
3084{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003085 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003086 return RValue<Short>(Nucleus::createExtractElement(val.value(), Short::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003087}
3088
3089RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3090{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003091 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003092 return RValue<Short8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003093}
3094
3095RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3096{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003097 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003098 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003099 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003100 Short8 result;
3101 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3102 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3103 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3104 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3105 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3106 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3107 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3108 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003109
Nicolas Capens157ba262019-12-10 17:49:14 -05003110 return result;
3111 }
3112 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003113 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003114 return RValue<Short8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003115 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003116}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003117
Nicolas Capens157ba262019-12-10 17:49:14 -05003118RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3119{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003120 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003121 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003122 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003123 Short8 result;
3124 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3125 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3126 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3127 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3128 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3129 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3130 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3131 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003132
Nicolas Capens157ba262019-12-10 17:49:14 -05003133 return result;
3134 }
3135 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003136 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003137 return RValue<Short8>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003138 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003139}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003140
Nicolas Capens157ba262019-12-10 17:49:14 -05003141RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3142{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003143 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003144 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003145 return Int4(0);
3146}
3147
3148RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3149{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003150 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003151 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003152 return Short8(0);
3153}
3154
Nicolas Capens519cf222020-05-08 15:27:19 -04003155Type *Short8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003156{
3157 return T(Ice::IceType_v8i16);
3158}
3159
3160RValue<UShort> Extract(RValue<UShort8> val, int i)
3161{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003162 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003163 return RValue<UShort>(Nucleus::createExtractElement(val.value(), UShort::type(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003164}
3165
3166RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3167{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003168 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003169 return RValue<UShort8>(Nucleus::createInsertElement(val.value(), element.value(), i));
Nicolas Capens157ba262019-12-10 17:49:14 -05003170}
3171
3172RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3173{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003174 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003175 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003176 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003177 UShort8 result;
3178 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3179 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3180 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3181 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3182 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3183 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3184 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3185 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003186
Nicolas Capens157ba262019-12-10 17:49:14 -05003187 return result;
3188 }
3189 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003190 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003191 return RValue<UShort8>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003192 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003193}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003194
Nicolas Capens157ba262019-12-10 17:49:14 -05003195RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3196{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003197 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003198 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003199 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003200 UShort8 result;
3201 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3202 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3203 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3204 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3205 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3206 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3207 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3208 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003209
Nicolas Capens157ba262019-12-10 17:49:14 -05003210 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003211 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003212 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003213 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003214 return RValue<UShort8>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003215 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003216}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003217
Nicolas Capens157ba262019-12-10 17:49:14 -05003218RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3219{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003220 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003221 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003222 return UShort8(0);
3223}
3224
Nicolas Capens519cf222020-05-08 15:27:19 -04003225Type *UShort8::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003226{
3227 return T(Ice::IceType_v8i16);
3228}
3229
Ben Clayton713b8d32019-12-17 20:37:56 +00003230RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003231{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003232 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003233 RValue<Int> res = val;
3234 val += 1;
3235 return res;
3236}
3237
Ben Clayton713b8d32019-12-17 20:37:56 +00003238const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003239{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003240 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003241 val += 1;
3242 return val;
3243}
3244
Ben Clayton713b8d32019-12-17 20:37:56 +00003245RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003246{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003247 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003248 RValue<Int> res = val;
3249 val -= 1;
3250 return res;
3251}
3252
Ben Clayton713b8d32019-12-17 20:37:56 +00003253const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003254{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003255 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003256 val -= 1;
3257 return val;
3258}
3259
3260RValue<Int> RoundInt(RValue<Float> cast)
3261{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003262 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003263 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003264 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003265 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3266 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003267 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003268 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003269 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003270 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003271 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003272 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3273 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003274 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003275 ::basicBlock->appendInst(nearbyint);
3276
3277 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003278 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003279}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003280
Nicolas Capens519cf222020-05-08 15:27:19 -04003281Type *Int::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003282{
3283 return T(Ice::IceType_i32);
3284}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003285
Nicolas Capens519cf222020-05-08 15:27:19 -04003286Type *Long::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003287{
3288 return T(Ice::IceType_i64);
3289}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003290
Nicolas Capens157ba262019-12-10 17:49:14 -05003291UInt::UInt(RValue<Float> cast)
3292{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003293 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003294 // Smallest positive value representable in UInt, but not in Int
3295 const unsigned int ustart = 0x80000000u;
3296 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003297
Nicolas Capens157ba262019-12-10 17:49:14 -05003298 // If the value is negative, store 0, otherwise store the result of the conversion
3299 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003300 // Check if the value can be represented as an Int
3301 IfThenElse(cast >= ustartf,
3302 // If the value is too large, subtract ustart and re-add it after conversion.
3303 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3304 // Otherwise, just convert normally
3305 Int(cast)))
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003306 .value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003307}
Nicolas Capensa8086512016-11-07 17:32:17 -05003308
Ben Clayton713b8d32019-12-17 20:37:56 +00003309RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003310{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003311 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003312 RValue<UInt> res = val;
3313 val += 1;
3314 return res;
3315}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003316
Ben Clayton713b8d32019-12-17 20:37:56 +00003317const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003318{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003319 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003320 val += 1;
3321 return val;
3322}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003323
Ben Clayton713b8d32019-12-17 20:37:56 +00003324RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003325{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003326 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003327 RValue<UInt> res = val;
3328 val -= 1;
3329 return res;
3330}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003331
Ben Clayton713b8d32019-12-17 20:37:56 +00003332const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003333{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003334 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003335 val -= 1;
3336 return val;
3337}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003338
Nicolas Capens598f8d82016-09-26 15:09:10 -04003339// RValue<UInt> RoundUInt(RValue<Float> cast)
3340// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003341// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003342// }
3343
Nicolas Capens519cf222020-05-08 15:27:19 -04003344Type *UInt::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003345{
3346 return T(Ice::IceType_i32);
3347}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003348
3349// Int2::Int2(RValue<Int> cast)
3350// {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003351// Value *extend = Nucleus::createZExt(cast.value(), Long::type());
Nicolas Capens519cf222020-05-08 15:27:19 -04003352// Value *vector = Nucleus::createBitCast(extend, Int2::type());
Nicolas Capens598f8d82016-09-26 15:09:10 -04003353//
3354// Constant *shuffle[2];
3355// shuffle[0] = Nucleus::createConstantInt(0);
3356// shuffle[1] = Nucleus::createConstantInt(0);
3357//
Nicolas Capens519cf222020-05-08 15:27:19 -04003358// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::type()), Nucleus::createConstantVector(shuffle, 2));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003359//
3360// storeValue(replicate);
3361// }
3362
Nicolas Capens157ba262019-12-10 17:49:14 -05003363RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3364{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003365 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003366 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003367 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003368 Int2 result;
3369 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3370 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003371
Nicolas Capens157ba262019-12-10 17:49:14 -05003372 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003373 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003374 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003375 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003376 return RValue<Int2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003377 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003378}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003379
Nicolas Capens157ba262019-12-10 17:49:14 -05003380RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3381{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003382 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003383 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003384 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003385 Int2 result;
3386 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3387 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3388
3389 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003390 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003391 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003392 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003393 return RValue<Int2>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003394 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003395}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003396
Nicolas Capens519cf222020-05-08 15:27:19 -04003397Type *Int2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003398{
3399 return T(Type_v2i32);
3400}
3401
3402RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3403{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003404 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003405 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003406 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003407 UInt2 result;
3408 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3409 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003410
Nicolas Capens157ba262019-12-10 17:49:14 -05003411 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003412 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003413 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003414 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003415 return RValue<UInt2>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003416 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003417}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003418
Nicolas Capens157ba262019-12-10 17:49:14 -05003419RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3420{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003421 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003422 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003423 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003424 UInt2 result;
3425 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3426 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003427
Nicolas Capens157ba262019-12-10 17:49:14 -05003428 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003429 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003430 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003431 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003432 return RValue<UInt2>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003433 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003434}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003435
Nicolas Capens519cf222020-05-08 15:27:19 -04003436Type *UInt2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003437{
3438 return T(Type_v2i32);
3439}
3440
Ben Clayton713b8d32019-12-17 20:37:56 +00003441Int4::Int4(RValue<Byte4> cast)
3442 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003443{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003444 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003445 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003446 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3447
3448 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003449 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003450 Value *b = Nucleus::createBitCast(a, Byte16::type());
3451 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::type()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003452
Ben Clayton713b8d32019-12-17 20:37:56 +00003453 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003454 Value *d = Nucleus::createBitCast(c, Short8::type());
3455 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::type()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003456
Nicolas Capens519cf222020-05-08 15:27:19 -04003457 Value *f = Nucleus::createBitCast(e, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003458 storeValue(f);
3459}
3460
Ben Clayton713b8d32019-12-17 20:37:56 +00003461Int4::Int4(RValue<SByte4> cast)
3462 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003463{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003464 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003465 Value *x = Nucleus::createBitCast(cast.value(), Int::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003466 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3467
Ben Clayton713b8d32019-12-17 20:37:56 +00003468 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003469 Value *b = Nucleus::createBitCast(a, Byte16::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003470 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3471
Ben Clayton713b8d32019-12-17 20:37:56 +00003472 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens519cf222020-05-08 15:27:19 -04003473 Value *d = Nucleus::createBitCast(c, Short8::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003474 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3475
3476 *this = As<Int4>(e) >> 24;
3477}
3478
Ben Clayton713b8d32019-12-17 20:37:56 +00003479Int4::Int4(RValue<Short4> cast)
3480 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003481{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003482 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003483 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003484 Value *c = Nucleus::createShuffleVector(cast.value(), cast.value(), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003485
3486 *this = As<Int4>(c) >> 16;
3487}
3488
Ben Clayton713b8d32019-12-17 20:37:56 +00003489Int4::Int4(RValue<UShort4> cast)
3490 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003491{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003492 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003493 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003494 Value *c = Nucleus::createShuffleVector(cast.value(), Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
Nicolas Capens519cf222020-05-08 15:27:19 -04003495 Value *d = Nucleus::createBitCast(c, Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003496 storeValue(d);
3497}
3498
Ben Clayton713b8d32019-12-17 20:37:56 +00003499Int4::Int4(RValue<Int> rhs)
3500 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003501{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003502 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003503 Value *vector = Nucleus::createBitCast(rhs.value(), Int4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003504
Ben Clayton713b8d32019-12-17 20:37:56 +00003505 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003506 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3507
3508 storeValue(replicate);
3509}
3510
3511RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3512{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003513 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003514 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003515 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003516 Int4 result;
3517 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3518 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3519 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3520 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003521
Nicolas Capens157ba262019-12-10 17:49:14 -05003522 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003523 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003524 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003525 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003526 return RValue<Int4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003527 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003528}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003529
Nicolas Capens157ba262019-12-10 17:49:14 -05003530RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3531{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003532 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003533 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003534 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003535 Int4 result;
3536 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3537 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3538 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3539 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003540
Nicolas Capens157ba262019-12-10 17:49:14 -05003541 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003542 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003543 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003544 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003545 return RValue<Int4>(Nucleus::createAShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003546 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003547}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003548
Nicolas Capens157ba262019-12-10 17:49:14 -05003549RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3550{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003551 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003552 return RValue<Int4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003553}
3554
3555RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3556{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003557 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003558 return RValue<Int4>(Nucleus::createICmpSLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003559}
3560
3561RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3562{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003563 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003564 return RValue<Int4>(Nucleus::createICmpSLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003565}
3566
3567RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3568{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003569 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003570 return RValue<Int4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003571}
3572
3573RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3574{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003575 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003576 return RValue<Int4>(Nucleus::createICmpSGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003577}
3578
3579RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3580{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003581 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003582 return RValue<Int4>(Nucleus::createICmpSGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003583}
3584
3585RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3586{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003587 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003588 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003589 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003590 ::basicBlock->appendInst(cmp);
3591
3592 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003593 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003594 ::basicBlock->appendInst(select);
3595
3596 return RValue<Int4>(V(result));
3597}
3598
3599RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3600{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003601 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003602 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003603 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003604 ::basicBlock->appendInst(cmp);
3605
3606 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003607 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003608 ::basicBlock->appendInst(select);
3609
3610 return RValue<Int4>(V(result));
3611}
3612
3613RValue<Int4> RoundInt(RValue<Float4> cast)
3614{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003615 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003616 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003617 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003618 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3619 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003620 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003621 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003622 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003623 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003624 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003625 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3626 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003627 nearbyint->addArg(cast.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003628 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003629
3630 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003631 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003632}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003633
Nicolas Capenseeb81842021-01-12 17:44:40 -05003634RValue<Int4> RoundIntClamped(RValue<Float4> cast)
3635{
3636 RR_DEBUG_INFO_UPDATE_LOC();
3637
3638 // cvtps2dq produces 0x80000000, a negative value, for input larger than
3639 // 2147483520.0, so clamp to 2147483520. Values less than -2147483520.0
3640 // saturate to 0x80000000.
3641 RValue<Float4> clamped = Min(cast, Float4(0x7FFFFF80));
3642
3643 if(emulateIntrinsics || CPUID::ARM)
3644 {
3645 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3646 return Int4((clamped + Float4(0x00C00000)) - Float4(0x00C00000));
3647 }
3648 else
3649 {
3650 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3651 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3652 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3653 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3654 nearbyint->addArg(clamped.value());
3655 ::basicBlock->appendInst(nearbyint);
3656
3657 return RValue<Int4>(V(result));
3658 }
3659}
3660
Nicolas Capens157ba262019-12-10 17:49:14 -05003661RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3662{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003663 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003664 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003665 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003666 Short8 result;
3667 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3668 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3669 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3670 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3671 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3672 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3673 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3674 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003675
Nicolas Capens157ba262019-12-10 17:49:14 -05003676 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003677 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003678 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003679 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003680 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003681 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003682 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3683 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003684 pack->addArg(x.value());
3685 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003686 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003687
Nicolas Capens157ba262019-12-10 17:49:14 -05003688 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003689 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003690}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003691
Nicolas Capens157ba262019-12-10 17:49:14 -05003692RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3693{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003694 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003695 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003696 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003697 RValue<Int4> sx = As<Int4>(x);
3698 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003699
Nicolas Capens157ba262019-12-10 17:49:14 -05003700 RValue<Int4> sy = As<Int4>(y);
3701 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003702
Nicolas Capens157ba262019-12-10 17:49:14 -05003703 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003704 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003705 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003706 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003707 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003708 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003709 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3710 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003711 pack->addArg(x.value());
3712 pack->addArg(y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003713 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003714
Nicolas Capens157ba262019-12-10 17:49:14 -05003715 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003716 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003717}
Nicolas Capens33438a62017-09-27 11:47:35 -04003718
Nicolas Capens157ba262019-12-10 17:49:14 -05003719RValue<Int> SignMask(RValue<Int4> x)
3720{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003721 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003722 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003723 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003724 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3725 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003726 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003727 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003728 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003729 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003730 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003731 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3732 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003733 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003734 ::basicBlock->appendInst(movmsk);
3735
3736 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003737 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003738}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003739
Nicolas Capens519cf222020-05-08 15:27:19 -04003740Type *Int4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003741{
3742 return T(Ice::IceType_v4i32);
3743}
3744
Ben Clayton713b8d32019-12-17 20:37:56 +00003745UInt4::UInt4(RValue<Float4> cast)
3746 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003747{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003748 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003749 // Smallest positive value representable in UInt, but not in Int
3750 const unsigned int ustart = 0x80000000u;
3751 const float ustartf = float(ustart);
3752
3753 // Check if the value can be represented as an Int
3754 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3755 // If the value is too large, subtract ustart and re-add it after conversion.
3756 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003757 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003758 (~uiValue & Int4(cast));
3759 // If the value is negative, store 0, otherwise store the result of the conversion
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003760 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003761}
3762
Ben Clayton713b8d32019-12-17 20:37:56 +00003763UInt4::UInt4(RValue<UInt> rhs)
3764 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003765{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003766 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003767 Value *vector = Nucleus::createBitCast(rhs.value(), UInt4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003768
Ben Clayton713b8d32019-12-17 20:37:56 +00003769 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003770 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3771
3772 storeValue(replicate);
3773}
3774
3775RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3776{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003777 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003778 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003779 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003780 UInt4 result;
3781 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3782 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3783 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3784 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003785
Nicolas Capens157ba262019-12-10 17:49:14 -05003786 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003787 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003788 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003789 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003790 return RValue<UInt4>(Nucleus::createShl(lhs.value(), V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003791 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003792}
Ben Clayton88816fa2019-05-15 17:08:14 +01003793
Nicolas Capens157ba262019-12-10 17:49:14 -05003794RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3795{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003796 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003797 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003798 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003799 UInt4 result;
3800 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3801 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3802 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3803 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003804
Nicolas Capens157ba262019-12-10 17:49:14 -05003805 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003806 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003807 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003808 {
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003809 return RValue<UInt4>(Nucleus::createLShr(lhs.value(), V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003810 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003811}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003812
Nicolas Capens157ba262019-12-10 17:49:14 -05003813RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3814{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003815 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003816 return RValue<UInt4>(Nucleus::createICmpEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003817}
3818
3819RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3820{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003821 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003822 return RValue<UInt4>(Nucleus::createICmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003823}
3824
3825RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3826{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003827 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003828 return RValue<UInt4>(Nucleus::createICmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003829}
3830
3831RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3832{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003833 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003834 return RValue<UInt4>(Nucleus::createICmpNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003835}
3836
3837RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003840 return RValue<UInt4>(Nucleus::createICmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003841}
3842
3843RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3844{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003845 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003846 return RValue<UInt4>(Nucleus::createICmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05003847}
3848
3849RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3850{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003851 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003852 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003853 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003854 ::basicBlock->appendInst(cmp);
3855
3856 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003857 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003858 ::basicBlock->appendInst(select);
3859
3860 return RValue<UInt4>(V(result));
3861}
3862
3863RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3864{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003865 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003866 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003867 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003868 ::basicBlock->appendInst(cmp);
3869
3870 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003871 auto select = Ice::InstSelect::create(::function, result, condition, y.value(), x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003872 ::basicBlock->appendInst(select);
3873
3874 return RValue<UInt4>(V(result));
3875}
3876
Nicolas Capens519cf222020-05-08 15:27:19 -04003877Type *UInt4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003878{
3879 return T(Ice::IceType_v4i32);
3880}
3881
Nicolas Capens519cf222020-05-08 15:27:19 -04003882Type *Half::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003883{
3884 return T(Ice::IceType_i16);
3885}
3886
3887RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3888{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003889 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003890 return 1.0f / x;
3891}
3892
3893RValue<Float> RcpSqrt_pp(RValue<Float> x)
3894{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003895 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003896 return Rcp_pp(Sqrt(x));
3897}
3898
3899RValue<Float> Sqrt(RValue<Float> x)
3900{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003901 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003902 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003903 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003904 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3905 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003906 sqrt->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003907 ::basicBlock->appendInst(sqrt);
3908
3909 return RValue<Float>(V(result));
3910}
3911
3912RValue<Float> Round(RValue<Float> x)
3913{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003914 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003915 return Float4(Round(Float4(x))).x;
3916}
3917
3918RValue<Float> Trunc(RValue<Float> x)
3919{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003920 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003921 return Float4(Trunc(Float4(x))).x;
3922}
3923
3924RValue<Float> Frac(RValue<Float> x)
3925{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003926 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003927 return Float4(Frac(Float4(x))).x;
3928}
3929
3930RValue<Float> Floor(RValue<Float> x)
3931{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003932 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003933 return Float4(Floor(Float4(x))).x;
3934}
3935
3936RValue<Float> Ceil(RValue<Float> x)
3937{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003938 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003939 return Float4(Ceil(Float4(x))).x;
3940}
3941
Nicolas Capens519cf222020-05-08 15:27:19 -04003942Type *Float::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003943{
3944 return T(Ice::IceType_f32);
3945}
3946
Nicolas Capens519cf222020-05-08 15:27:19 -04003947Type *Float2::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05003948{
3949 return T(Type_v2f32);
3950}
3951
Ben Clayton713b8d32019-12-17 20:37:56 +00003952Float4::Float4(RValue<Float> rhs)
3953 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003954{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003955 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003956 Value *vector = Nucleus::createBitCast(rhs.value(), Float4::type());
Nicolas Capens157ba262019-12-10 17:49:14 -05003957
Ben Clayton713b8d32019-12-17 20:37:56 +00003958 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003959 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3960
3961 storeValue(replicate);
3962}
3963
3964RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3965{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003966 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003967 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003968 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003969 ::basicBlock->appendInst(cmp);
3970
3971 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003972 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003973 ::basicBlock->appendInst(select);
3974
3975 return RValue<Float4>(V(result));
3976}
3977
3978RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3979{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003980 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003981 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003982 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003983 ::basicBlock->appendInst(cmp);
3984
3985 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04003986 auto select = Ice::InstSelect::create(::function, result, condition, x.value(), y.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05003987 ::basicBlock->appendInst(select);
3988
3989 return RValue<Float4>(V(result));
3990}
3991
3992RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3993{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003994 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003995 return Float4(1.0f) / x;
3996}
3997
3998RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3999{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004000 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004001 return Rcp_pp(Sqrt(x));
4002}
4003
// Reports whether RcpApprox() is available on this backend; currently it is not.
bool HasRcpApprox()
{
	// TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
	constexpr bool supported = false;
	return supported;
}
4009
4010RValue<Float4> RcpApprox(RValue<Float4> x, bool exactAtPow2)
4011{
4012 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4013 UNREACHABLE("RValue<Float4> RcpApprox()");
4014 return { 0.0f };
4015}
4016
4017RValue<Float> RcpApprox(RValue<Float> x, bool exactAtPow2)
4018{
4019 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4020 UNREACHABLE("RValue<Float> RcpApprox()");
4021 return { 0.0f };
4022}
4023
// Reports whether RcpSqrtApprox() is available on this backend; currently it is not.
bool HasRcpSqrtApprox()
{
	constexpr bool supported = false;
	return supported;
}
4028
4029RValue<Float4> RcpSqrtApprox(RValue<Float4> x)
4030{
4031 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4032 UNREACHABLE("RValue<Float4> RcpSqrtApprox()");
4033 return { 0.0f };
4034}
4035
4036RValue<Float> RcpSqrtApprox(RValue<Float> x)
4037{
4038 // TODO(b/175612820): Update once we implement x86 SSE rcp_ss and rsqrt_ss intrinsics in Subzero
4039 UNREACHABLE("RValue<Float> RcpSqrtApprox()");
4040 return { 0.0f };
4041}
4042
Nicolas Capens157ba262019-12-10 17:49:14 -05004043RValue<Float4> Sqrt(RValue<Float4> x)
4044{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004045 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004046 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04004047 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004048 Float4 result;
4049 result.x = Sqrt(Float(Float4(x).x));
4050 result.y = Sqrt(Float(Float4(x).y));
4051 result.z = Sqrt(Float(Float4(x).z));
4052 result.w = Sqrt(Float(Float4(x).w));
4053
4054 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04004055 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004056 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04004057 {
Nicolas Capens157ba262019-12-10 17:49:14 -05004058 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004059 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04004060 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4061 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004062 sqrt->addArg(x.value());
Nicolas Capensd52e9362016-10-31 23:23:15 -04004063 ::basicBlock->appendInst(sqrt);
4064
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04004065 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04004066 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04004067}
Nicolas Capens157ba262019-12-10 17:49:14 -05004068
4069RValue<Int> SignMask(RValue<Float4> x)
4070{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004071 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004072 if(emulateIntrinsics || CPUID::ARM)
4073 {
4074 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
4075 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
4076 }
4077 else
4078 {
4079 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004080 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004081 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4082 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004083 movmsk->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004084 ::basicBlock->appendInst(movmsk);
4085
4086 return RValue<Int>(V(result));
4087 }
4088}
4089
4090RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
4091{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004092 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004093 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004094}
4095
4096RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
4097{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004098 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004099 return RValue<Int4>(Nucleus::createFCmpOLT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004100}
4101
4102RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
4103{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004104 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004105 return RValue<Int4>(Nucleus::createFCmpOLE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004106}
4107
4108RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
4109{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004110 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004111 return RValue<Int4>(Nucleus::createFCmpONE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004112}
4113
4114RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
4115{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004116 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004117 return RValue<Int4>(Nucleus::createFCmpOGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004118}
4119
4120RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
4121{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004122 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004123 return RValue<Int4>(Nucleus::createFCmpOGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004124}
4125
4126RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
4127{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004128 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004129 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004130}
4131
4132RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
4133{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004134 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004135 return RValue<Int4>(Nucleus::createFCmpULT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004136}
4137
4138RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
4139{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004140 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004141 return RValue<Int4>(Nucleus::createFCmpULE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004142}
4143
4144RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
4145{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004146 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004147 return RValue<Int4>(Nucleus::createFCmpUNE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004148}
4149
4150RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
4151{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004152 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004153 return RValue<Int4>(Nucleus::createFCmpUGE(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004154}
4155
4156RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
4157{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004158 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004159 return RValue<Int4>(Nucleus::createFCmpUGT(x.value(), y.value()));
Nicolas Capens157ba262019-12-10 17:49:14 -05004160}
4161
4162RValue<Float4> Round(RValue<Float4> x)
4163{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004164 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004165 if(emulateIntrinsics || CPUID::ARM)
4166 {
4167 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4168 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4169 }
4170 else if(CPUID::SSE4_1)
4171 {
4172 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004173 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004174 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4175 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004176 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004177 round->addArg(::context->getConstantInt32(0));
4178 ::basicBlock->appendInst(round);
4179
4180 return RValue<Float4>(V(result));
4181 }
4182 else
4183 {
4184 return Float4(RoundInt(x));
4185 }
4186}
4187
4188RValue<Float4> Trunc(RValue<Float4> x)
4189{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004190 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004191 if(CPUID::SSE4_1)
4192 {
4193 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004194 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004195 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4196 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004197 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004198 round->addArg(::context->getConstantInt32(3));
4199 ::basicBlock->appendInst(round);
4200
4201 return RValue<Float4>(V(result));
4202 }
4203 else
4204 {
4205 return Float4(Int4(x));
4206 }
4207}
4208
4209RValue<Float4> Frac(RValue<Float4> x)
4210{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004211 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004212 Float4 frc;
4213
4214 if(CPUID::SSE4_1)
4215 {
4216 frc = x - Floor(x);
4217 }
4218 else
4219 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004220 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004221
Ben Clayton713b8d32019-12-17 20:37:56 +00004222 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004223 }
4224
4225 // x - floor(x) can be 1.0 for very small negative x.
4226 // Clamp against the value just below 1.0.
4227 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4228}
4229
4230RValue<Float4> Floor(RValue<Float4> x)
4231{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004232 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004233 if(CPUID::SSE4_1)
4234 {
4235 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004236 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004237 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4238 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004239 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004240 round->addArg(::context->getConstantInt32(1));
4241 ::basicBlock->appendInst(round);
4242
4243 return RValue<Float4>(V(result));
4244 }
4245 else
4246 {
4247 return x - Frac(x);
4248 }
4249}
4250
4251RValue<Float4> Ceil(RValue<Float4> x)
4252{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004253 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004254 if(CPUID::SSE4_1)
4255 {
4256 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004257 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004258 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4259 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004260 round->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004261 round->addArg(::context->getConstantInt32(2));
4262 ::basicBlock->appendInst(round);
4263
4264 return RValue<Float4>(V(result));
4265 }
4266 else
4267 {
4268 return -Floor(-x);
4269 }
4270}
4271
Nicolas Capens519cf222020-05-08 15:27:19 -04004272Type *Float4::type()
Nicolas Capens157ba262019-12-10 17:49:14 -05004273{
4274 return T(Ice::IceType_v4f32);
4275}
4276
4277RValue<Long> Ticks()
4278{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004279 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004280 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004281 return Long(Int(0));
4282}
4283
Ben Clayton713b8d32019-12-17 20:37:56 +00004284RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004285{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004286 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004287 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004288}
4289
Ben Clayton713b8d32019-12-17 20:37:56 +00004290RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004291{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004292 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004293 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004294}
4295
Ben Clayton713b8d32019-12-17 20:37:56 +00004296Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004297{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004298 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004299 return V(sz::Call(::function, ::basicBlock, T(retTy), V(fptr.value()), V(args), false));
Nicolas Capens157ba262019-12-10 17:49:14 -05004300}
4301
4302void Breakpoint()
4303{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004304 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004305 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004306 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4307 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4308 ::basicBlock->appendInst(trap);
4309}
4310
Ben Clayton713b8d32019-12-17 20:37:56 +00004311void Nucleus::createFence(std::memory_order memoryOrder)
4312{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004313 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004314 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4315 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4316 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4317 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4318 inst->addArg(order);
4319 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004320}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004321
Ben Clayton713b8d32019-12-17 20:37:56 +00004322Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004324 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004325 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004326 return nullptr;
4327}
4328void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004330 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004331 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004332}
Nicolas Capens157ba262019-12-10 17:49:14 -05004333
4334RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4335{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004336 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004337 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4338}
4339
4340RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4341{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004342 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004343 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4344}
4345
4346void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4347{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004348 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004349 return emulated::Scatter(base, val, offsets, mask, alignment);
4350}
4351
4352void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4353{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004354 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004355 return emulated::Scatter(base, val, offsets, mask, alignment);
4356}
4357
4358RValue<Float> Exp2(RValue<Float> x)
4359{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004360 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004361 return emulated::Exp2(x);
4362}
4363
4364RValue<Float> Log2(RValue<Float> x)
4365{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004366 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004367 return emulated::Log2(x);
4368}
4369
4370RValue<Float4> Sin(RValue<Float4> x)
4371{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004372 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004373 return optimal::Sin(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004374}
4375
4376RValue<Float4> Cos(RValue<Float4> x)
4377{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004378 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004379 return optimal::Cos(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004380}
4381
4382RValue<Float4> Tan(RValue<Float4> x)
4383{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004384 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004385 return optimal::Tan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004386}
4387
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004388RValue<Float4> Asin(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004389{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004390 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004391 if(p == Precision::Full)
4392 {
4393 return emulated::Asin(x);
4394 }
4395 return optimal::Asin_8_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004396}
4397
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004398RValue<Float4> Acos(RValue<Float4> x, Precision p)
Nicolas Capens157ba262019-12-10 17:49:14 -05004399{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004400 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004401 // Surprisingly, deqp-vk's precision.acos.highp/mediump tests pass when using the 4-term polynomial approximation
4402 // version of acos, unlike for Asin, which requires higher precision algorithms.
4403 return optimal::Acos_4_terms(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004404}
4405
4406RValue<Float4> Atan(RValue<Float4> x)
4407{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004408 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004409 return optimal::Atan(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004410}
4411
4412RValue<Float4> Sinh(RValue<Float4> x)
4413{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004414 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004415 return optimal::Sinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004416}
4417
4418RValue<Float4> Cosh(RValue<Float4> x)
4419{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004420 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004421 return optimal::Cosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004422}
4423
4424RValue<Float4> Tanh(RValue<Float4> x)
4425{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004426 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004427 return optimal::Tanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004428}
4429
4430RValue<Float4> Asinh(RValue<Float4> x)
4431{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004432 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004433 return optimal::Asinh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004434}
4435
4436RValue<Float4> Acosh(RValue<Float4> x)
4437{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004438 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004439 return optimal::Acosh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004440}
4441
4442RValue<Float4> Atanh(RValue<Float4> x)
4443{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004444 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004445 return optimal::Atanh(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004446}
4447
4448RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4449{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004450 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004451 return optimal::Atan2(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004452}
4453
4454RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4455{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004456 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004457 return optimal::Pow(x, y);
Nicolas Capens157ba262019-12-10 17:49:14 -05004458}
4459
4460RValue<Float4> Exp(RValue<Float4> x)
4461{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004462 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004463 return optimal::Exp(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004464}
4465
4466RValue<Float4> Log(RValue<Float4> x)
4467{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004468 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004469 return optimal::Log(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004470}
4471
4472RValue<Float4> Exp2(RValue<Float4> x)
4473{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004474 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004475 return optimal::Exp2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004476}
4477
4478RValue<Float4> Log2(RValue<Float4> x)
4479{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004480 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano9c14bda2020-09-18 16:33:36 -04004481 return optimal::Log2(x);
Nicolas Capens157ba262019-12-10 17:49:14 -05004482}
4483
4484RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4485{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004486 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004487 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004488 {
Ben Claytonce54c592020-02-07 11:30:51 +00004489 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004490 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004491 }
4492 else
4493 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004494 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004495 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4496 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4497 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004498 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004499 ::basicBlock->appendInst(ctlz);
4500
4501 return RValue<UInt>(V(result));
4502 }
4503}
4504
4505RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4506{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004507 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004508 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004509 {
Ben Claytonce54c592020-02-07 11:30:51 +00004510 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004511 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004512 }
4513 else
4514 {
4515 // TODO: implement vectorized version in Subzero
4516 UInt4 result;
4517 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4518 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4519 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4520 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4521 return result;
4522 }
4523}
4524
4525RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4526{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004527 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004528 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004529 {
Ben Claytonce54c592020-02-07 11:30:51 +00004530 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004531 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004532 }
4533 else
4534 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004535 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004536 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4537 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4538 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
Nicolas Capensb6e8c3f2020-05-01 23:28:37 -04004539 ctlz->addArg(x.value());
Nicolas Capens157ba262019-12-10 17:49:14 -05004540 ::basicBlock->appendInst(ctlz);
4541
4542 return RValue<UInt>(V(result));
4543 }
4544}
4545
4546RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4547{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004548 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004549 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004550 {
Ben Claytonce54c592020-02-07 11:30:51 +00004551 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004552 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004553 }
4554 else
4555 {
4556 // TODO: implement vectorized version in Subzero
4557 UInt4 result;
4558 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4559 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4560 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4561 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4562 return result;
4563 }
4564}
4565
Antonio Maiorano370cba52019-12-31 11:36:07 -05004566RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4567{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004568 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004569 return emulated::MinAtomic(x, y, memoryOrder);
4570}
4571
4572RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4573{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004574 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004575 return emulated::MinAtomic(x, y, memoryOrder);
4576}
4577
4578RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4579{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004580 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004581 return emulated::MaxAtomic(x, y, memoryOrder);
4582}
4583
4584RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4585{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004586 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004587 return emulated::MaxAtomic(x, y, memoryOrder);
4588}
4589
// Emits a print of the caller's backtrace location when ENABLE_RR_DEBUG_INFO
// is defined; compiles to an empty function otherwise.
void EmitDebugLocation()
{
#ifdef ENABLE_RR_DEBUG_INFO
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004596void EmitDebugVariable(Value *value) {}
// Intentionally empty in this backend: there is nothing to flush.
void FlushDebug()
{
}
4598
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004599namespace {
4600namespace coro {
4601
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004602// Instance data per generated coroutine
4603// This is the "handle" type used for Coroutine functions
4604// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4605struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004606{
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004607 bool useInternalScheduler = false;
Ben Claytonc3466532020-03-24 11:54:05 +00004608 bool done = false; // the coroutine should stop at the next yield()
4609 bool terminated = false; // the coroutine has finished.
4610 bool inRoutine = false; // is the coroutine currently executing?
4611 marl::Scheduler::Fiber *mainFiber = nullptr;
4612 marl::Scheduler::Fiber *routineFiber = nullptr;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004613 void *promisePtr = nullptr;
4614};
4615
4616CoroutineData *createCoroutineData()
4617{
4618 return new CoroutineData{};
4619}
4620
4621void destroyCoroutineData(CoroutineData *coroData)
4622{
4623 delete coroData;
4624}
4625
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004626// suspend() pauses execution of the coroutine, and resumes execution from the
4627// caller's call to await().
4628// Returns true if await() is called again, or false if coroutine_destroy()
4629// is called.
4630bool suspend(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004631{
Ben Claytonc3466532020-03-24 11:54:05 +00004632 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4633 ASSERT(marl::Scheduler::Fiber::current() == coroData->routineFiber);
4634 ASSERT(coroData->inRoutine);
4635 coroData->inRoutine = false;
4636 coroData->mainFiber->notify();
4637 while(!coroData->inRoutine)
4638 {
4639 coroData->routineFiber->wait();
4640 }
4641 return !coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004642}
4643
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004644// resume() is called by await(), blocking until the coroutine calls yield()
4645// or the coroutine terminates.
4646void resume(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004647{
Ben Claytonc3466532020-03-24 11:54:05 +00004648 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4649 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4650 ASSERT(!coroData->inRoutine);
4651 coroData->inRoutine = true;
4652 coroData->routineFiber->notify();
4653 while(coroData->inRoutine)
4654 {
4655 coroData->mainFiber->wait();
4656 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004657}
4658
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004659// stop() is called by coroutine_destroy(), signalling that it's done, then blocks
4660// until the coroutine ends, and deletes the coroutine data.
4661void stop(Nucleus::CoroutineHandle handle)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004662{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004663 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004664 ASSERT(marl::Scheduler::Fiber::current() == coroData->mainFiber);
4665 ASSERT(!coroData->inRoutine);
4666 if(!coroData->terminated)
4667 {
4668 coroData->done = true;
4669 coroData->inRoutine = true;
4670 coroData->routineFiber->notify();
4671 while(!coroData->terminated)
4672 {
4673 coroData->mainFiber->wait();
4674 }
4675 }
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004676 if(coroData->useInternalScheduler)
4677 {
4678 ::getOrCreateScheduler().unbind();
4679 }
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004680 coro::destroyCoroutineData(coroData); // free the coroutine data.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004681}
4682
4683namespace detail {
4684thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4685} // namespace detail
4686
4687void setHandleParam(Nucleus::CoroutineHandle handle)
4688{
4689 ASSERT(!detail::coroHandle);
4690 detail::coroHandle = handle;
4691}
4692
4693Nucleus::CoroutineHandle getHandleParam()
4694{
4695 ASSERT(detail::coroHandle);
4696 auto handle = detail::coroHandle;
4697 detail::coroHandle = {};
4698 return handle;
4699}
4700
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004701bool isDone(Nucleus::CoroutineHandle handle)
4702{
4703 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
Ben Claytonc3466532020-03-24 11:54:05 +00004704 return coroData->done;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004705}
4706
4707void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4708{
4709 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4710 coroData->promisePtr = promisePtr;
4711}
4712
4713void *getPromisePtr(Nucleus::CoroutineHandle handle)
4714{
4715 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4716 return coroData->promisePtr;
4717}
4718
4719} // namespace coro
4720} // namespace
4721
4722// Used to generate coroutines.
4723// Lifetime: from yield to acquireCoroutine
4724class CoroutineGenerator
4725{
4726public:
4727 CoroutineGenerator()
4728 {
4729 }
4730
4731 // Inserts instructions at the top of the current function to make it a coroutine.
4732 void generateCoroutineBegin()
4733 {
4734 // Begin building the main coroutine_begin() function.
4735 // We insert these instructions at the top of the entry node,
4736 // before existing reactor-generated instructions.
4737
4738 // CoroutineHandle coroutine_begin(<Arguments>)
4739 // {
4740 // this->handle = coro::getHandleParam();
4741 //
4742 // YieldType promise;
4743 // coro::setPromisePtr(handle, &promise); // For await
4744 //
4745 // ... <REACTOR CODE> ...
4746 //
4747
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004748 // this->handle = coro::getHandleParam();
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004749 this->handle = sz::Call(::function, ::entryBlock, coro::getHandleParam);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004750
4751 // YieldType promise;
4752 // coro::setPromisePtr(handle, &promise); // For await
4753 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004754 sz::Call(::function, ::entryBlock, coro::setPromisePtr, this->handle, this->promise);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004755 }
4756
4757 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4758 void generateYield(Value *val)
4759 {
4760 // ... <REACTOR CODE> ...
4761 //
4762 // promise = val;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004763 // if (!coro::suspend(handle)) {
4764 // return false; // coroutine has been stopped by the caller.
4765 // }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004766 //
4767 // ... <REACTOR CODE> ...
4768
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004769 // promise = val;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004770 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004771
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004772 // if (!coro::suspend(handle)) {
4773 auto result = sz::Call(::function, ::basicBlock, coro::suspend, this->handle);
4774 auto doneBlock = Nucleus::createBasicBlock();
4775 auto resumeBlock = Nucleus::createBasicBlock();
4776 Nucleus::createCondBr(V(result), resumeBlock, doneBlock);
4777
4778 // return false; // coroutine has been stopped by the caller.
4779 ::basicBlock = doneBlock;
4780 Nucleus::createRetVoid(); // coroutine return value is ignored.
4781
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004782 // ... <REACTOR CODE> ...
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004783 ::basicBlock = resumeBlock;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004784 }
4785
4786 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4787
4788 // Generates the await function for the current coroutine.
4789 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4790 static FunctionUniquePtr generateAwaitFunction()
4791 {
4792 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4793 // {
4794 // if (coro::isDone())
4795 // {
4796 // return false;
4797 // }
4798 // else // resume
4799 // {
4800 // YieldType* promise = coro::getPromisePtr(handle);
4801 // *out = *promise;
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004802 // coro::resume(handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004803 // return true;
4804 // }
4805 // }
4806
4807 // Subzero doesn't support bool types (IceType_i1) as return type
4808 const Ice::Type ReturnType = Ice::IceType_i32;
4809 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4810 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4811
4812 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4813 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4814
4815 Ice::Variable *handle = awaitFunc->getArgs()[0];
4816 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4817
4818 auto doneBlock = awaitFunc->makeNode();
4819 {
4820 // return false;
4821 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4822 doneBlock->appendInst(ret);
4823 }
4824
4825 auto resumeBlock = awaitFunc->makeNode();
4826 {
4827 // YieldType* promise = coro::getPromisePtr(handle);
4828 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4829
4830 // *out = *promise;
4831 // Load promise value
4832 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4833 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4834 resumeBlock->appendInst(load);
4835 // Then store it in output param
4836 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4837 resumeBlock->appendInst(store);
4838
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004839 // coro::resume(handle);
4840 sz::Call(awaitFunc, resumeBlock, coro::resume, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004841
4842 // return true;
4843 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4844 resumeBlock->appendInst(ret);
4845 }
4846
4847 // if (coro::isDone())
4848 // {
4849 // <doneBlock>
4850 // }
4851 // else // resume
4852 // {
4853 // <resumeBlock>
4854 // }
4855 Ice::CfgNode *bb = awaitFunc->getEntryNode();
Antonio Maioranobc98fbe2020-03-17 15:46:22 -04004856 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004857 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4858 bb->appendInst(br);
4859
4860 return FunctionUniquePtr{ awaitFunc };
4861 }
4862
4863 // Generates the destroy function for the current coroutine.
4864 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4865 static FunctionUniquePtr generateDestroyFunction()
4866 {
4867 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4868 // {
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004869 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004870 // return;
4871 // }
4872
4873 const Ice::Type ReturnType = Ice::IceType_void;
4874 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4875
4876 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4877 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4878
4879 Ice::Variable *handle = destroyFunc->getArgs()[0];
4880
4881 auto *bb = destroyFunc->getEntryNode();
4882
Antonio Maiorano8bce0672020-02-28 13:13:45 -05004883 // coro::stop(handle); // signal and wait for coroutine to stop, and delete coroutine data
4884 sz::Call(destroyFunc, bb, coro::stop, handle);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004885
4886 // return;
4887 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4888 bb->appendInst(ret);
4889
4890 return FunctionUniquePtr{ destroyFunc };
4891 }
4892
4893private:
4894 Ice::Variable *handle{};
4895 Ice::Variable *promise{};
4896};
4897
4898static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4899{
4900 // This doubles up as our coroutine handle
4901 auto coroData = coro::createCoroutineData();
4902
Antonio Maiorano8f2d48f2020-02-28 13:39:11 -05004903 coroData->useInternalScheduler = (marl::Scheduler::get() == nullptr);
4904 if(coroData->useInternalScheduler)
4905 {
4906 ::getOrCreateScheduler().bind();
4907 }
4908
Ben Clayton76e9e532020-03-16 20:35:04 +00004909 auto run = [=] {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004910 // Store handle in TLS so that the coroutine can grab it right away, before
4911 // any fiber switch occurs.
4912 coro::setHandleParam(coroData);
4913
Ben Claytonc3466532020-03-24 11:54:05 +00004914 ASSERT(!coroData->routineFiber);
4915 coroData->routineFiber = marl::Scheduler::Fiber::current();
4916
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004917 beginFunc();
4918
Ben Claytonc3466532020-03-24 11:54:05 +00004919 ASSERT(coroData->inRoutine);
4920 coroData->done = true; // coroutine is done.
4921 coroData->terminated = true; // signal that the coroutine data is ready for freeing.
4922 coroData->inRoutine = false;
4923 coroData->mainFiber->notify();
Ben Clayton76e9e532020-03-16 20:35:04 +00004924 };
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004925
Ben Claytonc3466532020-03-24 11:54:05 +00004926 ASSERT(!coroData->mainFiber);
4927 coroData->mainFiber = marl::Scheduler::Fiber::current();
4928
4929 // block until the first yield or coroutine end
4930 ASSERT(!coroData->inRoutine);
4931 coroData->inRoutine = true;
4932 marl::schedule(marl::Task(run, marl::Task::Flags::SameThread));
4933 while(coroData->inRoutine)
4934 {
4935 coroData->mainFiber->wait();
4936 }
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004937
4938 return coroData;
4939}
4940
4941void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4942{
4943 // Start by creating a regular function
4944 createFunction(yieldType, params);
4945
4946 // Save in case yield() is called
4947 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4948 ::coroYieldType = yieldType;
4949}
4950
4951void Nucleus::yield(Value *val)
4952{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004953 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004954 Variable::materializeAll();
4955
4956 // On first yield, we start generating coroutine functions
4957 if(!::coroGen)
4958 {
4959 ::coroGen = std::make_shared<CoroutineGenerator>();
4960 ::coroGen->generateCoroutineBegin();
4961 }
4962
4963 ASSERT(::coroGen);
4964 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004965}
4966
Ben Clayton713b8d32019-12-17 20:37:56 +00004967static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4968{
4969 return false;
4970}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004971
4972static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4973{
4974}
Nicolas Capens157ba262019-12-10 17:49:14 -05004975
4976std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4977{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004978 if(::coroGen)
4979 {
4980 // Finish generating coroutine functions
4981 {
4982 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04004983 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004984 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004985
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004986 auto awaitFunc = ::coroGen->generateAwaitFunction();
4987 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004988
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004989 // At this point, we no longer need the CoroutineGenerator.
4990 ::coroGen.reset();
4991 ::coroYieldType = nullptr;
4992
4993 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4994 { name, "await", "destroy" },
4995 cfgEdit);
4996
4997 return routine;
4998 }
4999 else
5000 {
5001 {
5002 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
Antonio Maiorano22d73d12020-03-20 00:13:28 -04005003 finalizeFunction();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005004 }
5005
5006 ::coroYieldType = nullptr;
5007
5008 // Not an actual coroutine (no yields), so return stubs for await and destroy
5009 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
5010
5011 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
5012 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
5013 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
5014 return routine;
5015 }
Nicolas Capens157ba262019-12-10 17:49:14 -05005016}
5017
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005018Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00005019{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05005020 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
5021
5022 if(isCoroutine)
5023 {
5024 return rr::invokeCoroutineBegin(func);
5025 }
5026 else
5027 {
5028 // For regular routines, just invoke the begin func directly
5029 return func();
5030 }
Ben Clayton713b8d32019-12-17 20:37:56 +00005031}
Nicolas Capens157ba262019-12-10 17:49:14 -05005032
5033} // namespace rr