blob: 7efeb5bb72fba24a7672f55511df7e16b24eee07 [file] [log] [blame]
Nicolas Capens598f8d82016-09-26 15:09:10 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000017#include "Reactor.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040018
Nicolas Capens1a3ce872018-10-10 10:42:36 -040019#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000020#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040021
Nicolas Capens598f8d82016-09-26 15:09:10 -040022#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040023#include "src/IceCfgNode.h"
24#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000025#include "src/IceELFStreamer.h"
26#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040027#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040029
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031#include "llvm/Support/FileSystem.h"
32#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040033
34#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000035# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040036#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040037
Nicolas Capensbd65da92017-01-05 16:31:06 -050038#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# ifndef WIN32_LEAN_AND_MEAN
40# define WIN32_LEAN_AND_MEAN
41# endif // !WIN32_LEAN_AND_MEAN
42# ifndef NOMINMAX
43# define NOMINMAX
44# endif // !NOMINMAX
45# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050046#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040047
Ben Clayton683bad82020-02-10 23:57:09 +000048#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040049#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000050#include <limits>
51#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040052
Antonio Maiorano02a39532020-01-21 15:15:34 -050053// Subzero utility functions
54// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050055namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050056namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050057void replaceEntryNode(Ice::Cfg *function, Ice::CfgNode *newEntryNode)
58{
59 ASSERT_MSG(function->getEntryNode() != nullptr, "Function should have an entry node");
60
61 if(function->getEntryNode() == newEntryNode)
62 {
63 return;
64 }
65
66 // Make this the new entry node
67 function->setEntryNode(newEntryNode);
68
69 // Reorder nodes so that new entry block comes first. This is required
70 // by Cfg::renumberInstructions, which expects the first node in the list
71 // to be the entry node.
72 {
73 auto nodes = function->getNodes();
74
75 // TODO(amaiorano): Fast path if newEntryNode is last? Can avoid linear search.
76
77 auto iter = std::find(nodes.begin(), nodes.end(), newEntryNode);
78 ASSERT_MSG(iter != nodes.end(), "New node should be in the function's node list");
79
80 nodes.erase(iter);
81 nodes.insert(nodes.begin(), newEntryNode);
82
83 // swapNodes replaces its nodes with the input one, and renumbers them,
84 // so our new entry node will be 0, and the previous will be 1.
85 function->swapNodes(nodes);
86 }
87}
88
89Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
90{
91 uint32_t sequenceNumber = 0;
92 auto function = Ice::Cfg::create(context, sequenceNumber).release();
93
94 Ice::CfgLocalAllocatorScope allocScope{ function };
95
96 for(auto type : paramTypes)
97 {
98 Ice::Variable *arg = function->makeVariable(type);
99 function->addArg(arg);
100 }
101
102 Ice::CfgNode *node = function->makeNode();
103 function->setEntryNode(node);
104
105 return function;
106}
107
108Ice::Type getPointerType(Ice::Type elementType)
109{
110 if(sizeof(void *) == 8)
111 {
112 return Ice::IceType_i64;
113 }
114 else
115 {
116 return Ice::IceType_i32;
117 }
118}
119
120Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
121{
122 int typeSize = Ice::typeWidthInBytes(type);
123 int totalSize = typeSize * (arraySize ? arraySize : 1);
124
125 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
126 auto address = function->makeVariable(getPointerType(type));
127 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
128 function->getEntryNode()->getInsts().push_front(alloca);
129
130 return address;
131}
132
133Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500134{
135 if(sizeof(void *) == 8)
136 {
137 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
138 }
139 else
140 {
141 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
142 }
143}
144
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500145// Wrapper for calls on C functions with Ice types
146template<typename Return, typename... CArgs, typename... RArgs>
147Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
148{
149 Ice::Type retTy = T(rr::CToReactorT<Return>::getType());
150
151 // Subzero doesn't support boolean return values. Replace with an i32.
152 if(retTy == Ice::IceType_i1)
153 {
154 retTy = Ice::IceType_i32;
155 }
156
157 Ice::Variable *ret = nullptr;
158 if(retTy != Ice::IceType_void)
159 {
160 ret = function->makeVariable(retTy);
161 }
162
Ben Clayton683bad82020-02-10 23:57:09 +0000163 std::array<Ice::Variable *, sizeof...(args)> iceArgs {{ std::forward<RArgs>(args)... }};
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500164
165 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, getConstantPointer(function->getContext(), reinterpret_cast<void const *>(fptr)), false);
166 for(auto arg : iceArgs)
167 {
168 call->addArg(arg);
169 }
170
171 basicBlock->appendInst(call);
172 return ret;
173}
174
Antonio Maiorano02a39532020-01-21 15:15:34 -0500175// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500176Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500177{
178 Ice::Variable *result = function->makeVariable(v->getType());
179 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
180 basicBlock->appendInst(cast);
181 return result;
182}
183
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500184Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500185{
186 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
187 // absolute address. We circumvent this by casting to a non-const variable, and loading
188 // from that.
189 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
190 {
191 ptr = sz::createUnconstCast(function, basicBlock, cptr);
192 }
193
194 Ice::Variable *result = function->makeVariable(type);
195 auto load = Ice::InstLoad::create(function, result, ptr, align);
196 basicBlock->appendInst(load);
197
198 return result;
199}
200
201} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500202} // namespace
203
Ben Clayton713b8d32019-12-17 20:37:56 +0000204namespace rr {
205class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500206class CoroutineGenerator;
207} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500208
209namespace {
210
211// Default configuration settings. Must be accessed under mutex lock.
212std::mutex defaultConfigLock;
213rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000214{
Nicolas Capens157ba262019-12-10 17:49:14 -0500215 // This uses a static in a function to avoid the cost of a global static
216 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
217 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000218 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500219 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000220}
221
// File-local code generation state. All pointers start out null.
// NOTE(review): presumably these are only touched while codegenMutex is
// held — confirm against the code that sets them.
Ice::GlobalContext *context = nullptr;
Ice::Cfg *function = nullptr;
Ice::CfgNode *basicBlock = nullptr;  // Read by createRetVoidIfNoRet() to inspect the last instruction
Ice::CfgLocalAllocatorScope *allocator = nullptr;
rr::ELFMemoryStreamer *routine = nullptr;

std::mutex codegenMutex;

Ice::ELFFileStreamer *elfFile = nullptr;
Ice::Fdstream *out = nullptr;

// Coroutine globals
rr::Type *coroYieldType = nullptr;
std::shared_ptr<rr::CoroutineGenerator> coroGen;
236
Nicolas Capens157ba262019-12-10 17:49:14 -0500237} // Anonymous namespace
238
239namespace {
240
// Normalise architecture macros: the rest of this file tests __i386__ and
// __x86_64__, so define them when only the _M_* spellings are available.
#if !defined(__i386__) && defined(_M_IX86)
#	define __i386__ 1
#endif

#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
#	define __x86_64__ 1
#endif
248
Antonio Maiorano370cba52019-12-31 11:36:07 -0500249Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400250{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500251 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100252 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500253 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000254 case rr::Optimization::Level::None: return Ice::Opt_m1;
255 case rr::Optimization::Level::Less: return Ice::Opt_m1;
256 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500257 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
258 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100259 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500260 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400261}
262
Antonio Maiorano370cba52019-12-31 11:36:07 -0500263Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
264{
265 switch(memoryOrder)
266 {
267 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
268 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
269 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
270 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
271 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
272 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
273 }
274 return Ice::Intrinsics::MemoryOrderInvalid;
275}
276
// Host CPU feature detection. The public flags are defined outside the class
// from the private detect*() helpers.
class CPUID
{
public:
	const static bool ARM;
	const static bool SSE4_1;

private:
	// Executes the x86 CPUID instruction for leaf `info`, storing EAX, EBX,
	// ECX and EDX in registers[0..3]. On non-x86 targets all four are zeroed.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// True when compiling for 32- or 64-bit ARM. Unknown targets fail to compile.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// True when CPUID leaf 1 reports bit 19 of ECX set; always false on non-x86 targets.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		int registers[4];
		cpuid(registers, 1);
		const int ecx = registers[2];
		return (ecx & 0x00080000) != 0;
#else
		return false;
#endif
	}
};
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500326
// Host CPU flags, evaluated once during static initialization.
const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();

// Emulation switches. Mismatched bitcast emulation follows the ARM flag.
const bool emulateIntrinsics = false;
const bool emulateMismatchedBitCast = CPUID::ARM;

// Debug output switches; both require Subzero to be built with ALLOW_DUMP=1,
// which the static_asserts below enforce.
constexpr bool subzeroDumpEnabled = false;
constexpr bool subzeroEmitTextAsm = false;

#if !ALLOW_DUMP
static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500339
340} // anonymous namespace
341
342namespace rr {
343
// Returns the human-readable name of this Reactor backend.
std::string BackendName()
{
	std::string name("Subzero");
	return name;
}
348
// Capabilities reported by this backend. Coroutines are reported as
// supported only when building for Windows.
const Capabilities Caps = {
#if defined(_WIN32)
	true,  // CoroutinesSupported
#else
	false,  // CoroutinesSupported
#endif
};
356
// Emulated (narrower than native) vector types. Each one is encoded as a
// wider Ice vector type OR'ed with an element-count tag stored above bit
// EmulatedShift; T(Type *) masks the tag off to recover the Ice type.
enum EmulatedType
{
	EmulatedShift = 16,
	EmulatedV2 = 2 << EmulatedShift,
	EmulatedV4 = 4 << EmulatedShift,
	EmulatedV8 = 8 << EmulatedShift,
	EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,  // Mask covering all tags

	Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
	Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
	Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
	Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
	Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
	Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
};
372
// Empty subclasses of the Subzero types. The V() and B() helpers in this
// file reinterpret_cast Ice pointers to and from these types.
class Value : public Ice::Operand
{};
class SwitchCases : public Ice::InstSwitch
{};
class BasicBlock : public Ice::CfgNode
{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500379
380Ice::Type T(Type *t)
381{
382 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
383 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500384}
385
Nicolas Capens157ba262019-12-10 17:49:14 -0500386Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400387{
Ben Clayton713b8d32019-12-17 20:37:56 +0000388 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500389}
390
391Type *T(EmulatedType t)
392{
Ben Clayton713b8d32019-12-17 20:37:56 +0000393 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500394}
395
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500396std::vector<Ice::Type> T(const std::vector<Type *> &types)
397{
398 std::vector<Ice::Type> result;
399 result.reserve(types.size());
400 for(auto &t : types)
401 {
402 result.push_back(T(t));
403 }
404 return result;
405}
406
Nicolas Capens157ba262019-12-10 17:49:14 -0500407Value *V(Ice::Operand *v)
408{
Ben Clayton713b8d32019-12-17 20:37:56 +0000409 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500410}
411
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500412Ice::Operand *V(Value *v)
413{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500414 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500415}
416
Nicolas Capens157ba262019-12-10 17:49:14 -0500417BasicBlock *B(Ice::CfgNode *b)
418{
Ben Clayton713b8d32019-12-17 20:37:56 +0000419 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500420}
421
422static size_t typeSize(Type *type)
423{
424 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400425 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500426 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800427 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000428 case Type_v2i32: return 8;
429 case Type_v4i16: return 8;
430 case Type_v2i16: return 4;
431 case Type_v8i8: return 8;
432 case Type_v4i8: return 4;
433 case Type_v2f32: return 8;
434 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500435 }
436 }
437
438 return Ice::typeWidthInBytes(T(type));
439}
440
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500441static void createRetVoidIfNoRet()
442{
443 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
444 {
445 Nucleus::createRetVoid();
446 }
447}
448
// ELF header and section header structures matching the host pointer size:
// the 64-bit variants when pointers are 8 bytes, the 32-bit variants otherwise.
using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500451
452inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
453{
Ben Clayton713b8d32019-12-17 20:37:56 +0000454 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500455}
456
457inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
458{
459 return &sectionHeader(elfHeader)[index];
460}
461
462static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
463{
464 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
465
466 uint32_t index = relocation.getSymbol();
467 int table = relocationTable.sh_link;
468 void *symbolValue = nullptr;
469
470 if(index != SHN_UNDEF)
471 {
472 if(table == SHN_UNDEF) return nullptr;
473 const SectionHeader *symbolTable = elfSection(elfHeader, table);
474
475 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
476 if(index >= symtab_entries)
477 {
478 ASSERT(index < symtab_entries && "Symbol Index out of range");
479 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800480 }
481
Nicolas Capens157ba262019-12-10 17:49:14 -0500482 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000483 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500484 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800485
Nicolas Capens157ba262019-12-10 17:49:14 -0500486 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400487 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500488 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000489 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400490 }
491 else
492 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500493 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400494 }
Nicolas Capens66478362016-10-13 15:36:36 -0400495 }
496
Nicolas Capens157ba262019-12-10 17:49:14 -0500497 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000498 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500499
500 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400501 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400502 switch(relocation.getType())
503 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000504 case R_ARM_NONE:
505 // No relocation
506 break;
507 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500508 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000509 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500510 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
511 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
512 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400513 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000514 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500515 {
516 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
517 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
518 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400519 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000520 default:
521 ASSERT(false && "Unsupported relocation type");
522 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400523 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500524 }
525 else
526 {
527 switch(relocation.getType())
528 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000529 case R_386_NONE:
530 // No relocation
531 break;
532 case R_386_32:
533 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
534 break;
535 case R_386_PC32:
536 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
537 break;
538 default:
539 ASSERT(false && "Unsupported relocation type");
540 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500541 }
Nicolas Capens66478362016-10-13 15:36:36 -0400542 }
543
Nicolas Capens157ba262019-12-10 17:49:14 -0500544 return symbolValue;
545}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400546
Nicolas Capens157ba262019-12-10 17:49:14 -0500547static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
548{
549 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
550
551 uint32_t index = relocation.getSymbol();
552 int table = relocationTable.sh_link;
553 void *symbolValue = nullptr;
554
555 if(index != SHN_UNDEF)
556 {
557 if(table == SHN_UNDEF) return nullptr;
558 const SectionHeader *symbolTable = elfSection(elfHeader, table);
559
560 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
561 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400562 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500563 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400564 return nullptr;
565 }
566
Nicolas Capens157ba262019-12-10 17:49:14 -0500567 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000568 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500569 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400570
Nicolas Capens157ba262019-12-10 17:49:14 -0500571 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400572 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500573 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000574 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500575 }
576 else
577 {
578 return nullptr;
579 }
580 }
Nicolas Capens66478362016-10-13 15:36:36 -0400581
Nicolas Capens157ba262019-12-10 17:49:14 -0500582 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000583 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
584 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400585
Nicolas Capens157ba262019-12-10 17:49:14 -0500586 switch(relocation.getType())
587 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000588 case R_X86_64_NONE:
589 // No relocation
590 break;
591 case R_X86_64_64:
592 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
593 break;
594 case R_X86_64_PC32:
595 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
596 break;
597 case R_X86_64_32S:
598 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
599 break;
600 default:
601 ASSERT(false && "Unsupported relocation type");
602 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 }
604
605 return symbolValue;
606}
607
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500608void *loadImage(uint8_t *const elfImage, size_t &codeSize, const char *functionName = nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -0500609{
Ben Clayton713b8d32019-12-17 20:37:56 +0000610 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500611
612 if(!elfHeader->checkMagic())
613 {
614 return nullptr;
615 }
616
617 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000618 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
619#if defined(__i386__)
620 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
621#elif defined(__x86_64__)
622 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
623#elif defined(__arm__)
624 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
625#elif defined(__aarch64__)
626 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
627#elif defined(__mips__)
628 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
629#else
630# error "Unsupported platform"
631#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500632
Ben Clayton713b8d32019-12-17 20:37:56 +0000633 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500634 void *entry = nullptr;
635
636 for(int i = 0; i < elfHeader->e_shnum; i++)
637 {
638 if(sectionHeader[i].sh_type == SHT_PROGBITS)
639 {
640 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
641 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500642 auto getCurrSectionName = [&]() {
643 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
644 return reinterpret_cast<const char *>(elfImage + sectionNameOffset);
645 };
646 if(functionName && strstr(getCurrSectionName(), functionName) == nullptr)
647 {
648 continue;
649 }
650
Nicolas Capens157ba262019-12-10 17:49:14 -0500651 entry = elfImage + sectionHeader[i].sh_offset;
652 codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400653 }
654 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500655 else if(sectionHeader[i].sh_type == SHT_REL)
656 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000657 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400658
Nicolas Capens157ba262019-12-10 17:49:14 -0500659 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
660 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000661 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500662 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
663 }
664 }
665 else if(sectionHeader[i].sh_type == SHT_RELA)
666 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000667 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500668
669 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
670 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000671 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500672 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
673 }
674 }
675 }
676
677 return entry;
678}
679
680template<typename T>
681struct ExecutableAllocator
682{
683 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000684 template<class U>
685 ExecutableAllocator(const ExecutableAllocator<U> &other)
686 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500687
688 using value_type = T;
689 using size_type = std::size_t;
690
691 T *allocate(size_type n)
692 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000693 return (T *)allocateMemoryPages(
694 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500695 }
696
697 void deallocate(T *p, size_type n)
698 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800699 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500700 }
701};
702
703class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
704{
705 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
706 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
707
708public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000709 ELFMemoryStreamer()
710 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500711 {
712 position = 0;
713 buffer.reserve(0x1000);
714 }
715
716 ~ELFMemoryStreamer() override
717 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500718 }
719
720 void write8(uint8_t Value) override
721 {
722 if(position == (uint64_t)buffer.size())
723 {
724 buffer.push_back(Value);
725 position++;
726 }
727 else if(position < (uint64_t)buffer.size())
728 {
729 buffer[position] = Value;
730 position++;
731 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000732 else
733 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500734 }
735
736 void writeBytes(llvm::StringRef Bytes) override
737 {
738 std::size_t oldSize = buffer.size();
739 buffer.resize(oldSize + Bytes.size());
740 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
741 position += Bytes.size();
742 }
743
744 uint64_t tell() const override { return position; }
745
746 void seek(uint64_t Off) override { position = Off; }
747
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500748 const void *getEntryByName(const char *name)
Nicolas Capens157ba262019-12-10 17:49:14 -0500749 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500750 size_t codeSize = 0;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500751 const void *entry = loadImage(&buffer[0], codeSize, name);
Nicolas Capens157ba262019-12-10 17:49:14 -0500752
753#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500754 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
755#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000756 __builtin___clear_cache((char *)entry, (char *)entry + codeSize);
Nicolas Capens157ba262019-12-10 17:49:14 -0500757#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500758
Nicolas Capens598f8d82016-09-26 15:09:10 -0400759 return entry;
760 }
761
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500762 void finalize()
763 {
764 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
765
766 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
767 }
768
Ben Clayton713b8d32019-12-17 20:37:56 +0000769 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400770 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500771 ASSERT(func);
772 funcs[index] = func;
773 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400774
Nicolas Capens157ba262019-12-10 17:49:14 -0500775 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400776 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500777 ASSERT(funcs[index]);
778 return funcs[index];
779 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400780
Antonio Maiorano02a39532020-01-21 15:15:34 -0500781 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500782 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500783 // TODO(b/148086935): Replace with a buffer allocator.
784 size_t space = size + alignment;
785 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
786 void *ptr = buf.get();
787 void *alignedPtr = std::align(alignment, size, ptr, space);
788 ASSERT(alignedPtr);
789 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500790 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500791 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500792 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400793
Nicolas Capens157ba262019-12-10 17:49:14 -0500794private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000795 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500796 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
797 std::size_t position;
798 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500799};
800
801Nucleus::Nucleus()
802{
Ben Clayton713b8d32019-12-17 20:37:56 +0000803 ::codegenMutex.lock(); // Reactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500804
805 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
806 Ice::ClFlags::getParsedClFlags(Flags);
807
Ben Clayton713b8d32019-12-17 20:37:56 +0000808#if defined(__arm__)
809 Flags.setTargetArch(Ice::Target_ARM32);
810 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
811#elif defined(__mips__)
812 Flags.setTargetArch(Ice::Target_MIPS32);
813 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
814#else // x86
815 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
816 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
817#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500818 Flags.setOutFileType(Ice::FT_Elf);
819 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
820 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
821 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
822 Flags.setDisableHybridAssembly(true);
823
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500824 // Emit functions into separate sections in the ELF so we can find them by name
825 Flags.setFunctionSections(true);
826
Nicolas Capens157ba262019-12-10 17:49:14 -0500827 static llvm::raw_os_ostream cout(std::cout);
828 static llvm::raw_os_ostream cerr(std::cerr);
829
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500830 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500831 {
832 // Decorate text asm with liveness info
833 Flags.setDecorateAsm(true);
834 }
835
Ben Clayton713b8d32019-12-17 20:37:56 +0000836 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500837 {
838 std::error_code errorCode;
839 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
840 ::elfFile = new Ice::ELFFileStreamer(*out);
841 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
842 }
843 else
844 {
845 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
846 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
847 ::routine = elfMemory;
848 }
849}
850
851Nucleus::~Nucleus()
852{
853 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500854 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500855
856 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500857 ::allocator = nullptr;
858
Nicolas Capens157ba262019-12-10 17:49:14 -0500859 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500860 ::function = nullptr;
861
Nicolas Capens157ba262019-12-10 17:49:14 -0500862 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500863 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500864
865 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500866 ::elfFile = nullptr;
867
Nicolas Capens157ba262019-12-10 17:49:14 -0500868 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500869 ::out = nullptr;
870
871 ::basicBlock = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500872
873 ::codegenMutex.unlock();
874}
875
876void Nucleus::setDefaultConfig(const Config &cfg)
877{
878 std::unique_lock<std::mutex> lock(::defaultConfigLock);
879 ::defaultConfig() = cfg;
880}
881
882void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
883{
884 std::unique_lock<std::mutex> lock(::defaultConfigLock);
885 auto &config = ::defaultConfig();
886 config = cfgEdit.apply(config);
887}
888
889Config Nucleus::getDefaultConfig()
890{
891 std::unique_lock<std::mutex> lock(::defaultConfigLock);
892 return ::defaultConfig();
893}
894
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500895// This function lowers and produces executable binary code in memory for the input functions,
896// and returns a Routine with the entry points to these functions.
897template<size_t Count>
898static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500899{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500900 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
901 // and GlobalContext::emitItems.
902
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500903 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500904 {
905 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500906 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500907 }
908
909 ::context->emitFileHeader();
910
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500911 // Translate
912
913 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500914 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500915 Ice::Cfg *currFunc = functions[i];
916
917 // Install function allocator in TLS for Cfg-specific container allocators
918 Ice::CfgLocalAllocatorScope allocScope(currFunc);
919
920 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
921
922 rr::optimize(currFunc);
923
924 currFunc->computeInOutEdges();
925 ASSERT(!currFunc->hasError());
926
927 currFunc->translate();
928 ASSERT(!currFunc->hasError());
929
930 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
931
932 if(subzeroEmitTextAsm)
933 {
934 currFunc->emit();
935 }
936
937 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -0500938 }
939
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500940 // Emit items
941
942 ::context->lowerGlobals("");
943
Nicolas Capens157ba262019-12-10 17:49:14 -0500944 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500945
946 for(size_t i = 0; i < Count; ++i)
947 {
948 Ice::Cfg *currFunc = functions[i];
949
950 // Accumulate globals from functions to emit into the "last" section at the end
951 auto globals = currFunc->getGlobalInits();
952 if(globals && !globals->empty())
953 {
954 ::context->getGlobals()->merge(globals.get());
955 }
956
957 auto assembler = currFunc->releaseAssembler();
958 assembler->alignFunction();
959 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
960 }
961
Nicolas Capens157ba262019-12-10 17:49:14 -0500962 ::context->lowerGlobals("last");
963 ::context->lowerConstants();
964 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500965
Nicolas Capens157ba262019-12-10 17:49:14 -0500966 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500967 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -0500968 objectWriter->writeNonUserSections();
969
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500970 // Done compiling functions, get entry pointers to each of them
971 for(size_t i = 0; i < Count; ++i)
972 {
973 const void *entry = ::routine->getEntryByName(names[i]);
974 ::routine->setEntry(i, entry);
975 }
976
977 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -0500978
979 Routine *handoffRoutine = ::routine;
980 ::routine = nullptr;
981
982 return std::shared_ptr<Routine>(handoffRoutine);
983}
984
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500985std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
986{
987 createRetVoidIfNoRet();
988 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
989}
990
Nicolas Capens157ba262019-12-10 17:49:14 -0500991Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
992{
993 Ice::Type type = T(t);
994 int typeSize = Ice::typeWidthInBytes(type);
995 int totalSize = typeSize * (arraySize ? arraySize : 1);
996
997 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
998 auto address = ::function->makeVariable(T(getPointerType(t)));
999 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1000 ::function->getEntryNode()->getInsts().push_front(alloca);
1001
1002 return V(address);
1003}
1004
1005BasicBlock *Nucleus::createBasicBlock()
1006{
1007 return B(::function->makeNode());
1008}
1009
1010BasicBlock *Nucleus::getInsertBlock()
1011{
1012 return B(::basicBlock);
1013}
1014
1015void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1016{
Ben Clayton713b8d32019-12-17 20:37:56 +00001017 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001018
1019 Variable::materializeAll();
1020
1021 ::basicBlock = basicBlock;
1022}
1023
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001024void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001025{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001026 ASSERT(::function == nullptr);
1027 ASSERT(::allocator == nullptr);
1028 ASSERT(::basicBlock == nullptr);
1029
1030 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1031
1032 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1033 // becomes invalid if another one is created; for example, when creating await and destroy functions
1034 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1035 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001036 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1037
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001038 ::basicBlock = ::function->getEntryNode();
Nicolas Capens157ba262019-12-10 17:49:14 -05001039}
1040
1041Value *Nucleus::getArgument(unsigned int index)
1042{
1043 return V(::function->getArgs()[index]);
1044}
1045
1046void Nucleus::createRetVoid()
1047{
1048 // Code generated after this point is unreachable, so any variables
1049 // being read can safely return an undefined value. We have to avoid
1050 // materializing variables after the terminator ret instruction.
1051 Variable::killUnmaterialized();
1052
1053 Ice::InstRet *ret = Ice::InstRet::create(::function);
1054 ::basicBlock->appendInst(ret);
1055}
1056
1057void Nucleus::createRet(Value *v)
1058{
1059 // Code generated after this point is unreachable, so any variables
1060 // being read can safely return an undefined value. We have to avoid
1061 // materializing variables after the terminator ret instruction.
1062 Variable::killUnmaterialized();
1063
1064 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1065 ::basicBlock->appendInst(ret);
1066}
1067
1068void Nucleus::createBr(BasicBlock *dest)
1069{
1070 Variable::materializeAll();
1071
1072 auto br = Ice::InstBr::create(::function, dest);
1073 ::basicBlock->appendInst(br);
1074}
1075
1076void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1077{
1078 Variable::materializeAll();
1079
1080 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1081 ::basicBlock->appendInst(br);
1082}
1083
1084static bool isCommutative(Ice::InstArithmetic::OpKind op)
1085{
1086 switch(op)
1087 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001088 case Ice::InstArithmetic::Add:
1089 case Ice::InstArithmetic::Fadd:
1090 case Ice::InstArithmetic::Mul:
1091 case Ice::InstArithmetic::Fmul:
1092 case Ice::InstArithmetic::And:
1093 case Ice::InstArithmetic::Or:
1094 case Ice::InstArithmetic::Xor:
1095 return true;
1096 default:
1097 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001098 }
1099}
1100
1101static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1102{
1103 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1104
1105 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1106
1107 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1108 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1109 ::basicBlock->appendInst(arithmetic);
1110
1111 return V(result);
1112}
1113
1114Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1115{
1116 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1117}
1118
1119Value *Nucleus::createSub(Value *lhs, Value *rhs)
1120{
1121 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1122}
1123
1124Value *Nucleus::createMul(Value *lhs, Value *rhs)
1125{
1126 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1127}
1128
1129Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1130{
1131 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1132}
1133
1134Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1135{
1136 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1137}
1138
1139Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1140{
1141 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1142}
1143
1144Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1145{
1146 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1147}
1148
1149Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1150{
1151 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1152}
1153
1154Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1155{
1156 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1157}
1158
1159Value *Nucleus::createURem(Value *lhs, Value *rhs)
1160{
1161 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1162}
1163
1164Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1165{
1166 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1167}
1168
1169Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1170{
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001171 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1172 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001173 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001174 return nullptr;
1175}
1176
1177RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1178{
1179 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001180}
1181
1182Value *Nucleus::createShl(Value *lhs, Value *rhs)
1183{
1184 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1185}
1186
1187Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1188{
1189 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1190}
1191
1192Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1193{
1194 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1195}
1196
1197Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1198{
1199 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1200}
1201
1202Value *Nucleus::createOr(Value *lhs, Value *rhs)
1203{
1204 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1205}
1206
1207Value *Nucleus::createXor(Value *lhs, Value *rhs)
1208{
1209 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1210}
1211
1212Value *Nucleus::createNeg(Value *v)
1213{
1214 return createSub(createNullValue(T(v->getType())), v);
1215}
1216
1217Value *Nucleus::createFNeg(Value *v)
1218{
Ben Clayton713b8d32019-12-17 20:37:56 +00001219 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1220 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001221
1222 return createFSub(negativeZero, v);
1223}
1224
1225Value *Nucleus::createNot(Value *v)
1226{
1227 if(Ice::isScalarIntegerType(v->getType()))
1228 {
1229 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1230 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001231 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001232 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001233 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001234 return createXor(v, createConstantVector(c, T(v->getType())));
1235 }
1236}
1237
1238Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1239{
Ben Clayton713b8d32019-12-17 20:37:56 +00001240 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -05001241 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
1242
1243 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001244 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001245
Ben Clayton713b8d32019-12-17 20:37:56 +00001246 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001247 {
1248 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001249 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001250 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001251 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001252 auto pointer = RValue<Pointer<Byte>>(ptr);
1253 Int x = *Pointer<Int>(pointer);
1254
1255 Int4 vector;
1256 vector = Insert(vector, x, 0);
1257
Antonio Maiorano02a39532020-01-21 15:15:34 -05001258 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001259 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1260 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001261 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001262 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001263 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001264 auto pointer = RValue<Pointer<Byte>>(ptr);
1265 Int x = *Pointer<Int>(pointer);
1266 Int y = *Pointer<Int>(pointer + 4);
1267
1268 Int4 vector;
1269 vector = Insert(vector, x, 0);
1270 vector = Insert(vector, y, 1);
1271
Antonio Maiorano02a39532020-01-21 15:15:34 -05001272 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001273 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1274 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001275 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001276 else
1277 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001278 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001279 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001280 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001281 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001282 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001283 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001284 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1285 load->addArg(ptr);
1286 load->addArg(::context->getConstantInt32(typeSize(type)));
1287 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001288 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001289 }
1290 else
1291 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001292 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001293 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001294
Antonio Maiorano02a39532020-01-21 15:15:34 -05001295 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001296 return V(result);
1297}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001298
Nicolas Capens157ba262019-12-10 17:49:14 -05001299Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1300{
Ben Clayton713b8d32019-12-17 20:37:56 +00001301 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -05001302 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
Nicolas Capens598f8d82016-09-26 15:09:10 -04001303
Ben Clayton713b8d32019-12-17 20:37:56 +00001304#if __has_feature(memory_sanitizer)
1305 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
1306 if(align != 0)
1307 {
1308 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1309 call->addArg(ptr);
1310 call->addArg(::context->getConstantInt64(typeSize(type)));
1311 ::basicBlock->appendInst(call);
1312 }
1313#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001314
Nicolas Capens157ba262019-12-10 17:49:14 -05001315 int valueType = (int)reinterpret_cast<intptr_t>(type);
1316
Ben Clayton713b8d32019-12-17 20:37:56 +00001317 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001318 {
1319 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001320 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001321 if(typeSize(type) == 4)
1322 {
1323 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1324 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1325 ::basicBlock->appendInst(bitcast);
1326
1327 RValue<Int4> v(V(vector));
1328
1329 auto pointer = RValue<Pointer<Byte>>(ptr);
1330 Int x = Extract(v, 0);
1331 *Pointer<Int>(pointer) = x;
1332 }
1333 else if(typeSize(type) == 8)
1334 {
1335 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1336 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1337 ::basicBlock->appendInst(bitcast);
1338
1339 RValue<Int4> v(V(vector));
1340
1341 auto pointer = RValue<Pointer<Byte>>(ptr);
1342 Int x = Extract(v, 0);
1343 *Pointer<Int>(pointer) = x;
1344 Int y = Extract(v, 1);
1345 *Pointer<Int>(pointer + 4) = y;
1346 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001347 else
1348 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001349 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001350 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001351 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001352 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001353 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1354 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1355 store->addArg(value);
1356 store->addArg(ptr);
1357 store->addArg(::context->getConstantInt32(typeSize(type)));
1358 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001359 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001360 }
1361 else
1362 {
1363 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001364
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001365 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001366 ::basicBlock->appendInst(store);
1367 }
1368
1369 return value;
1370}
1371
1372Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1373{
1374 ASSERT(index->getType() == Ice::IceType_i32);
1375
1376 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1377 {
1378 int32_t offset = constant->getValue() * (int)typeSize(type);
1379
1380 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001381 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001382 return ptr;
1383 }
1384
Nicolas Capens157ba262019-12-10 17:49:14 -05001385 return createAdd(ptr, createConstantInt(offset));
1386 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001387
Nicolas Capens157ba262019-12-10 17:49:14 -05001388 if(!Ice::isByteSizedType(T(type)))
1389 {
1390 index = createMul(index, createConstantInt((int)typeSize(type)));
1391 }
1392
Ben Clayton713b8d32019-12-17 20:37:56 +00001393 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001394 {
1395 if(unsignedIndex)
1396 {
1397 index = createZExt(index, T(Ice::IceType_i64));
1398 }
1399 else
1400 {
1401 index = createSExt(index, T(Ice::IceType_i64));
1402 }
1403 }
1404
1405 return createAdd(ptr, index);
1406}
1407
Antonio Maiorano370cba52019-12-31 11:36:07 -05001408static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1409{
1410 Ice::Variable *result = ::function->makeVariable(value->getType());
1411
1412 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1413 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1414 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1415 auto op = ::context->getConstantInt32(rmwOp);
1416 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1417 inst->addArg(op);
1418 inst->addArg(ptr);
1419 inst->addArg(value);
1420 inst->addArg(order);
1421 ::basicBlock->appendInst(inst);
1422
1423 return V(result);
1424}
1425
Nicolas Capens157ba262019-12-10 17:49:14 -05001426Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1427{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001428 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001429}
1430
1431Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1432{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001433 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001434}
1435
1436Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1437{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001438 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001439}
1440
1441Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1442{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001443 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001444}
1445
1446Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1447{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001448 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001449}
1450
1451Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1452{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001453 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001454}
1455
1456Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1457{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001458 Ice::Variable *result = ::function->makeVariable(value->getType());
1459
1460 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1461 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1462 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1463 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1464 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1465 inst->addArg(ptr);
1466 inst->addArg(compare);
1467 inst->addArg(value);
1468 inst->addArg(orderEq);
1469 inst->addArg(orderNeq);
1470 ::basicBlock->appendInst(inst);
1471
1472 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001473}
1474
1475static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1476{
1477 if(v->getType() == T(destType))
1478 {
1479 return v;
1480 }
1481
1482 Ice::Variable *result = ::function->makeVariable(T(destType));
1483 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1484 ::basicBlock->appendInst(cast);
1485
1486 return V(result);
1487}
1488
1489Value *Nucleus::createTrunc(Value *v, Type *destType)
1490{
1491 return createCast(Ice::InstCast::Trunc, v, destType);
1492}
1493
1494Value *Nucleus::createZExt(Value *v, Type *destType)
1495{
1496 return createCast(Ice::InstCast::Zext, v, destType);
1497}
1498
1499Value *Nucleus::createSExt(Value *v, Type *destType)
1500{
1501 return createCast(Ice::InstCast::Sext, v, destType);
1502}
1503
1504Value *Nucleus::createFPToUI(Value *v, Type *destType)
1505{
1506 return createCast(Ice::InstCast::Fptoui, v, destType);
1507}
1508
1509Value *Nucleus::createFPToSI(Value *v, Type *destType)
1510{
1511 return createCast(Ice::InstCast::Fptosi, v, destType);
1512}
1513
1514Value *Nucleus::createSIToFP(Value *v, Type *destType)
1515{
1516 return createCast(Ice::InstCast::Sitofp, v, destType);
1517}
1518
1519Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1520{
1521 return createCast(Ice::InstCast::Fptrunc, v, destType);
1522}
1523
1524Value *Nucleus::createFPExt(Value *v, Type *destType)
1525{
1526 return createCast(Ice::InstCast::Fpext, v, destType);
1527}
1528
1529Value *Nucleus::createBitCast(Value *v, Type *destType)
1530{
1531 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1532 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1533 // emulate them by writing to the stack and reading back as the destination type.
1534 if(emulateMismatchedBitCast)
1535 {
1536 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1537 {
1538 Value *address = allocateStackVariable(destType);
1539 createStore(v, address, T(v->getType()));
1540 return createLoad(address, destType);
1541 }
1542 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1543 {
1544 Value *address = allocateStackVariable(T(v->getType()));
1545 createStore(v, address, T(v->getType()));
1546 return createLoad(address, destType);
1547 }
1548 }
1549
1550 return createCast(Ice::InstCast::Bitcast, v, destType);
1551}
1552
1553static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1554{
1555 ASSERT(lhs->getType() == rhs->getType());
1556
1557 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1558 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1559 ::basicBlock->appendInst(cmp);
1560
1561 return V(result);
1562}
1563
1564Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1565{
1566 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1567}
1568
1569Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1570{
1571 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1572}
1573
1574Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1575{
1576 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1577}
1578
1579Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1580{
1581 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1582}
1583
1584Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1585{
1586 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1587}
1588
1589Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1590{
1591 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1592}
1593
1594Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1595{
1596 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1597}
1598
1599Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1600{
1601 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1602}
1603
1604Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1605{
1606 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1607}
1608
1609Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1610{
1611 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1612}
1613
1614Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1615{
1616 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1617}
1618
1619static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1620{
1621 ASSERT(lhs->getType() == rhs->getType());
1622 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1623
1624 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1625 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1626 ::basicBlock->appendInst(cmp);
1627
1628 return V(result);
1629}
1630
1631Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1632{
1633 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1634}
1635
1636Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1637{
1638 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1639}
1640
1641Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1642{
1643 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1644}
1645
1646Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1647{
1648 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1649}
1650
1651Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1652{
1653 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1654}
1655
1656Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1657{
1658 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1659}
1660
1661Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1662{
1663 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1664}
1665
1666Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1667{
1668 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1669}
1670
1671Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1672{
1673 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1674}
1675
1676Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1677{
1678 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1679}
1680
1681Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1682{
1683 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1684}
1685
1686Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1687{
1688 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1689}
1690
1691Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1692{
1693 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1694}
1695
1696Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1697{
1698 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1699}
1700
1701Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1702{
1703 auto result = ::function->makeVariable(T(type));
1704 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1705 ::basicBlock->appendInst(extract);
1706
1707 return V(result);
1708}
1709
1710Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1711{
1712 auto result = ::function->makeVariable(vector->getType());
1713 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1714 ::basicBlock->appendInst(insert);
1715
1716 return V(result);
1717}
1718
1719Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1720{
1721 ASSERT(V1->getType() == V2->getType());
1722
1723 int size = Ice::typeNumElements(V1->getType());
1724 auto result = ::function->makeVariable(V1->getType());
1725 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1726
1727 for(int i = 0; i < size; i++)
1728 {
1729 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1730 }
1731
1732 ::basicBlock->appendInst(shuffle);
1733
1734 return V(result);
1735}
1736
1737Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1738{
1739 ASSERT(ifTrue->getType() == ifFalse->getType());
1740
1741 auto result = ::function->makeVariable(ifTrue->getType());
1742 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1743 ::basicBlock->appendInst(select);
1744
1745 return V(result);
1746}
1747
1748SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1749{
1750 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1751 ::basicBlock->appendInst(switchInst);
1752
Ben Clayton713b8d32019-12-17 20:37:56 +00001753 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001754}
1755
1756void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1757{
1758 switchCases->addBranch(label, label, branch);
1759}
1760
1761void Nucleus::createUnreachable()
1762{
1763 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1764 ::basicBlock->appendInst(unreachable);
1765}
1766
1767Type *Nucleus::getPointerType(Type *ElementType)
1768{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001769 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001770}
1771
1772Value *Nucleus::createNullValue(Type *Ty)
1773{
1774 if(Ice::isVectorType(T(Ty)))
1775 {
1776 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001777 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001778 return createConstantVector(c, Ty);
1779 }
1780 else
1781 {
1782 return V(::context->getConstantZero(T(Ty)));
1783 }
1784}
1785
1786Value *Nucleus::createConstantLong(int64_t i)
1787{
1788 return V(::context->getConstantInt64(i));
1789}
1790
1791Value *Nucleus::createConstantInt(int i)
1792{
1793 return V(::context->getConstantInt32(i));
1794}
1795
1796Value *Nucleus::createConstantInt(unsigned int i)
1797{
1798 return V(::context->getConstantInt32(i));
1799}
1800
1801Value *Nucleus::createConstantBool(bool b)
1802{
1803 return V(::context->getConstantInt1(b));
1804}
1805
1806Value *Nucleus::createConstantByte(signed char i)
1807{
1808 return V(::context->getConstantInt8(i));
1809}
1810
1811Value *Nucleus::createConstantByte(unsigned char i)
1812{
1813 return V(::context->getConstantInt8(i));
1814}
1815
1816Value *Nucleus::createConstantShort(short i)
1817{
1818 return V(::context->getConstantInt16(i));
1819}
1820
1821Value *Nucleus::createConstantShort(unsigned short i)
1822{
1823 return V(::context->getConstantInt16(i));
1824}
1825
1826Value *Nucleus::createConstantFloat(float x)
1827{
1828 return V(::context->getConstantFloat(x));
1829}
1830
1831Value *Nucleus::createNullPointer(Type *Ty)
1832{
Ben Clayton713b8d32019-12-17 20:37:56 +00001833 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05001834}
1835
Antonio Maiorano02a39532020-01-21 15:15:34 -05001836static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
1837{
1838 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
1839}
1840
Nicolas Capens157ba262019-12-10 17:49:14 -05001841Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1842{
1843 const int vectorSize = 16;
1844 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
1845 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05001846
1847 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00001848 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001849
1850 // TODO(148082873): Fix global variable constants when generating multiple functions
1851 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001852
1853 switch((int)reinterpret_cast<intptr_t>(type))
1854 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001855 case Ice::IceType_v4i32:
1856 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001857 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001858 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001859 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001860 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001861 }
1862 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001863 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001864 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001865 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001866 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001867 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001868 }
1869 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001870 case Ice::IceType_v8i16:
1871 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001872 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001873 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001874 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001875 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001876 }
1877 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001878 case Ice::IceType_v16i8:
1879 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001880 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001881 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001882 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001883 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001884 }
1885 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001886 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001887 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001888 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001889 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001890 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001891 }
1892 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001893 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001894 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001895 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001896 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001897 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001898 }
1899 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001900 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05001901 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001902 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001903 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001904 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001905 }
1906 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001907 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05001908 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001909 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001910 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001911 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001912 }
1913 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001914 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05001915 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001916 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001917 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001918 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001919 }
1920 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001921 default:
1922 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001923 }
1924
Antonio Maiorano02a39532020-01-21 15:15:34 -05001925 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05001926
Antonio Maiorano02a39532020-01-21 15:15:34 -05001927 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001928 return V(result);
1929}
1930
1931Value *Nucleus::createConstantVector(const double *constants, Type *type)
1932{
Ben Clayton713b8d32019-12-17 20:37:56 +00001933 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05001934}
1935
1936Type *Void::getType()
1937{
1938 return T(Ice::IceType_void);
1939}
1940
1941Type *Bool::getType()
1942{
1943 return T(Ice::IceType_i1);
1944}
1945
1946Type *Byte::getType()
1947{
1948 return T(Ice::IceType_i8);
1949}
1950
1951Type *SByte::getType()
1952{
1953 return T(Ice::IceType_i8);
1954}
1955
1956Type *Short::getType()
1957{
1958 return T(Ice::IceType_i16);
1959}
1960
1961Type *UShort::getType()
1962{
1963 return T(Ice::IceType_i16);
1964}
1965
1966Type *Byte4::getType()
1967{
1968 return T(Type_v4i8);
1969}
1970
1971Type *SByte4::getType()
1972{
1973 return T(Type_v4i8);
1974}
1975
Ben Clayton713b8d32019-12-17 20:37:56 +00001976namespace {
1977RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05001978{
Ben Clayton713b8d32019-12-17 20:37:56 +00001979 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05001980}
1981
Ben Clayton713b8d32019-12-17 20:37:56 +00001982RValue<Byte> Extract(RValue<Byte8> val, int i)
1983{
1984 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
1985}
1986
1987RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
1988{
1989 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
1990}
1991} // namespace
1992
Nicolas Capens157ba262019-12-10 17:49:14 -05001993RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
1994{
1995 if(emulateIntrinsics)
1996 {
1997 Byte8 result;
1998 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1999 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2000 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2001 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2002 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2003 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2004 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2005 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2006
2007 return result;
2008 }
2009 else
2010 {
2011 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002012 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002013 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2014 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2015 paddusb->addArg(x.value);
2016 paddusb->addArg(y.value);
2017 ::basicBlock->appendInst(paddusb);
2018
2019 return RValue<Byte8>(V(result));
2020 }
2021}
2022
2023RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2024{
2025 if(emulateIntrinsics)
2026 {
2027 Byte8 result;
2028 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2029 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2030 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2031 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2032 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2033 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2034 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2035 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2036
2037 return result;
2038 }
2039 else
2040 {
2041 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002042 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002043 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2044 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2045 psubusw->addArg(x.value);
2046 psubusw->addArg(y.value);
2047 ::basicBlock->appendInst(psubusw);
2048
2049 return RValue<Byte8>(V(result));
2050 }
2051}
2052
2053RValue<SByte> Extract(RValue<SByte8> val, int i)
2054{
2055 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
2056}
2057
2058RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2059{
2060 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2061}
2062
2063RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2064{
2065 if(emulateIntrinsics)
2066 {
2067 SByte8 result;
2068 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2069 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2070 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2071 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2072 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2073 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2074 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2075 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2076
2077 return result;
2078 }
2079 else
2080 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002081#if defined(__i386__) || defined(__x86_64__)
2082 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2083 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2084 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002085
Ben Clayton713b8d32019-12-17 20:37:56 +00002086 return As<SByte8>(hi | lo);
2087#else
2088 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2089#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002090 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002091}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002092
Nicolas Capens157ba262019-12-10 17:49:14 -05002093RValue<Int> SignMask(RValue<Byte8> x)
2094{
2095 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002096 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002097 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2098 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002099 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002100 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002101 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002102 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002103 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002104 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2105 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2106 movmsk->addArg(x.value);
2107 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002108
Nicolas Capens157ba262019-12-10 17:49:14 -05002109 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002110 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002111}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002112
//	RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
//	{
//		return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
//	}
2117
Nicolas Capens157ba262019-12-10 17:49:14 -05002118RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2119{
2120 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2121}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002122
Nicolas Capens157ba262019-12-10 17:49:14 -05002123Type *Byte8::getType()
2124{
2125 return T(Type_v8i8);
2126}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002127
//	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
//	{
//		return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
//	}

//	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
//	{
//		return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
//	}
2137
Nicolas Capens157ba262019-12-10 17:49:14 -05002138RValue<SByte> SaturateSigned(RValue<Short> x)
2139{
2140 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2141}
2142
2143RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2144{
2145 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002146 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002147 SByte8 result;
2148 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2149 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2150 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2151 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2152 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2153 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2154 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2155 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002156
Nicolas Capens157ba262019-12-10 17:49:14 -05002157 return result;
2158 }
2159 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002160 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002161 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002162 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002163 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2164 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2165 paddsb->addArg(x.value);
2166 paddsb->addArg(y.value);
2167 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002168
Nicolas Capens157ba262019-12-10 17:49:14 -05002169 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002170 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002171}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002172
Nicolas Capens157ba262019-12-10 17:49:14 -05002173RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2174{
2175 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002176 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002177 SByte8 result;
2178 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2179 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2180 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2181 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2182 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2183 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2184 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2185 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002186
Nicolas Capens157ba262019-12-10 17:49:14 -05002187 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002188 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002189 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002190 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002191 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002192 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002193 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2194 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2195 psubsb->addArg(x.value);
2196 psubsb->addArg(y.value);
2197 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002198
Nicolas Capens157ba262019-12-10 17:49:14 -05002199 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002200 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002201}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002202
Nicolas Capens157ba262019-12-10 17:49:14 -05002203RValue<Int> SignMask(RValue<SByte8> x)
2204{
2205 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002206 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002207 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2208 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002209 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002210 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002211 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002212 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002213 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002214 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2215 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2216 movmsk->addArg(x.value);
2217 ::basicBlock->appendInst(movmsk);
2218
2219 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002220 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002221}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002222
Nicolas Capens157ba262019-12-10 17:49:14 -05002223RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2224{
2225 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2226}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002227
Nicolas Capens157ba262019-12-10 17:49:14 -05002228RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2229{
2230 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2231}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002232
Nicolas Capens157ba262019-12-10 17:49:14 -05002233Type *SByte8::getType()
2234{
2235 return T(Type_v8i8);
2236}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002237
Nicolas Capens157ba262019-12-10 17:49:14 -05002238Type *Byte16::getType()
2239{
2240 return T(Ice::IceType_v16i8);
2241}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002242
Nicolas Capens157ba262019-12-10 17:49:14 -05002243Type *SByte16::getType()
2244{
2245 return T(Ice::IceType_v16i8);
2246}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002247
Nicolas Capens157ba262019-12-10 17:49:14 -05002248Type *Short2::getType()
2249{
2250 return T(Type_v2i16);
2251}
Nicolas Capensd4227962016-11-09 14:24:25 -05002252
Nicolas Capens157ba262019-12-10 17:49:14 -05002253Type *UShort2::getType()
2254{
2255 return T(Type_v2i16);
2256}
Nicolas Capensd4227962016-11-09 14:24:25 -05002257
Nicolas Capens157ba262019-12-10 17:49:14 -05002258Short4::Short4(RValue<Int4> cast)
2259{
Ben Clayton713b8d32019-12-17 20:37:56 +00002260 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002261 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2262 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2263
2264 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
2265 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2266
2267 storeValue(short4);
2268}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002269
2270// Short4::Short4(RValue<Float> cast)
2271// {
2272// }
2273
Nicolas Capens157ba262019-12-10 17:49:14 -05002274Short4::Short4(RValue<Float4> cast)
2275{
Ben Claytonce54c592020-02-07 11:30:51 +00002276 UNIMPLEMENTED_NO_BUG("Short4::Short4(RValue<Float4> cast)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002277}
2278
2279RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2280{
2281 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002282 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002283 Short4 result;
2284 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2285 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2286 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2287 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002288
2289 return result;
2290 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002291 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002292 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002293 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2294 }
2295}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002296
Nicolas Capens157ba262019-12-10 17:49:14 -05002297RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2298{
2299 if(emulateIntrinsics)
2300 {
2301 Short4 result;
2302 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2303 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2304 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2305 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2306
2307 return result;
2308 }
2309 else
2310 {
2311 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2312 }
2313}
2314
2315RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2316{
2317 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2318 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2319 ::basicBlock->appendInst(cmp);
2320
2321 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2322 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2323 ::basicBlock->appendInst(select);
2324
2325 return RValue<Short4>(V(result));
2326}
2327
2328RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2329{
2330 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2331 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2332 ::basicBlock->appendInst(cmp);
2333
2334 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2335 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2336 ::basicBlock->appendInst(select);
2337
2338 return RValue<Short4>(V(result));
2339}
2340
2341RValue<Short> SaturateSigned(RValue<Int> x)
2342{
2343 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2344}
2345
2346RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2347{
2348 if(emulateIntrinsics)
2349 {
2350 Short4 result;
2351 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2352 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2353 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2354 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2355
2356 return result;
2357 }
2358 else
2359 {
2360 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002361 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002362 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2363 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2364 paddsw->addArg(x.value);
2365 paddsw->addArg(y.value);
2366 ::basicBlock->appendInst(paddsw);
2367
2368 return RValue<Short4>(V(result));
2369 }
2370}
2371
2372RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2373{
2374 if(emulateIntrinsics)
2375 {
2376 Short4 result;
2377 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2378 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2379 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2380 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2381
2382 return result;
2383 }
2384 else
2385 {
2386 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002387 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002388 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2389 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2390 psubsw->addArg(x.value);
2391 psubsw->addArg(y.value);
2392 ::basicBlock->appendInst(psubsw);
2393
2394 return RValue<Short4>(V(result));
2395 }
2396}
2397
2398RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2399{
2400 if(emulateIntrinsics)
2401 {
2402 Short4 result;
2403 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2404 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2405 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2406 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2407
2408 return result;
2409 }
2410 else
2411 {
2412 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002413 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002414 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2415 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2416 pmulhw->addArg(x.value);
2417 pmulhw->addArg(y.value);
2418 ::basicBlock->appendInst(pmulhw);
2419
2420 return RValue<Short4>(V(result));
2421 }
2422}
2423
2424RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2425{
2426 if(emulateIntrinsics)
2427 {
2428 Int2 result;
2429 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2430 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2431
2432 return result;
2433 }
2434 else
2435 {
2436 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002437 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002438 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2439 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2440 pmaddwd->addArg(x.value);
2441 pmaddwd->addArg(y.value);
2442 ::basicBlock->appendInst(pmaddwd);
2443
2444 return As<Int2>(V(result));
2445 }
2446}
2447
2448RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2449{
2450 if(emulateIntrinsics)
2451 {
2452 SByte8 result;
2453 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2454 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2455 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2456 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2457 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2458 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2459 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2460 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2461
2462 return result;
2463 }
2464 else
2465 {
2466 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002467 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002468 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2469 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2470 pack->addArg(x.value);
2471 pack->addArg(y.value);
2472 ::basicBlock->appendInst(pack);
2473
2474 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2475 }
2476}
2477
2478RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2479{
2480 if(emulateIntrinsics)
2481 {
2482 Byte8 result;
2483 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2484 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2485 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2486 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2487 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2488 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2489 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2490 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2491
2492 return result;
2493 }
2494 else
2495 {
2496 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002497 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002498 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2499 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2500 pack->addArg(x.value);
2501 pack->addArg(y.value);
2502 ::basicBlock->appendInst(pack);
2503
2504 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2505 }
2506}
2507
2508RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2509{
2510 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2511}
2512
2513RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2514{
2515 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2516}
2517
2518Type *Short4::getType()
2519{
2520 return T(Type_v4i16);
2521}
2522
2523UShort4::UShort4(RValue<Float4> cast, bool saturate)
2524{
2525 if(saturate)
2526 {
2527 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002528 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002529 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2530 // PackUnsigned takes care of 0x0000 saturation.
2531 Int4 int4(Min(cast, Float4(0xFFFF)));
2532 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002533 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002534 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002535 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002536 // ARM saturates the 32-bit integer result on overflow/undeflow.
2537 Int4 int4(cast);
2538 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002539 }
2540 else
2541 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002542 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002543 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002544 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002545 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002546 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002547 *this = Short4(Int4(cast));
2548 }
2549}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002550
Nicolas Capens157ba262019-12-10 17:49:14 -05002551RValue<UShort> Extract(RValue<UShort4> val, int i)
2552{
2553 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2554}
2555
2556RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2557{
2558 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2559}
2560
2561RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2562{
2563 if(emulateIntrinsics)
2564 {
2565 UShort4 result;
2566 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2567 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2568 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2569 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2570
2571 return result;
2572 }
2573 else
2574 {
2575 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2576 }
2577}
2578
2579RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2580{
2581 if(emulateIntrinsics)
2582 {
2583 UShort4 result;
2584 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2585 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2586 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2587 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2588
2589 return result;
2590 }
2591 else
2592 {
2593 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2594 }
2595}
2596
2597RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2598{
2599 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2600 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2601 ::basicBlock->appendInst(cmp);
2602
2603 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2604 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2605 ::basicBlock->appendInst(select);
2606
2607 return RValue<UShort4>(V(result));
2608}
2609
2610RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2611{
2612 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2613 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2614 ::basicBlock->appendInst(cmp);
2615
2616 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2617 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2618 ::basicBlock->appendInst(select);
2619
2620 return RValue<UShort4>(V(result));
2621}
2622
2623RValue<UShort> SaturateUnsigned(RValue<Int> x)
2624{
2625 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2626}
2627
2628RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2629{
2630 if(emulateIntrinsics)
2631 {
2632 UShort4 result;
2633 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2634 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2635 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2636 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2637
2638 return result;
2639 }
2640 else
2641 {
2642 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002643 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002644 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2645 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2646 paddusw->addArg(x.value);
2647 paddusw->addArg(y.value);
2648 ::basicBlock->appendInst(paddusw);
2649
2650 return RValue<UShort4>(V(result));
2651 }
2652}
2653
2654RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2655{
2656 if(emulateIntrinsics)
2657 {
2658 UShort4 result;
2659 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2660 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2661 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2662 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2663
2664 return result;
2665 }
2666 else
2667 {
2668 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002669 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002670 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2671 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2672 psubusw->addArg(x.value);
2673 psubusw->addArg(y.value);
2674 ::basicBlock->appendInst(psubusw);
2675
2676 return RValue<UShort4>(V(result));
2677 }
2678}
2679
2680RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2681{
2682 if(emulateIntrinsics)
2683 {
2684 UShort4 result;
2685 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2686 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2687 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2688 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2689
2690 return result;
2691 }
2692 else
2693 {
2694 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002695 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002696 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2697 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2698 pmulhuw->addArg(x.value);
2699 pmulhuw->addArg(y.value);
2700 ::basicBlock->appendInst(pmulhuw);
2701
2702 return RValue<UShort4>(V(result));
2703 }
2704}
2705
2706RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2707{
2708 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2709
2710 // Scalarized implementation.
2711 Int4 result;
2712 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2713 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2714 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2715 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2716
2717 return result;
2718}
2719
2720RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2721{
2722 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2723
2724 if(false) // Partial product based implementation.
2725 {
2726 auto xh = x >> 16;
2727 auto yh = y >> 16;
2728 auto xl = x & UInt4(0x0000FFFF);
2729 auto yl = y & UInt4(0x0000FFFF);
2730 auto xlyh = xl * yh;
2731 auto xhyl = xh * yl;
2732 auto xlyhh = xlyh >> 16;
2733 auto xhylh = xhyl >> 16;
2734 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2735 auto xhyll = xhyl & UInt4(0x0000FFFF);
2736 auto xlylh = (xl * yl) >> 16;
2737 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2738
2739 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002740 }
2741
Nicolas Capens157ba262019-12-10 17:49:14 -05002742 // Scalarized implementation.
2743 Int4 result;
2744 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
2745 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
2746 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
2747 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
2748
2749 return As<UInt4>(result);
2750}
2751
2752RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2753{
Ben Claytonce54c592020-02-07 11:30:51 +00002754 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002755 return UShort4(0);
2756}
2757
2758Type *UShort4::getType()
2759{
2760 return T(Type_v4i16);
2761}
2762
2763RValue<Short> Extract(RValue<Short8> val, int i)
2764{
2765 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2766}
2767
2768RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2769{
2770 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2771}
2772
2773RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2774{
2775 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002776 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002777 Short8 result;
2778 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2779 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2780 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2781 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
2782 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
2783 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
2784 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
2785 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002786
Nicolas Capens157ba262019-12-10 17:49:14 -05002787 return result;
2788 }
2789 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002790 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002791 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002792 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002793}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002794
Nicolas Capens157ba262019-12-10 17:49:14 -05002795RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
2796{
2797 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002798 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002799 Short8 result;
2800 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2801 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2802 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2803 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2804 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
2805 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
2806 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
2807 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002808
Nicolas Capens157ba262019-12-10 17:49:14 -05002809 return result;
2810 }
2811 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002812 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002813 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002814 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002815}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002816
Nicolas Capens157ba262019-12-10 17:49:14 -05002817RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
2818{
Ben Claytonce54c592020-02-07 11:30:51 +00002819 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002820 return Int4(0);
2821}
2822
2823RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
2824{
Ben Claytonce54c592020-02-07 11:30:51 +00002825 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002826 return Short8(0);
2827}
2828
2829Type *Short8::getType()
2830{
2831 return T(Ice::IceType_v8i16);
2832}
2833
2834RValue<UShort> Extract(RValue<UShort8> val, int i)
2835{
2836 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2837}
2838
2839RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
2840{
2841 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
2842}
2843
2844RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
2845{
2846 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002847 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002848 UShort8 result;
2849 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2850 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2851 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2852 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2853 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
2854 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
2855 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
2856 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002857
Nicolas Capens157ba262019-12-10 17:49:14 -05002858 return result;
2859 }
2860 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002861 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002862 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002863 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002864}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002865
Nicolas Capens157ba262019-12-10 17:49:14 -05002866RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
2867{
2868 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002869 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002870 UShort8 result;
2871 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2872 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2873 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2874 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2875 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
2876 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
2877 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
2878 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002879
Nicolas Capens157ba262019-12-10 17:49:14 -05002880 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002881 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002882 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002883 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002884 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002885 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002886}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002887
Nicolas Capens157ba262019-12-10 17:49:14 -05002888RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
2889{
Ben Claytonce54c592020-02-07 11:30:51 +00002890 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002891 return UShort8(0);
2892}
2893
Nicolas Capens157ba262019-12-10 17:49:14 -05002894Type *UShort8::getType()
2895{
2896 return T(Ice::IceType_v8i16);
2897}
2898
Ben Clayton713b8d32019-12-17 20:37:56 +00002899RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002900{
2901 RValue<Int> res = val;
2902 val += 1;
2903 return res;
2904}
2905
Ben Clayton713b8d32019-12-17 20:37:56 +00002906const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002907{
2908 val += 1;
2909 return val;
2910}
2911
Ben Clayton713b8d32019-12-17 20:37:56 +00002912RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002913{
2914 RValue<Int> res = val;
2915 val -= 1;
2916 return res;
2917}
2918
Ben Clayton713b8d32019-12-17 20:37:56 +00002919const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002920{
2921 val -= 1;
2922 return val;
2923}
2924
2925RValue<Int> RoundInt(RValue<Float> cast)
2926{
2927 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002928 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002929 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2930 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002931 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002932 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002933 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002934 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002935 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002936 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2937 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2938 nearbyint->addArg(cast.value);
2939 ::basicBlock->appendInst(nearbyint);
2940
2941 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002942 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002943}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002944
Nicolas Capens157ba262019-12-10 17:49:14 -05002945Type *Int::getType()
2946{
2947 return T(Ice::IceType_i32);
2948}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002949
Nicolas Capens157ba262019-12-10 17:49:14 -05002950Type *Long::getType()
2951{
2952 return T(Ice::IceType_i64);
2953}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002954
Nicolas Capens157ba262019-12-10 17:49:14 -05002955UInt::UInt(RValue<Float> cast)
2956{
2957 // Smallest positive value representable in UInt, but not in Int
2958 const unsigned int ustart = 0x80000000u;
2959 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002960
Nicolas Capens157ba262019-12-10 17:49:14 -05002961 // If the value is negative, store 0, otherwise store the result of the conversion
2962 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00002963 // Check if the value can be represented as an Int
2964 IfThenElse(cast >= ustartf,
2965 // If the value is too large, subtract ustart and re-add it after conversion.
2966 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2967 // Otherwise, just convert normally
2968 Int(cast)))
2969 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05002970}
Nicolas Capensa8086512016-11-07 17:32:17 -05002971
Ben Clayton713b8d32019-12-17 20:37:56 +00002972RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002973{
2974 RValue<UInt> res = val;
2975 val += 1;
2976 return res;
2977}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002978
Ben Clayton713b8d32019-12-17 20:37:56 +00002979const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002980{
2981 val += 1;
2982 return val;
2983}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002984
Ben Clayton713b8d32019-12-17 20:37:56 +00002985RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002986{
2987 RValue<UInt> res = val;
2988 val -= 1;
2989 return res;
2990}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002991
Ben Clayton713b8d32019-12-17 20:37:56 +00002992const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002993{
2994 val -= 1;
2995 return val;
2996}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002997
Nicolas Capens598f8d82016-09-26 15:09:10 -04002998// RValue<UInt> RoundUInt(RValue<Float> cast)
2999// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003000// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003001// }
3002
Nicolas Capens157ba262019-12-10 17:49:14 -05003003Type *UInt::getType()
3004{
3005 return T(Ice::IceType_i32);
3006}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003007
3008// Int2::Int2(RValue<Int> cast)
3009// {
3010// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
3011// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
3012//
3013// Constant *shuffle[2];
3014// shuffle[0] = Nucleus::createConstantInt(0);
3015// shuffle[1] = Nucleus::createConstantInt(0);
3016//
3017// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
3018//
3019// storeValue(replicate);
3020// }
3021
Nicolas Capens157ba262019-12-10 17:49:14 -05003022RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3023{
3024 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003025 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003026 Int2 result;
3027 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3028 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003029
Nicolas Capens157ba262019-12-10 17:49:14 -05003030 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003031 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003032 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003033 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003034 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003035 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003036}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003037
Nicolas Capens157ba262019-12-10 17:49:14 -05003038RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3039{
3040 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003041 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003042 Int2 result;
3043 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3044 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3045
3046 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003047 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003048 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003049 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003050 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003051 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003052}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003053
Nicolas Capens157ba262019-12-10 17:49:14 -05003054Type *Int2::getType()
3055{
3056 return T(Type_v2i32);
3057}
3058
3059RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3060{
3061 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003062 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003063 UInt2 result;
3064 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3065 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003066
Nicolas Capens157ba262019-12-10 17:49:14 -05003067 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003068 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003069 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003070 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003071 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003072 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003073}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003074
Nicolas Capens157ba262019-12-10 17:49:14 -05003075RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3076{
3077 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003078 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003079 UInt2 result;
3080 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3081 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003082
Nicolas Capens157ba262019-12-10 17:49:14 -05003083 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003084 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003085 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003086 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003087 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003088 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003089}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003090
Nicolas Capens157ba262019-12-10 17:49:14 -05003091Type *UInt2::getType()
3092{
3093 return T(Type_v2i32);
3094}
3095
Ben Clayton713b8d32019-12-17 20:37:56 +00003096Int4::Int4(RValue<Byte4> cast)
3097 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003098{
3099 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3100 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3101
3102 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003103 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003104 Value *b = Nucleus::createBitCast(a, Byte16::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003105 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003106
Ben Clayton713b8d32019-12-17 20:37:56 +00003107 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003108 Value *d = Nucleus::createBitCast(c, Short8::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003109 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003110
3111 Value *f = Nucleus::createBitCast(e, Int4::getType());
3112 storeValue(f);
3113}
3114
Ben Clayton713b8d32019-12-17 20:37:56 +00003115Int4::Int4(RValue<SByte4> cast)
3116 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003117{
3118 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3119 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3120
Ben Clayton713b8d32019-12-17 20:37:56 +00003121 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003122 Value *b = Nucleus::createBitCast(a, Byte16::getType());
3123 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3124
Ben Clayton713b8d32019-12-17 20:37:56 +00003125 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003126 Value *d = Nucleus::createBitCast(c, Short8::getType());
3127 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3128
3129 *this = As<Int4>(e) >> 24;
3130}
3131
Ben Clayton713b8d32019-12-17 20:37:56 +00003132Int4::Int4(RValue<Short4> cast)
3133 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003134{
Ben Clayton713b8d32019-12-17 20:37:56 +00003135 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003136 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3137
3138 *this = As<Int4>(c) >> 16;
3139}
3140
Ben Clayton713b8d32019-12-17 20:37:56 +00003141Int4::Int4(RValue<UShort4> cast)
3142 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003143{
Ben Clayton713b8d32019-12-17 20:37:56 +00003144 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003145 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
3146 Value *d = Nucleus::createBitCast(c, Int4::getType());
3147 storeValue(d);
3148}
3149
Ben Clayton713b8d32019-12-17 20:37:56 +00003150Int4::Int4(RValue<Int> rhs)
3151 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003152{
3153 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
3154
Ben Clayton713b8d32019-12-17 20:37:56 +00003155 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003156 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3157
3158 storeValue(replicate);
3159}
3160
3161RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3162{
3163 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003164 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003165 Int4 result;
3166 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3167 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3168 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3169 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003170
Nicolas Capens157ba262019-12-10 17:49:14 -05003171 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003172 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003173 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003174 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003175 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003176 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003177}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003178
Nicolas Capens157ba262019-12-10 17:49:14 -05003179RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3180{
3181 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003182 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003183 Int4 result;
3184 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3185 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3186 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3187 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003188
Nicolas Capens157ba262019-12-10 17:49:14 -05003189 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003190 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003191 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003192 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003193 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003194 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003195}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003196
Nicolas Capens157ba262019-12-10 17:49:14 -05003197RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3198{
3199 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
3200}
3201
3202RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3203{
3204 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
3205}
3206
3207RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3208{
3209 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
3210}
3211
3212RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3213{
3214 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
3215}
3216
3217RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3218{
3219 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
3220}
3221
3222RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3223{
3224 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
3225}
3226
3227RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3228{
3229 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3230 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3231 ::basicBlock->appendInst(cmp);
3232
3233 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3234 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3235 ::basicBlock->appendInst(select);
3236
3237 return RValue<Int4>(V(result));
3238}
3239
3240RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3241{
3242 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3243 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3244 ::basicBlock->appendInst(cmp);
3245
3246 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3247 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3248 ::basicBlock->appendInst(select);
3249
3250 return RValue<Int4>(V(result));
3251}
3252
3253RValue<Int4> RoundInt(RValue<Float4> cast)
3254{
3255 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003256 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003257 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3258 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003259 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003260 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003261 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003262 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003263 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003264 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3265 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3266 nearbyint->addArg(cast.value);
3267 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003268
3269 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003270 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003271}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003272
Nicolas Capens157ba262019-12-10 17:49:14 -05003273RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3274{
3275 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003276 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003277 Short8 result;
3278 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3279 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3280 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3281 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3282 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3283 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3284 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3285 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003286
Nicolas Capens157ba262019-12-10 17:49:14 -05003287 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003288 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003289 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003290 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003291 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003292 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003293 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3294 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3295 pack->addArg(x.value);
3296 pack->addArg(y.value);
3297 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003298
Nicolas Capens157ba262019-12-10 17:49:14 -05003299 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003300 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003301}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003302
Nicolas Capens157ba262019-12-10 17:49:14 -05003303RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3304{
3305 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003306 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003307 RValue<Int4> sx = As<Int4>(x);
3308 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003309
Nicolas Capens157ba262019-12-10 17:49:14 -05003310 RValue<Int4> sy = As<Int4>(y);
3311 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003312
Nicolas Capens157ba262019-12-10 17:49:14 -05003313 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003314 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003315 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003316 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003317 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003318 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003319 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3320 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3321 pack->addArg(x.value);
3322 pack->addArg(y.value);
3323 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003324
Nicolas Capens157ba262019-12-10 17:49:14 -05003325 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003326 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003327}
Nicolas Capens33438a62017-09-27 11:47:35 -04003328
Nicolas Capens157ba262019-12-10 17:49:14 -05003329RValue<Int> SignMask(RValue<Int4> x)
3330{
3331 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003332 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003333 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3334 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003335 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003336 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003337 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003338 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003339 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003340 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3341 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3342 movmsk->addArg(x.value);
3343 ::basicBlock->appendInst(movmsk);
3344
3345 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003346 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003347}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003348
Nicolas Capens157ba262019-12-10 17:49:14 -05003349Type *Int4::getType()
3350{
3351 return T(Ice::IceType_v4i32);
3352}
3353
Ben Clayton713b8d32019-12-17 20:37:56 +00003354UInt4::UInt4(RValue<Float4> cast)
3355 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003356{
3357 // Smallest positive value representable in UInt, but not in Int
3358 const unsigned int ustart = 0x80000000u;
3359 const float ustartf = float(ustart);
3360
3361 // Check if the value can be represented as an Int
3362 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3363 // If the value is too large, subtract ustart and re-add it after conversion.
3364 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003365 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003366 (~uiValue & Int4(cast));
3367 // If the value is negative, store 0, otherwise store the result of the conversion
3368 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3369}
3370
Ben Clayton713b8d32019-12-17 20:37:56 +00003371UInt4::UInt4(RValue<UInt> rhs)
3372 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003373{
3374 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3375
Ben Clayton713b8d32019-12-17 20:37:56 +00003376 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003377 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3378
3379 storeValue(replicate);
3380}
3381
3382RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3383{
3384 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003385 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003386 UInt4 result;
3387 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3388 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3389 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3390 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003391
Nicolas Capens157ba262019-12-10 17:49:14 -05003392 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003393 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003394 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003395 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003396 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003397 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003398}
Ben Clayton88816fa2019-05-15 17:08:14 +01003399
Nicolas Capens157ba262019-12-10 17:49:14 -05003400RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3401{
3402 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003403 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003404 UInt4 result;
3405 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3406 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3407 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3408 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003409
Nicolas Capens157ba262019-12-10 17:49:14 -05003410 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003411 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003412 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003413 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003414 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003415 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003416}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003417
Nicolas Capens157ba262019-12-10 17:49:14 -05003418RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3419{
3420 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3421}
3422
3423RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3424{
3425 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3426}
3427
3428RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3429{
3430 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3431}
3432
3433RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3434{
3435 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3436}
3437
3438RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3439{
3440 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3441}
3442
3443RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3444{
3445 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3446}
3447
3448RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3449{
3450 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3451 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3452 ::basicBlock->appendInst(cmp);
3453
3454 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3455 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3456 ::basicBlock->appendInst(select);
3457
3458 return RValue<UInt4>(V(result));
3459}
3460
3461RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3462{
3463 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3464 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3465 ::basicBlock->appendInst(cmp);
3466
3467 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3468 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3469 ::basicBlock->appendInst(select);
3470
3471 return RValue<UInt4>(V(result));
3472}
3473
3474Type *UInt4::getType()
3475{
3476 return T(Ice::IceType_v4i32);
3477}
3478
3479Type *Half::getType()
3480{
3481 return T(Ice::IceType_i16);
3482}
3483
3484RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3485{
3486 return 1.0f / x;
3487}
3488
3489RValue<Float> RcpSqrt_pp(RValue<Float> x)
3490{
3491 return Rcp_pp(Sqrt(x));
3492}
3493
3494RValue<Float> Sqrt(RValue<Float> x)
3495{
3496 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003497 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003498 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3499 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3500 sqrt->addArg(x.value);
3501 ::basicBlock->appendInst(sqrt);
3502
3503 return RValue<Float>(V(result));
3504}
3505
3506RValue<Float> Round(RValue<Float> x)
3507{
3508 return Float4(Round(Float4(x))).x;
3509}
3510
3511RValue<Float> Trunc(RValue<Float> x)
3512{
3513 return Float4(Trunc(Float4(x))).x;
3514}
3515
3516RValue<Float> Frac(RValue<Float> x)
3517{
3518 return Float4(Frac(Float4(x))).x;
3519}
3520
3521RValue<Float> Floor(RValue<Float> x)
3522{
3523 return Float4(Floor(Float4(x))).x;
3524}
3525
3526RValue<Float> Ceil(RValue<Float> x)
3527{
3528 return Float4(Ceil(Float4(x))).x;
3529}
3530
3531Type *Float::getType()
3532{
3533 return T(Ice::IceType_f32);
3534}
3535
3536Type *Float2::getType()
3537{
3538 return T(Type_v2f32);
3539}
3540
Ben Clayton713b8d32019-12-17 20:37:56 +00003541Float4::Float4(RValue<Float> rhs)
3542 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003543{
3544 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3545
Ben Clayton713b8d32019-12-17 20:37:56 +00003546 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003547 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3548
3549 storeValue(replicate);
3550}
3551
3552RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3553{
3554 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3555 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3556 ::basicBlock->appendInst(cmp);
3557
3558 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3559 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3560 ::basicBlock->appendInst(select);
3561
3562 return RValue<Float4>(V(result));
3563}
3564
3565RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3566{
3567 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3568 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3569 ::basicBlock->appendInst(cmp);
3570
3571 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3572 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3573 ::basicBlock->appendInst(select);
3574
3575 return RValue<Float4>(V(result));
3576}
3577
3578RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3579{
3580 return Float4(1.0f) / x;
3581}
3582
3583RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3584{
3585 return Rcp_pp(Sqrt(x));
3586}
3587
3588RValue<Float4> Sqrt(RValue<Float4> x)
3589{
3590 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003591 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003592 Float4 result;
3593 result.x = Sqrt(Float(Float4(x).x));
3594 result.y = Sqrt(Float(Float4(x).y));
3595 result.z = Sqrt(Float(Float4(x).z));
3596 result.w = Sqrt(Float(Float4(x).w));
3597
3598 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003599 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003600 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003601 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003602 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003603 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003604 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3605 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3606 sqrt->addArg(x.value);
3607 ::basicBlock->appendInst(sqrt);
3608
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003609 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003610 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003611}
Nicolas Capens157ba262019-12-10 17:49:14 -05003612
3613RValue<Int> SignMask(RValue<Float4> x)
3614{
3615 if(emulateIntrinsics || CPUID::ARM)
3616 {
3617 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3618 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3619 }
3620 else
3621 {
3622 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003623 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003624 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3625 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3626 movmsk->addArg(x.value);
3627 ::basicBlock->appendInst(movmsk);
3628
3629 return RValue<Int>(V(result));
3630 }
3631}
3632
3633RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3634{
3635 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3636}
3637
3638RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3639{
3640 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3641}
3642
3643RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3644{
3645 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3646}
3647
3648RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3649{
3650 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3651}
3652
3653RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3654{
3655 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3656}
3657
3658RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3659{
3660 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
3661}
3662
3663RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3664{
3665 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3666}
3667
3668RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3669{
3670 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3671}
3672
3673RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3674{
3675 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3676}
3677
3678RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3679{
3680 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3681}
3682
3683RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3684{
3685 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3686}
3687
3688RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3689{
3690 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3691}
3692
3693RValue<Float4> Round(RValue<Float4> x)
3694{
3695 if(emulateIntrinsics || CPUID::ARM)
3696 {
3697 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3698 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
3699 }
3700 else if(CPUID::SSE4_1)
3701 {
3702 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003703 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003704 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3705 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3706 round->addArg(x.value);
3707 round->addArg(::context->getConstantInt32(0));
3708 ::basicBlock->appendInst(round);
3709
3710 return RValue<Float4>(V(result));
3711 }
3712 else
3713 {
3714 return Float4(RoundInt(x));
3715 }
3716}
3717
3718RValue<Float4> Trunc(RValue<Float4> x)
3719{
3720 if(CPUID::SSE4_1)
3721 {
3722 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003723 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003724 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3725 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3726 round->addArg(x.value);
3727 round->addArg(::context->getConstantInt32(3));
3728 ::basicBlock->appendInst(round);
3729
3730 return RValue<Float4>(V(result));
3731 }
3732 else
3733 {
3734 return Float4(Int4(x));
3735 }
3736}
3737
3738RValue<Float4> Frac(RValue<Float4> x)
3739{
3740 Float4 frc;
3741
3742 if(CPUID::SSE4_1)
3743 {
3744 frc = x - Floor(x);
3745 }
3746 else
3747 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003748 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05003749
Ben Clayton713b8d32019-12-17 20:37:56 +00003750 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05003751 }
3752
3753 // x - floor(x) can be 1.0 for very small negative x.
3754 // Clamp against the value just below 1.0.
3755 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
3756}
3757
3758RValue<Float4> Floor(RValue<Float4> x)
3759{
3760 if(CPUID::SSE4_1)
3761 {
3762 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003763 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003764 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3765 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3766 round->addArg(x.value);
3767 round->addArg(::context->getConstantInt32(1));
3768 ::basicBlock->appendInst(round);
3769
3770 return RValue<Float4>(V(result));
3771 }
3772 else
3773 {
3774 return x - Frac(x);
3775 }
3776}
3777
3778RValue<Float4> Ceil(RValue<Float4> x)
3779{
3780 if(CPUID::SSE4_1)
3781 {
3782 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003783 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003784 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3785 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3786 round->addArg(x.value);
3787 round->addArg(::context->getConstantInt32(2));
3788 ::basicBlock->appendInst(round);
3789
3790 return RValue<Float4>(V(result));
3791 }
3792 else
3793 {
3794 return -Floor(-x);
3795 }
3796}
3797
3798Type *Float4::getType()
3799{
3800 return T(Ice::IceType_v4f32);
3801}
3802
3803RValue<Long> Ticks()
3804{
Ben Claytonce54c592020-02-07 11:30:51 +00003805 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05003806 return Long(Int(0));
3807}
3808
Ben Clayton713b8d32019-12-17 20:37:56 +00003809RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05003810{
Antonio Maiorano02a39532020-01-21 15:15:34 -05003811 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05003812}
3813
Ben Clayton713b8d32019-12-17 20:37:56 +00003814RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05003815{
Antonio Maiorano02a39532020-01-21 15:15:34 -05003816 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05003817}
3818
Ben Clayton713b8d32019-12-17 20:37:56 +00003819Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05003820{
3821 Ice::Variable *ret = nullptr;
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003822 if(retTy != nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05003823 {
3824 ret = ::function->makeVariable(T(retTy));
3825 }
3826 auto call = Ice::InstCall::create(::function, args.size(), ret, V(fptr.value), false);
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003827 for(auto arg : args)
Nicolas Capens157ba262019-12-10 17:49:14 -05003828 {
3829 call->addArg(V(arg));
3830 }
3831 ::basicBlock->appendInst(call);
3832 return V(ret);
3833}
3834
3835void Breakpoint()
3836{
Ben Clayton713b8d32019-12-17 20:37:56 +00003837 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003838 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3839 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3840 ::basicBlock->appendInst(trap);
3841}
3842
Ben Clayton713b8d32019-12-17 20:37:56 +00003843void Nucleus::createFence(std::memory_order memoryOrder)
3844{
Antonio Maiorano370cba52019-12-31 11:36:07 -05003845 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3846 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3847 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3848 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
3849 inst->addArg(order);
3850 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00003851}
Antonio Maiorano370cba52019-12-31 11:36:07 -05003852
Ben Clayton713b8d32019-12-17 20:37:56 +00003853Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
3854{
Ben Claytonce54c592020-02-07 11:30:51 +00003855 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00003856 return nullptr;
3857}
3858void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
3859{
Ben Claytonce54c592020-02-07 11:30:51 +00003860 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00003861}
Nicolas Capens157ba262019-12-10 17:49:14 -05003862
3863RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3864{
3865 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3866}
3867
3868RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3869{
3870 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3871}
3872
3873void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3874{
3875 return emulated::Scatter(base, val, offsets, mask, alignment);
3876}
3877
3878void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3879{
3880 return emulated::Scatter(base, val, offsets, mask, alignment);
3881}
3882
3883RValue<Float> Exp2(RValue<Float> x)
3884{
3885 return emulated::Exp2(x);
3886}
3887
3888RValue<Float> Log2(RValue<Float> x)
3889{
3890 return emulated::Log2(x);
3891}
3892
3893RValue<Float4> Sin(RValue<Float4> x)
3894{
3895 return emulated::Sin(x);
3896}
3897
3898RValue<Float4> Cos(RValue<Float4> x)
3899{
3900 return emulated::Cos(x);
3901}
3902
3903RValue<Float4> Tan(RValue<Float4> x)
3904{
3905 return emulated::Tan(x);
3906}
3907
3908RValue<Float4> Asin(RValue<Float4> x)
3909{
3910 return emulated::Asin(x);
3911}
3912
3913RValue<Float4> Acos(RValue<Float4> x)
3914{
3915 return emulated::Acos(x);
3916}
3917
3918RValue<Float4> Atan(RValue<Float4> x)
3919{
3920 return emulated::Atan(x);
3921}
3922
3923RValue<Float4> Sinh(RValue<Float4> x)
3924{
3925 return emulated::Sinh(x);
3926}
3927
3928RValue<Float4> Cosh(RValue<Float4> x)
3929{
3930 return emulated::Cosh(x);
3931}
3932
3933RValue<Float4> Tanh(RValue<Float4> x)
3934{
3935 return emulated::Tanh(x);
3936}
3937
3938RValue<Float4> Asinh(RValue<Float4> x)
3939{
3940 return emulated::Asinh(x);
3941}
3942
3943RValue<Float4> Acosh(RValue<Float4> x)
3944{
3945 return emulated::Acosh(x);
3946}
3947
3948RValue<Float4> Atanh(RValue<Float4> x)
3949{
3950 return emulated::Atanh(x);
3951}
3952
3953RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3954{
3955 return emulated::Atan2(x, y);
3956}
3957
3958RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3959{
3960 return emulated::Pow(x, y);
3961}
3962
3963RValue<Float4> Exp(RValue<Float4> x)
3964{
3965 return emulated::Exp(x);
3966}
3967
3968RValue<Float4> Log(RValue<Float4> x)
3969{
3970 return emulated::Log(x);
3971}
3972
3973RValue<Float4> Exp2(RValue<Float4> x)
3974{
3975 return emulated::Exp2(x);
3976}
3977
3978RValue<Float4> Log2(RValue<Float4> x)
3979{
3980 return emulated::Log2(x);
3981}
3982
3983RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
3984{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003985 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003986 {
Ben Claytonce54c592020-02-07 11:30:51 +00003987 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00003988 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003989 }
3990 else
3991 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003992 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05003993 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3994 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3995 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3996 ctlz->addArg(x.value);
3997 ::basicBlock->appendInst(ctlz);
3998
3999 return RValue<UInt>(V(result));
4000 }
4001}
4002
4003RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4004{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004005 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004006 {
Ben Claytonce54c592020-02-07 11:30:51 +00004007 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004008 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004009 }
4010 else
4011 {
4012 // TODO: implement vectorized version in Subzero
4013 UInt4 result;
4014 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4015 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4016 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4017 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4018 return result;
4019 }
4020}
4021
4022RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4023{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004024 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004025 {
Ben Claytonce54c592020-02-07 11:30:51 +00004026 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004027 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004028 }
4029 else
4030 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004031 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004032 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4033 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4034 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4035 ctlz->addArg(x.value);
4036 ::basicBlock->appendInst(ctlz);
4037
4038 return RValue<UInt>(V(result));
4039 }
4040}
4041
4042RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4043{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004044 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004045 {
Ben Claytonce54c592020-02-07 11:30:51 +00004046 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004047 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004048 }
4049 else
4050 {
4051 // TODO: implement vectorized version in Subzero
4052 UInt4 result;
4053 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4054 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4055 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4056 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4057 return result;
4058 }
4059}
4060
Antonio Maiorano370cba52019-12-31 11:36:07 -05004061RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4062{
4063 return emulated::MinAtomic(x, y, memoryOrder);
4064}
4065
4066RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4067{
4068 return emulated::MinAtomic(x, y, memoryOrder);
4069}
4070
4071RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4072{
4073 return emulated::MaxAtomic(x, y, memoryOrder);
4074}
4075
4076RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4077{
4078 return emulated::MaxAtomic(x, y, memoryOrder);
4079}
4080
// Debug-location hook; intentionally a no-op in this backend.
void EmitDebugLocation()
{
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004082void EmitDebugVariable(Value *value) {}
// Debug-flush hook; intentionally a no-op in this backend.
void FlushDebug()
{
}
4084
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004085namespace {
4086namespace coro {
4087
// Opaque handle to a fiber.
using FiberHandle = void *;

// Per-instance state of a generated coroutine.
// A pointer to this struct is what the coroutine functions pass around as their "handle".
// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
struct CoroutineData
{
	// Fiber bookkeeping.
	FiberHandle mainFiber = nullptr;     // Fiber of the caller.
	FiberHandle routineFiber = nullptr;  // Fiber the coroutine body runs on.
	bool convertedFiber = false;         // True when the thread was converted to a fiber for mainFiber.

	// Variables used by coroutines
	bool done = false;           // Set once by the coroutine when it has finished.
	void *promisePtr = nullptr;  // Location of the most recently yielded value.
};
4103
4104CoroutineData *createCoroutineData()
4105{
4106 return new CoroutineData{};
4107}
4108
4109void destroyCoroutineData(CoroutineData *coroData)
4110{
4111 delete coroData;
4112}
4113
4114void convertThreadToMainFiber(Nucleus::CoroutineHandle handle)
4115{
4116#if defined(_WIN32)
4117 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4118
4119 coroData->mainFiber = ::ConvertThreadToFiber(nullptr);
4120
4121 if(coroData->mainFiber)
4122 {
4123 coroData->convertedFiber = true;
4124 }
4125 else
4126 {
4127 // We're probably already on a fiber, so just grab it and remember that we didn't
4128 // convert it, so not to convert back to thread.
4129 coroData->mainFiber = GetCurrentFiber();
4130 coroData->convertedFiber = false;
4131 }
4132 ASSERT(coroData->mainFiber);
4133#else
Ben Claytonce54c592020-02-07 11:30:51 +00004134 UNIMPLEMENTED_NO_BUG("convertThreadToMainFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004135#endif
4136}
4137
4138void convertMainFiberToThread(Nucleus::CoroutineHandle handle)
4139{
4140#if defined(_WIN32)
4141 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4142
4143 ASSERT(coroData->mainFiber);
4144
4145 if(coroData->convertedFiber)
4146 {
4147 ::ConvertFiberToThread();
4148 coroData->mainFiber = nullptr;
4149 }
4150#else
Ben Claytonce54c592020-02-07 11:30:51 +00004151 UNIMPLEMENTED_NO_BUG("convertMainFiberToThread not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004152#endif
4153}
4154using FiberFunc = std::function<void()>;
4155
4156void createRoutineFiber(Nucleus::CoroutineHandle handle, FiberFunc *fiberFunc)
4157{
4158#if defined(_WIN32)
4159 struct Invoker
4160 {
4161 FiberFunc func;
4162
4163 static VOID __stdcall fiberEntry(LPVOID lpParameter)
4164 {
4165 auto *func = reinterpret_cast<FiberFunc *>(lpParameter);
4166 (*func)();
4167 }
4168 };
4169
4170 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4171
4172 constexpr SIZE_T StackSize = 2 * 1024 * 1024;
4173 coroData->routineFiber = ::CreateFiber(StackSize, &Invoker::fiberEntry, fiberFunc);
4174 ASSERT(coroData->routineFiber);
4175#else
Ben Claytonce54c592020-02-07 11:30:51 +00004176 UNIMPLEMENTED_NO_BUG("createRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004177#endif
4178}
4179
4180void deleteRoutineFiber(Nucleus::CoroutineHandle handle)
4181{
4182#if defined(_WIN32)
4183 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4184 ASSERT(coroData->routineFiber);
4185 ::DeleteFiber(coroData->routineFiber);
4186 coroData->routineFiber = nullptr;
4187#else
Ben Claytonce54c592020-02-07 11:30:51 +00004188 UNIMPLEMENTED_NO_BUG("deleteRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004189#endif
4190}
4191
4192void switchToMainFiber(Nucleus::CoroutineHandle handle)
4193{
4194#if defined(_WIN32)
4195 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4196
4197 // Win32
4198 ASSERT(coroData->mainFiber);
4199 ::SwitchToFiber(coroData->mainFiber);
4200#else
Ben Claytonce54c592020-02-07 11:30:51 +00004201 UNIMPLEMENTED_NO_BUG("switchToMainFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004202#endif
4203}
4204
4205void switchToRoutineFiber(Nucleus::CoroutineHandle handle)
4206{
4207#if defined(_WIN32)
4208 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4209
4210 // Win32
4211 ASSERT(coroData->routineFiber);
4212 ::SwitchToFiber(coroData->routineFiber);
4213#else
Ben Claytonce54c592020-02-07 11:30:51 +00004214 UNIMPLEMENTED_NO_BUG("switchToRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004215#endif
4216}
4217
4218namespace detail {
4219thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4220} // namespace detail
4221
4222void setHandleParam(Nucleus::CoroutineHandle handle)
4223{
4224 ASSERT(!detail::coroHandle);
4225 detail::coroHandle = handle;
4226}
4227
4228Nucleus::CoroutineHandle getHandleParam()
4229{
4230 ASSERT(detail::coroHandle);
4231 auto handle = detail::coroHandle;
4232 detail::coroHandle = {};
4233 return handle;
4234}
4235
4236void setDone(Nucleus::CoroutineHandle handle)
4237{
4238 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4239 ASSERT(!coroData->done); // Should be called once
4240 coroData->done = true;
4241}
4242
4243bool isDone(Nucleus::CoroutineHandle handle)
4244{
4245 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4246 return coroData->done;
4247}
4248
4249void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4250{
4251 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4252 coroData->promisePtr = promisePtr;
4253}
4254
4255void *getPromisePtr(Nucleus::CoroutineHandle handle)
4256{
4257 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4258 return coroData->promisePtr;
4259}
4260
4261} // namespace coro
4262} // namespace
4263
4264// Used to generate coroutines.
4265// Lifetime: from yield to acquireCoroutine
4266class CoroutineGenerator
4267{
4268public:
4269 CoroutineGenerator()
4270 {
4271 }
4272
4273 // Inserts instructions at the top of the current function to make it a coroutine.
4274 void generateCoroutineBegin()
4275 {
4276 // Begin building the main coroutine_begin() function.
4277 // We insert these instructions at the top of the entry node,
4278 // before existing reactor-generated instructions.
4279
4280 // CoroutineHandle coroutine_begin(<Arguments>)
4281 // {
4282 // this->handle = coro::getHandleParam();
4283 //
4284 // YieldType promise;
4285 // coro::setPromisePtr(handle, &promise); // For await
4286 //
4287 // ... <REACTOR CODE> ...
4288 //
4289
4290 // Save original entry block and current block, and create a new entry block and make it current.
4291 // This new block will be used to inject code above the begin routine's existing code. We make
4292 // this block branch to the original entry block as the last instruction.
4293 auto origEntryBB = ::function->getEntryNode();
4294 auto origCurrBB = ::basicBlock;
4295 auto newBB = ::function->makeNode();
4296 sz::replaceEntryNode(::function, newBB);
4297 ::basicBlock = newBB;
4298
4299 // this->handle = coro::getHandleParam();
4300 this->handle = sz::Call(::function, ::basicBlock, coro::getHandleParam);
4301
4302 // YieldType promise;
4303 // coro::setPromisePtr(handle, &promise); // For await
4304 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
4305 sz::Call(::function, ::basicBlock, coro::setPromisePtr, this->handle, this->promise);
4306
4307 // Branch to original entry block
4308 auto br = Ice::InstBr::create(::function, origEntryBB);
4309 ::basicBlock->appendInst(br);
4310
4311 // Restore current block for future instructions
4312 ::basicBlock = origCurrBB;
4313 }
4314
4315 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4316 void generateYield(Value *val)
4317 {
4318 // ... <REACTOR CODE> ...
4319 //
4320 // promise = val;
4321 // coro::switchToMainFiber(handle);
4322 //
4323 // ... <REACTOR CODE> ...
4324
4325 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
4326 sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
4327 }
4328
4329 // Adds instructions at the end of the current main coroutine function to end the coroutine.
4330 void generateCoroutineEnd()
4331 {
4332 // ... <REACTOR CODE> ...
4333 //
4334 // coro::setDone(handle);
4335 // coro::switchToMainFiber();
4336 // // Unreachable
4337 // }
4338 //
4339
4340 sz::Call(::function, ::basicBlock, coro::setDone, this->handle);
4341
4342 // A Win32 Fiber function must not end, otherwise it tears down the thread it's running on.
4343 // So we add code to switch back to the main thread.
4344 sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
4345 }
4346
4347 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4348
4349 // Generates the await function for the current coroutine.
4350 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4351 static FunctionUniquePtr generateAwaitFunction()
4352 {
4353 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4354 // {
4355 // if (coro::isDone())
4356 // {
4357 // return false;
4358 // }
4359 // else // resume
4360 // {
4361 // YieldType* promise = coro::getPromisePtr(handle);
4362 // *out = *promise;
4363 // coro::switchToRoutineFiber(handle);
4364 // return true;
4365 // }
4366 // }
4367
4368 // Subzero doesn't support bool types (IceType_i1) as return type
4369 const Ice::Type ReturnType = Ice::IceType_i32;
4370 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4371 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4372
4373 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4374 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4375
4376 Ice::Variable *handle = awaitFunc->getArgs()[0];
4377 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4378
4379 auto doneBlock = awaitFunc->makeNode();
4380 {
4381 // return false;
4382 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4383 doneBlock->appendInst(ret);
4384 }
4385
4386 auto resumeBlock = awaitFunc->makeNode();
4387 {
4388 // YieldType* promise = coro::getPromisePtr(handle);
4389 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4390
4391 // *out = *promise;
4392 // Load promise value
4393 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4394 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4395 resumeBlock->appendInst(load);
4396 // Then store it in output param
4397 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4398 resumeBlock->appendInst(store);
4399
4400 // coro::switchToRoutineFiber(handle);
4401 sz::Call(awaitFunc, resumeBlock, coro::switchToRoutineFiber, handle);
4402
4403 // return true;
4404 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4405 resumeBlock->appendInst(ret);
4406 }
4407
4408 // if (coro::isDone())
4409 // {
4410 // <doneBlock>
4411 // }
4412 // else // resume
4413 // {
4414 // <resumeBlock>
4415 // }
4416 Ice::CfgNode *bb = awaitFunc->getEntryNode();
4417 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone);
4418 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4419 bb->appendInst(br);
4420
4421 return FunctionUniquePtr{ awaitFunc };
4422 }
4423
4424 // Generates the destroy function for the current coroutine.
4425 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4426 static FunctionUniquePtr generateDestroyFunction()
4427 {
4428 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4429 // {
4430 // coro::convertMainFiberToThread(coroData);
4431 // coro::deleteRoutineFiber(handle);
4432 // coro::destroyCoroutineData(handle);
4433 // return;
4434 // }
4435
4436 const Ice::Type ReturnType = Ice::IceType_void;
4437 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4438
4439 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4440 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4441
4442 Ice::Variable *handle = destroyFunc->getArgs()[0];
4443
4444 auto *bb = destroyFunc->getEntryNode();
4445
4446 // coro::convertMainFiberToThread(coroData);
4447 sz::Call(destroyFunc, bb, coro::convertMainFiberToThread, handle);
4448
4449 // coro::deleteRoutineFiber(handle);
4450 sz::Call(destroyFunc, bb, coro::deleteRoutineFiber, handle);
4451
4452 // coro::destroyCoroutineData(handle);
4453 sz::Call(destroyFunc, bb, coro::destroyCoroutineData, handle);
4454
4455 // return;
4456 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4457 bb->appendInst(ret);
4458
4459 return FunctionUniquePtr{ destroyFunc };
4460 }
4461
4462private:
4463 Ice::Variable *handle{};
4464 Ice::Variable *promise{};
4465};
4466
4467static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4468{
4469 // This doubles up as our coroutine handle
4470 auto coroData = coro::createCoroutineData();
4471
4472 // Convert current thread to a fiber so we can create new fibers and switch to them
4473 coro::convertThreadToMainFiber(coroData);
4474
4475 coro::FiberFunc fiberFunc = [&]() {
4476 // Store handle in TLS so that the coroutine can grab it right away, before
4477 // any fiber switch occurs.
4478 coro::setHandleParam(coroData);
4479
4480 // Invoke the begin function in the context of the routine fiber
4481 beginFunc();
4482
4483 // Either it yielded, or finished. In either case, we switch back to the main fiber.
4484 // We don't ever return from this function, or the current thread will be destroyed.
4485 coro::switchToMainFiber(coroData);
4486 };
4487
4488 coro::createRoutineFiber(coroData, &fiberFunc);
4489
4490 // Fiber will now start running, executing the saved beginFunc
4491 coro::switchToRoutineFiber(coroData);
4492
4493 return coroData;
4494}
4495
4496void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4497{
4498 // Start by creating a regular function
4499 createFunction(yieldType, params);
4500
4501 // Save in case yield() is called
4502 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4503 ::coroYieldType = yieldType;
4504}
4505
4506void Nucleus::yield(Value *val)
4507{
4508 Variable::materializeAll();
4509
4510 // On first yield, we start generating coroutine functions
4511 if(!::coroGen)
4512 {
4513 ::coroGen = std::make_shared<CoroutineGenerator>();
4514 ::coroGen->generateCoroutineBegin();
4515 }
4516
4517 ASSERT(::coroGen);
4518 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004519}
4520
Ben Clayton713b8d32019-12-17 20:37:56 +00004521static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4522{
4523 return false;
4524}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004525
4526static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4527{
4528}
Nicolas Capens157ba262019-12-10 17:49:14 -05004529
4530std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4531{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004532 if(::coroGen)
4533 {
4534 // Finish generating coroutine functions
4535 {
4536 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4537 ::coroGen->generateCoroutineEnd();
4538 createRetVoidIfNoRet();
4539 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004540
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004541 auto awaitFunc = ::coroGen->generateAwaitFunction();
4542 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004543
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004544 // At this point, we no longer need the CoroutineGenerator.
4545 ::coroGen.reset();
4546 ::coroYieldType = nullptr;
4547
4548 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4549 { name, "await", "destroy" },
4550 cfgEdit);
4551
4552 return routine;
4553 }
4554 else
4555 {
4556 {
4557 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4558 createRetVoidIfNoRet();
4559 }
4560
4561 ::coroYieldType = nullptr;
4562
4563 // Not an actual coroutine (no yields), so return stubs for await and destroy
4564 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4565
4566 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4567 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4568 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4569 return routine;
4570 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004571}
4572
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004573Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004574{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004575 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4576
4577 if(isCoroutine)
4578 {
4579 return rr::invokeCoroutineBegin(func);
4580 }
4581 else
4582 {
4583 // For regular routines, just invoke the begin func directly
4584 return func();
4585 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004586}
Nicolas Capens157ba262019-12-10 17:49:14 -05004587
4588} // namespace rr