blob: 462406473b97a5386f4ff900f270bf86de9d6038 [file] [log] [blame]
Nicolas Capens598f8d82016-09-26 15:09:10 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000017#include "Reactor.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040018
Nicolas Capens1a3ce872018-10-10 10:42:36 -040019#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000020#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040021
Nicolas Capens598f8d82016-09-26 15:09:10 -040022#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040023#include "src/IceCfgNode.h"
24#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000025#include "src/IceELFStreamer.h"
26#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040027#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040029
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031#include "llvm/Support/FileSystem.h"
32#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040033
34#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000035# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040036#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040037
Nicolas Capensbd65da92017-01-05 16:31:06 -050038#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# ifndef WIN32_LEAN_AND_MEAN
40# define WIN32_LEAN_AND_MEAN
41# endif // !WIN32_LEAN_AND_MEAN
42# ifndef NOMINMAX
43# define NOMINMAX
44# endif // !NOMINMAX
45# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050046#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040047
Nicolas Capens598f8d82016-09-26 15:09:10 -040048#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000049#include <limits>
50#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
Antonio Maiorano02a39532020-01-21 15:15:34 -050052// Subzero utility functions
53// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050054namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050055namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050056void replaceEntryNode(Ice::Cfg *function, Ice::CfgNode *newEntryNode)
57{
58 ASSERT_MSG(function->getEntryNode() != nullptr, "Function should have an entry node");
59
60 if(function->getEntryNode() == newEntryNode)
61 {
62 return;
63 }
64
65 // Make this the new entry node
66 function->setEntryNode(newEntryNode);
67
68 // Reorder nodes so that new entry block comes first. This is required
69 // by Cfg::renumberInstructions, which expects the first node in the list
70 // to be the entry node.
71 {
72 auto nodes = function->getNodes();
73
74 // TODO(amaiorano): Fast path if newEntryNode is last? Can avoid linear search.
75
76 auto iter = std::find(nodes.begin(), nodes.end(), newEntryNode);
77 ASSERT_MSG(iter != nodes.end(), "New node should be in the function's node list");
78
79 nodes.erase(iter);
80 nodes.insert(nodes.begin(), newEntryNode);
81
82 // swapNodes replaces its nodes with the input one, and renumbers them,
83 // so our new entry node will be 0, and the previous will be 1.
84 function->swapNodes(nodes);
85 }
86}
87
88Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
89{
90 uint32_t sequenceNumber = 0;
91 auto function = Ice::Cfg::create(context, sequenceNumber).release();
92
93 Ice::CfgLocalAllocatorScope allocScope{ function };
94
95 for(auto type : paramTypes)
96 {
97 Ice::Variable *arg = function->makeVariable(type);
98 function->addArg(arg);
99 }
100
101 Ice::CfgNode *node = function->makeNode();
102 function->setEntryNode(node);
103
104 return function;
105}
106
107Ice::Type getPointerType(Ice::Type elementType)
108{
109 if(sizeof(void *) == 8)
110 {
111 return Ice::IceType_i64;
112 }
113 else
114 {
115 return Ice::IceType_i32;
116 }
117}
118
119Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
120{
121 int typeSize = Ice::typeWidthInBytes(type);
122 int totalSize = typeSize * (arraySize ? arraySize : 1);
123
124 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
125 auto address = function->makeVariable(getPointerType(type));
126 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
127 function->getEntryNode()->getInsts().push_front(alloca);
128
129 return address;
130}
131
132Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500133{
134 if(sizeof(void *) == 8)
135 {
136 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
137 }
138 else
139 {
140 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
141 }
142}
143
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500144// Wrapper for calls on C functions with Ice types
145template<typename Return, typename... CArgs, typename... RArgs>
146Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
147{
148 Ice::Type retTy = T(rr::CToReactorT<Return>::getType());
149
150 // Subzero doesn't support boolean return values. Replace with an i32.
151 if(retTy == Ice::IceType_i1)
152 {
153 retTy = Ice::IceType_i32;
154 }
155
156 Ice::Variable *ret = nullptr;
157 if(retTy != Ice::IceType_void)
158 {
159 ret = function->makeVariable(retTy);
160 }
161
162 std::initializer_list<Ice::Variable *> iceArgs = { std::forward<RArgs>(args)... };
163
164 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, getConstantPointer(function->getContext(), reinterpret_cast<void const *>(fptr)), false);
165 for(auto arg : iceArgs)
166 {
167 call->addArg(arg);
168 }
169
170 basicBlock->appendInst(call);
171 return ret;
172}
173
Antonio Maiorano02a39532020-01-21 15:15:34 -0500174// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500175Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500176{
177 Ice::Variable *result = function->makeVariable(v->getType());
178 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
179 basicBlock->appendInst(cast);
180 return result;
181}
182
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500183Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500184{
185 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
186 // absolute address. We circumvent this by casting to a non-const variable, and loading
187 // from that.
188 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
189 {
190 ptr = sz::createUnconstCast(function, basicBlock, cptr);
191 }
192
193 Ice::Variable *result = function->makeVariable(type);
194 auto load = Ice::InstLoad::create(function, result, ptr, align);
195 basicBlock->appendInst(load);
196
197 return result;
198}
199
200} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500201} // namespace
202
Ben Clayton713b8d32019-12-17 20:37:56 +0000203namespace rr {
204class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500205class CoroutineGenerator;
206} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500207
208namespace {
209
210// Default configuration settings. Must be accessed under mutex lock.
211std::mutex defaultConfigLock;
212rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000213{
Nicolas Capens157ba262019-12-10 17:49:14 -0500214 // This uses a static in a function to avoid the cost of a global static
215 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
216 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000217 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500218 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000219}
220
Nicolas Capens157ba262019-12-10 17:49:14 -0500221Ice::GlobalContext *context = nullptr;
222Ice::Cfg *function = nullptr;
223Ice::CfgNode *basicBlock = nullptr;
224Ice::CfgLocalAllocatorScope *allocator = nullptr;
225rr::ELFMemoryStreamer *routine = nullptr;
226
227std::mutex codegenMutex;
228
229Ice::ELFFileStreamer *elfFile = nullptr;
230Ice::Fdstream *out = nullptr;
231
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500232// Coroutine globals
233rr::Type *coroYieldType = nullptr;
234std::shared_ptr<rr::CoroutineGenerator> coroGen;
235
Nicolas Capens157ba262019-12-10 17:49:14 -0500236} // Anonymous namespace
237
238namespace {
239
240#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +0000241# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500242#endif
243
Ben Clayton713b8d32019-12-17 20:37:56 +0000244#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
245# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -0500246#endif
247
Antonio Maiorano370cba52019-12-31 11:36:07 -0500248Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400249{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500250 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100251 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500252 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000253 case rr::Optimization::Level::None: return Ice::Opt_m1;
254 case rr::Optimization::Level::Less: return Ice::Opt_m1;
255 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500256 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
257 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100258 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500259 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400260}
261
Antonio Maiorano370cba52019-12-31 11:36:07 -0500262Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
263{
264 switch(memoryOrder)
265 {
266 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
267 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
268 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
269 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
270 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
271 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
272 }
273 return Ice::Intrinsics::MemoryOrderInvalid;
274}
275
// CPU detection results, computed once during static initialization.
class CPUID
{
public:
	static const bool ARM;     // True when built for an ARM or AArch64 target.
	static const bool SSE4_1;  // True when the x86 CPUID query reports the SSE4.1 bit.

private:
	// Runs the x86 CPUID instruction for the given leaf and stores EAX, EBX,
	// ECX and EDX in registers[0..3]. On other targets all four are zeroed.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// The target architecture is decided entirely at compile time; unknown
	// architectures are rejected by the preprocessor.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__)
		return false;
#elif defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Tests bit 19 of ECX as returned by CPUID leaf 1. Always false when not
	// built for x86.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		const int kSSE4_1Mask = 0x00080000;

		int registers[4];
		cpuid(registers, 1);

		const bool supported = (registers[2] & kSSE4_1Mask) != 0;
		return supported;
#else
		return false;
#endif
	}
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
328const bool emulateIntrinsics = false;
329const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400330
Nicolas Capens157ba262019-12-10 17:49:14 -0500331constexpr bool subzeroDumpEnabled = false;
332constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500333
334#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500335static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
336static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500337#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500338
339} // anonymous namespace
340
341namespace rr {
342
// Returns the name identifying this Reactor backend.
std::string BackendName()
{
	const std::string name("Subzero");
	return name;
}
347
Ben Clayton713b8d32019-12-17 20:37:56 +0000348const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500349#if defined(_WIN32)
350 true, // CoroutinesSupported
351#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000352 false, // CoroutinesSupported
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500353#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500354};
355
356enum EmulatedType
357{
358 EmulatedShift = 16,
359 EmulatedV2 = 2 << EmulatedShift,
360 EmulatedV4 = 4 << EmulatedShift,
361 EmulatedV8 = 8 << EmulatedShift,
362 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
363
364 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
365 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
366 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000367 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
368 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500369 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
370};
371
Ben Clayton713b8d32019-12-17 20:37:56 +0000372class Value : public Ice::Operand
373{};
374class SwitchCases : public Ice::InstSwitch
375{};
376class BasicBlock : public Ice::CfgNode
377{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500378
379Ice::Type T(Type *t)
380{
381 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
382 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500383}
384
Nicolas Capens157ba262019-12-10 17:49:14 -0500385Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400386{
Ben Clayton713b8d32019-12-17 20:37:56 +0000387 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500388}
389
390Type *T(EmulatedType t)
391{
Ben Clayton713b8d32019-12-17 20:37:56 +0000392 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500393}
394
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500395std::vector<Ice::Type> T(const std::vector<Type *> &types)
396{
397 std::vector<Ice::Type> result;
398 result.reserve(types.size());
399 for(auto &t : types)
400 {
401 result.push_back(T(t));
402 }
403 return result;
404}
405
Nicolas Capens157ba262019-12-10 17:49:14 -0500406Value *V(Ice::Operand *v)
407{
Ben Clayton713b8d32019-12-17 20:37:56 +0000408 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500409}
410
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500411Ice::Operand *V(Value *v)
412{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500413 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500414}
415
Nicolas Capens157ba262019-12-10 17:49:14 -0500416BasicBlock *B(Ice::CfgNode *b)
417{
Ben Clayton713b8d32019-12-17 20:37:56 +0000418 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500419}
420
421static size_t typeSize(Type *type)
422{
423 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400424 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500425 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800426 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000427 case Type_v2i32: return 8;
428 case Type_v4i16: return 8;
429 case Type_v2i16: return 4;
430 case Type_v8i8: return 8;
431 case Type_v4i8: return 4;
432 case Type_v2f32: return 8;
433 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500434 }
435 }
436
437 return Ice::typeWidthInBytes(T(type));
438}
439
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500440static void createRetVoidIfNoRet()
441{
442 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
443 {
444 Nucleus::createRetVoid();
445 }
446}
447
Ben Clayton713b8d32019-12-17 20:37:56 +0000448using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
449using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500450
451inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
452{
Ben Clayton713b8d32019-12-17 20:37:56 +0000453 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500454}
455
456inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
457{
458 return &sectionHeader(elfHeader)[index];
459}
460
461static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
462{
463 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
464
465 uint32_t index = relocation.getSymbol();
466 int table = relocationTable.sh_link;
467 void *symbolValue = nullptr;
468
469 if(index != SHN_UNDEF)
470 {
471 if(table == SHN_UNDEF) return nullptr;
472 const SectionHeader *symbolTable = elfSection(elfHeader, table);
473
474 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
475 if(index >= symtab_entries)
476 {
477 ASSERT(index < symtab_entries && "Symbol Index out of range");
478 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800479 }
480
Nicolas Capens157ba262019-12-10 17:49:14 -0500481 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000482 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500483 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800484
Nicolas Capens157ba262019-12-10 17:49:14 -0500485 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400486 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500487 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000488 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400489 }
490 else
491 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500492 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400493 }
Nicolas Capens66478362016-10-13 15:36:36 -0400494 }
495
Nicolas Capens157ba262019-12-10 17:49:14 -0500496 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000497 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500498
499 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400500 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400501 switch(relocation.getType())
502 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000503 case R_ARM_NONE:
504 // No relocation
505 break;
506 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500507 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000508 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500509 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
510 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
511 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400512 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000513 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500514 {
515 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
516 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
517 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400518 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000519 default:
520 ASSERT(false && "Unsupported relocation type");
521 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400522 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500523 }
524 else
525 {
526 switch(relocation.getType())
527 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000528 case R_386_NONE:
529 // No relocation
530 break;
531 case R_386_32:
532 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
533 break;
534 case R_386_PC32:
535 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
536 break;
537 default:
538 ASSERT(false && "Unsupported relocation type");
539 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500540 }
Nicolas Capens66478362016-10-13 15:36:36 -0400541 }
542
Nicolas Capens157ba262019-12-10 17:49:14 -0500543 return symbolValue;
544}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400545
Nicolas Capens157ba262019-12-10 17:49:14 -0500546static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
547{
548 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
549
550 uint32_t index = relocation.getSymbol();
551 int table = relocationTable.sh_link;
552 void *symbolValue = nullptr;
553
554 if(index != SHN_UNDEF)
555 {
556 if(table == SHN_UNDEF) return nullptr;
557 const SectionHeader *symbolTable = elfSection(elfHeader, table);
558
559 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
560 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400561 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500562 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400563 return nullptr;
564 }
565
Nicolas Capens157ba262019-12-10 17:49:14 -0500566 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000567 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500568 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400569
Nicolas Capens157ba262019-12-10 17:49:14 -0500570 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400571 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500572 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000573 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500574 }
575 else
576 {
577 return nullptr;
578 }
579 }
Nicolas Capens66478362016-10-13 15:36:36 -0400580
Nicolas Capens157ba262019-12-10 17:49:14 -0500581 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000582 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
583 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400584
Nicolas Capens157ba262019-12-10 17:49:14 -0500585 switch(relocation.getType())
586 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000587 case R_X86_64_NONE:
588 // No relocation
589 break;
590 case R_X86_64_64:
591 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
592 break;
593 case R_X86_64_PC32:
594 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
595 break;
596 case R_X86_64_32S:
597 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
598 break;
599 default:
600 ASSERT(false && "Unsupported relocation type");
601 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500602 }
603
604 return symbolValue;
605}
606
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500607void *loadImage(uint8_t *const elfImage, size_t &codeSize, const char *functionName = nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -0500608{
Ben Clayton713b8d32019-12-17 20:37:56 +0000609 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500610
611 if(!elfHeader->checkMagic())
612 {
613 return nullptr;
614 }
615
616 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000617 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
618#if defined(__i386__)
619 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
620#elif defined(__x86_64__)
621 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
622#elif defined(__arm__)
623 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
624#elif defined(__aarch64__)
625 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
626#elif defined(__mips__)
627 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
628#else
629# error "Unsupported platform"
630#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500631
Ben Clayton713b8d32019-12-17 20:37:56 +0000632 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500633 void *entry = nullptr;
634
635 for(int i = 0; i < elfHeader->e_shnum; i++)
636 {
637 if(sectionHeader[i].sh_type == SHT_PROGBITS)
638 {
639 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
640 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500641 auto getCurrSectionName = [&]() {
642 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
643 return reinterpret_cast<const char *>(elfImage + sectionNameOffset);
644 };
645 if(functionName && strstr(getCurrSectionName(), functionName) == nullptr)
646 {
647 continue;
648 }
649
Nicolas Capens157ba262019-12-10 17:49:14 -0500650 entry = elfImage + sectionHeader[i].sh_offset;
651 codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400652 }
653 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500654 else if(sectionHeader[i].sh_type == SHT_REL)
655 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000656 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400657
Nicolas Capens157ba262019-12-10 17:49:14 -0500658 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
659 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000660 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500661 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
662 }
663 }
664 else if(sectionHeader[i].sh_type == SHT_RELA)
665 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000666 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500667
668 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
669 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000670 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500671 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
672 }
673 }
674 }
675
676 return entry;
677}
678
679template<typename T>
680struct ExecutableAllocator
681{
682 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000683 template<class U>
684 ExecutableAllocator(const ExecutableAllocator<U> &other)
685 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500686
687 using value_type = T;
688 using size_type = std::size_t;
689
690 T *allocate(size_type n)
691 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000692 return (T *)allocateMemoryPages(
693 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500694 }
695
696 void deallocate(T *p, size_type n)
697 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800698 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500699 }
700};
701
702class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
703{
704 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
705 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
706
707public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000708 ELFMemoryStreamer()
709 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500710 {
711 position = 0;
712 buffer.reserve(0x1000);
713 }
714
715 ~ELFMemoryStreamer() override
716 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500717 }
718
719 void write8(uint8_t Value) override
720 {
721 if(position == (uint64_t)buffer.size())
722 {
723 buffer.push_back(Value);
724 position++;
725 }
726 else if(position < (uint64_t)buffer.size())
727 {
728 buffer[position] = Value;
729 position++;
730 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000731 else
732 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500733 }
734
735 void writeBytes(llvm::StringRef Bytes) override
736 {
737 std::size_t oldSize = buffer.size();
738 buffer.resize(oldSize + Bytes.size());
739 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
740 position += Bytes.size();
741 }
742
743 uint64_t tell() const override { return position; }
744
745 void seek(uint64_t Off) override { position = Off; }
746
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500747 const void *getEntryByName(const char *name)
Nicolas Capens157ba262019-12-10 17:49:14 -0500748 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500749 size_t codeSize = 0;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500750 const void *entry = loadImage(&buffer[0], codeSize, name);
Nicolas Capens157ba262019-12-10 17:49:14 -0500751
752#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500753 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
754#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000755 __builtin___clear_cache((char *)entry, (char *)entry + codeSize);
Nicolas Capens157ba262019-12-10 17:49:14 -0500756#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500757
Nicolas Capens598f8d82016-09-26 15:09:10 -0400758 return entry;
759 }
760
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500761 void finalize()
762 {
763 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
764
765 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
766 }
767
Ben Clayton713b8d32019-12-17 20:37:56 +0000768 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400769 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500770 ASSERT(func);
771 funcs[index] = func;
772 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400773
Nicolas Capens157ba262019-12-10 17:49:14 -0500774 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400775 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500776 ASSERT(funcs[index]);
777 return funcs[index];
778 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400779
Antonio Maiorano02a39532020-01-21 15:15:34 -0500780 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500781 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500782 // TODO(b/148086935): Replace with a buffer allocator.
783 size_t space = size + alignment;
784 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
785 void *ptr = buf.get();
786 void *alignedPtr = std::align(alignment, size, ptr, space);
787 ASSERT(alignedPtr);
788 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500789 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500790 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500791 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400792
Nicolas Capens157ba262019-12-10 17:49:14 -0500793private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000794 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500795 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
796 std::size_t position;
797 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500798};
799
800Nucleus::Nucleus()
801{
Ben Clayton713b8d32019-12-17 20:37:56 +0000802 ::codegenMutex.lock(); // Reactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500803
804 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
805 Ice::ClFlags::getParsedClFlags(Flags);
806
Ben Clayton713b8d32019-12-17 20:37:56 +0000807#if defined(__arm__)
808 Flags.setTargetArch(Ice::Target_ARM32);
809 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
810#elif defined(__mips__)
811 Flags.setTargetArch(Ice::Target_MIPS32);
812 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
813#else // x86
814 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
815 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
816#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500817 Flags.setOutFileType(Ice::FT_Elf);
818 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
819 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
820 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
821 Flags.setDisableHybridAssembly(true);
822
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500823 // Emit functions into separate sections in the ELF so we can find them by name
824 Flags.setFunctionSections(true);
825
Nicolas Capens157ba262019-12-10 17:49:14 -0500826 static llvm::raw_os_ostream cout(std::cout);
827 static llvm::raw_os_ostream cerr(std::cerr);
828
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500829 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500830 {
831 // Decorate text asm with liveness info
832 Flags.setDecorateAsm(true);
833 }
834
Ben Clayton713b8d32019-12-17 20:37:56 +0000835 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500836 {
837 std::error_code errorCode;
838 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
839 ::elfFile = new Ice::ELFFileStreamer(*out);
840 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
841 }
842 else
843 {
844 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
845 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
846 ::routine = elfMemory;
847 }
848}
849
850Nucleus::~Nucleus()
851{
852 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500853 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500854
855 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500856 ::allocator = nullptr;
857
Nicolas Capens157ba262019-12-10 17:49:14 -0500858 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500859 ::function = nullptr;
860
Nicolas Capens157ba262019-12-10 17:49:14 -0500861 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500862 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500863
864 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500865 ::elfFile = nullptr;
866
Nicolas Capens157ba262019-12-10 17:49:14 -0500867 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500868 ::out = nullptr;
869
870 ::basicBlock = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500871
872 ::codegenMutex.unlock();
873}
874
875void Nucleus::setDefaultConfig(const Config &cfg)
876{
877 std::unique_lock<std::mutex> lock(::defaultConfigLock);
878 ::defaultConfig() = cfg;
879}
880
881void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
882{
883 std::unique_lock<std::mutex> lock(::defaultConfigLock);
884 auto &config = ::defaultConfig();
885 config = cfgEdit.apply(config);
886}
887
888Config Nucleus::getDefaultConfig()
889{
890 std::unique_lock<std::mutex> lock(::defaultConfigLock);
891 return ::defaultConfig();
892}
893
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500894// This function lowers and produces executable binary code in memory for the input functions,
895// and returns a Routine with the entry points to these functions.
896template<size_t Count>
897static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500898{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500899 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
900 // and GlobalContext::emitItems.
901
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500902 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500903 {
904 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500905 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500906 }
907
908 ::context->emitFileHeader();
909
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500910 // Translate
911
912 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500913 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500914 Ice::Cfg *currFunc = functions[i];
915
916 // Install function allocator in TLS for Cfg-specific container allocators
917 Ice::CfgLocalAllocatorScope allocScope(currFunc);
918
919 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
920
921 rr::optimize(currFunc);
922
923 currFunc->computeInOutEdges();
924 ASSERT(!currFunc->hasError());
925
926 currFunc->translate();
927 ASSERT(!currFunc->hasError());
928
929 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
930
931 if(subzeroEmitTextAsm)
932 {
933 currFunc->emit();
934 }
935
936 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -0500937 }
938
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500939 // Emit items
940
941 ::context->lowerGlobals("");
942
Nicolas Capens157ba262019-12-10 17:49:14 -0500943 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500944
945 for(size_t i = 0; i < Count; ++i)
946 {
947 Ice::Cfg *currFunc = functions[i];
948
949 // Accumulate globals from functions to emit into the "last" section at the end
950 auto globals = currFunc->getGlobalInits();
951 if(globals && !globals->empty())
952 {
953 ::context->getGlobals()->merge(globals.get());
954 }
955
956 auto assembler = currFunc->releaseAssembler();
957 assembler->alignFunction();
958 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
959 }
960
Nicolas Capens157ba262019-12-10 17:49:14 -0500961 ::context->lowerGlobals("last");
962 ::context->lowerConstants();
963 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500964
Nicolas Capens157ba262019-12-10 17:49:14 -0500965 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500966 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -0500967 objectWriter->writeNonUserSections();
968
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500969 // Done compiling functions, get entry pointers to each of them
970 for(size_t i = 0; i < Count; ++i)
971 {
972 const void *entry = ::routine->getEntryByName(names[i]);
973 ::routine->setEntry(i, entry);
974 }
975
976 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -0500977
978 Routine *handoffRoutine = ::routine;
979 ::routine = nullptr;
980
981 return std::shared_ptr<Routine>(handoffRoutine);
982}
983
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500984std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
985{
986 createRetVoidIfNoRet();
987 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
988}
989
Nicolas Capens157ba262019-12-10 17:49:14 -0500990Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
991{
992 Ice::Type type = T(t);
993 int typeSize = Ice::typeWidthInBytes(type);
994 int totalSize = typeSize * (arraySize ? arraySize : 1);
995
996 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
997 auto address = ::function->makeVariable(T(getPointerType(t)));
998 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
999 ::function->getEntryNode()->getInsts().push_front(alloca);
1000
1001 return V(address);
1002}
1003
1004BasicBlock *Nucleus::createBasicBlock()
1005{
1006 return B(::function->makeNode());
1007}
1008
1009BasicBlock *Nucleus::getInsertBlock()
1010{
1011 return B(::basicBlock);
1012}
1013
1014void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1015{
Ben Clayton713b8d32019-12-17 20:37:56 +00001016 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001017
1018 Variable::materializeAll();
1019
1020 ::basicBlock = basicBlock;
1021}
1022
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001023void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001024{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001025 ASSERT(::function == nullptr);
1026 ASSERT(::allocator == nullptr);
1027 ASSERT(::basicBlock == nullptr);
1028
1029 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1030
1031 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1032 // becomes invalid if another one is created; for example, when creating await and destroy functions
1033 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1034 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001035 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1036
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001037 ::basicBlock = ::function->getEntryNode();
Nicolas Capens157ba262019-12-10 17:49:14 -05001038}
1039
1040Value *Nucleus::getArgument(unsigned int index)
1041{
1042 return V(::function->getArgs()[index]);
1043}
1044
1045void Nucleus::createRetVoid()
1046{
1047 // Code generated after this point is unreachable, so any variables
1048 // being read can safely return an undefined value. We have to avoid
1049 // materializing variables after the terminator ret instruction.
1050 Variable::killUnmaterialized();
1051
1052 Ice::InstRet *ret = Ice::InstRet::create(::function);
1053 ::basicBlock->appendInst(ret);
1054}
1055
1056void Nucleus::createRet(Value *v)
1057{
1058 // Code generated after this point is unreachable, so any variables
1059 // being read can safely return an undefined value. We have to avoid
1060 // materializing variables after the terminator ret instruction.
1061 Variable::killUnmaterialized();
1062
1063 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1064 ::basicBlock->appendInst(ret);
1065}
1066
1067void Nucleus::createBr(BasicBlock *dest)
1068{
1069 Variable::materializeAll();
1070
1071 auto br = Ice::InstBr::create(::function, dest);
1072 ::basicBlock->appendInst(br);
1073}
1074
1075void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1076{
1077 Variable::materializeAll();
1078
1079 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1080 ::basicBlock->appendInst(br);
1081}
1082
1083static bool isCommutative(Ice::InstArithmetic::OpKind op)
1084{
1085 switch(op)
1086 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001087 case Ice::InstArithmetic::Add:
1088 case Ice::InstArithmetic::Fadd:
1089 case Ice::InstArithmetic::Mul:
1090 case Ice::InstArithmetic::Fmul:
1091 case Ice::InstArithmetic::And:
1092 case Ice::InstArithmetic::Or:
1093 case Ice::InstArithmetic::Xor:
1094 return true;
1095 default:
1096 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001097 }
1098}
1099
1100static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1101{
1102 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1103
1104 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1105
1106 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1107 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1108 ::basicBlock->appendInst(arithmetic);
1109
1110 return V(result);
1111}
1112
1113Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1114{
1115 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1116}
1117
1118Value *Nucleus::createSub(Value *lhs, Value *rhs)
1119{
1120 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1121}
1122
1123Value *Nucleus::createMul(Value *lhs, Value *rhs)
1124{
1125 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1126}
1127
1128Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1129{
1130 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1131}
1132
1133Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1134{
1135 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1136}
1137
1138Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1139{
1140 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1141}
1142
1143Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1144{
1145 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1146}
1147
1148Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1149{
1150 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1151}
1152
1153Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1154{
1155 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1156}
1157
1158Value *Nucleus::createURem(Value *lhs, Value *rhs)
1159{
1160 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1161}
1162
1163Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1164{
1165 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1166}
1167
1168Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1169{
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001170 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1171 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001172 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001173 return nullptr;
1174}
1175
1176RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1177{
1178 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001179}
1180
1181Value *Nucleus::createShl(Value *lhs, Value *rhs)
1182{
1183 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1184}
1185
1186Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1187{
1188 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1189}
1190
1191Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1192{
1193 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1194}
1195
1196Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1197{
1198 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1199}
1200
1201Value *Nucleus::createOr(Value *lhs, Value *rhs)
1202{
1203 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1204}
1205
1206Value *Nucleus::createXor(Value *lhs, Value *rhs)
1207{
1208 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1209}
1210
1211Value *Nucleus::createNeg(Value *v)
1212{
1213 return createSub(createNullValue(T(v->getType())), v);
1214}
1215
1216Value *Nucleus::createFNeg(Value *v)
1217{
Ben Clayton713b8d32019-12-17 20:37:56 +00001218 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1219 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001220
1221 return createFSub(negativeZero, v);
1222}
1223
1224Value *Nucleus::createNot(Value *v)
1225{
1226 if(Ice::isScalarIntegerType(v->getType()))
1227 {
1228 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1229 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001230 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001231 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001232 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001233 return createXor(v, createConstantVector(c, T(v->getType())));
1234 }
1235}
1236
1237Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1238{
Ben Clayton713b8d32019-12-17 20:37:56 +00001239 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -05001240 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
1241
1242 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001243 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001244
Ben Clayton713b8d32019-12-17 20:37:56 +00001245 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001246 {
1247 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001248 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001249 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001250 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001251 auto pointer = RValue<Pointer<Byte>>(ptr);
1252 Int x = *Pointer<Int>(pointer);
1253
1254 Int4 vector;
1255 vector = Insert(vector, x, 0);
1256
Antonio Maiorano02a39532020-01-21 15:15:34 -05001257 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001258 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1259 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001260 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001261 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001262 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001263 auto pointer = RValue<Pointer<Byte>>(ptr);
1264 Int x = *Pointer<Int>(pointer);
1265 Int y = *Pointer<Int>(pointer + 4);
1266
1267 Int4 vector;
1268 vector = Insert(vector, x, 0);
1269 vector = Insert(vector, y, 1);
1270
Antonio Maiorano02a39532020-01-21 15:15:34 -05001271 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001272 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1273 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001274 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001275 else
1276 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001277 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001278 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001279 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001280 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001281 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001282 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001283 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1284 load->addArg(ptr);
1285 load->addArg(::context->getConstantInt32(typeSize(type)));
1286 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001287 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001288 }
1289 else
1290 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001291 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001292 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001293
Antonio Maiorano02a39532020-01-21 15:15:34 -05001294 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001295 return V(result);
1296}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001297
Nicolas Capens157ba262019-12-10 17:49:14 -05001298Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1299{
Ben Clayton713b8d32019-12-17 20:37:56 +00001300 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -05001301 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
Nicolas Capens598f8d82016-09-26 15:09:10 -04001302
Ben Clayton713b8d32019-12-17 20:37:56 +00001303#if __has_feature(memory_sanitizer)
1304 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
1305 if(align != 0)
1306 {
1307 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1308 call->addArg(ptr);
1309 call->addArg(::context->getConstantInt64(typeSize(type)));
1310 ::basicBlock->appendInst(call);
1311 }
1312#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001313
Nicolas Capens157ba262019-12-10 17:49:14 -05001314 int valueType = (int)reinterpret_cast<intptr_t>(type);
1315
Ben Clayton713b8d32019-12-17 20:37:56 +00001316 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001317 {
1318 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001319 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001320 if(typeSize(type) == 4)
1321 {
1322 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1323 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1324 ::basicBlock->appendInst(bitcast);
1325
1326 RValue<Int4> v(V(vector));
1327
1328 auto pointer = RValue<Pointer<Byte>>(ptr);
1329 Int x = Extract(v, 0);
1330 *Pointer<Int>(pointer) = x;
1331 }
1332 else if(typeSize(type) == 8)
1333 {
1334 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1335 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1336 ::basicBlock->appendInst(bitcast);
1337
1338 RValue<Int4> v(V(vector));
1339
1340 auto pointer = RValue<Pointer<Byte>>(ptr);
1341 Int x = Extract(v, 0);
1342 *Pointer<Int>(pointer) = x;
1343 Int y = Extract(v, 1);
1344 *Pointer<Int>(pointer + 4) = y;
1345 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001346 else
1347 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001348 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001349 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001350 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001351 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001352 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1353 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1354 store->addArg(value);
1355 store->addArg(ptr);
1356 store->addArg(::context->getConstantInt32(typeSize(type)));
1357 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001358 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001359 }
1360 else
1361 {
1362 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001363
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001364 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001365 ::basicBlock->appendInst(store);
1366 }
1367
1368 return value;
1369}
1370
1371Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1372{
1373 ASSERT(index->getType() == Ice::IceType_i32);
1374
1375 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1376 {
1377 int32_t offset = constant->getValue() * (int)typeSize(type);
1378
1379 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001380 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001381 return ptr;
1382 }
1383
Nicolas Capens157ba262019-12-10 17:49:14 -05001384 return createAdd(ptr, createConstantInt(offset));
1385 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001386
Nicolas Capens157ba262019-12-10 17:49:14 -05001387 if(!Ice::isByteSizedType(T(type)))
1388 {
1389 index = createMul(index, createConstantInt((int)typeSize(type)));
1390 }
1391
Ben Clayton713b8d32019-12-17 20:37:56 +00001392 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001393 {
1394 if(unsignedIndex)
1395 {
1396 index = createZExt(index, T(Ice::IceType_i64));
1397 }
1398 else
1399 {
1400 index = createSExt(index, T(Ice::IceType_i64));
1401 }
1402 }
1403
1404 return createAdd(ptr, index);
1405}
1406
Antonio Maiorano370cba52019-12-31 11:36:07 -05001407static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1408{
1409 Ice::Variable *result = ::function->makeVariable(value->getType());
1410
1411 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1412 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1413 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1414 auto op = ::context->getConstantInt32(rmwOp);
1415 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1416 inst->addArg(op);
1417 inst->addArg(ptr);
1418 inst->addArg(value);
1419 inst->addArg(order);
1420 ::basicBlock->appendInst(inst);
1421
1422 return V(result);
1423}
1424
Nicolas Capens157ba262019-12-10 17:49:14 -05001425Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1426{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001427 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001428}
1429
1430Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1431{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001432 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001433}
1434
1435Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1436{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001437 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001438}
1439
1440Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1441{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001442 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001443}
1444
1445Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1446{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001447 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001448}
1449
1450Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1451{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001452 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001453}
1454
1455Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1456{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001457 Ice::Variable *result = ::function->makeVariable(value->getType());
1458
1459 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1460 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1461 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1462 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1463 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1464 inst->addArg(ptr);
1465 inst->addArg(compare);
1466 inst->addArg(value);
1467 inst->addArg(orderEq);
1468 inst->addArg(orderNeq);
1469 ::basicBlock->appendInst(inst);
1470
1471 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001472}
1473
1474static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1475{
1476 if(v->getType() == T(destType))
1477 {
1478 return v;
1479 }
1480
1481 Ice::Variable *result = ::function->makeVariable(T(destType));
1482 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1483 ::basicBlock->appendInst(cast);
1484
1485 return V(result);
1486}
1487
1488Value *Nucleus::createTrunc(Value *v, Type *destType)
1489{
1490 return createCast(Ice::InstCast::Trunc, v, destType);
1491}
1492
1493Value *Nucleus::createZExt(Value *v, Type *destType)
1494{
1495 return createCast(Ice::InstCast::Zext, v, destType);
1496}
1497
1498Value *Nucleus::createSExt(Value *v, Type *destType)
1499{
1500 return createCast(Ice::InstCast::Sext, v, destType);
1501}
1502
1503Value *Nucleus::createFPToUI(Value *v, Type *destType)
1504{
1505 return createCast(Ice::InstCast::Fptoui, v, destType);
1506}
1507
1508Value *Nucleus::createFPToSI(Value *v, Type *destType)
1509{
1510 return createCast(Ice::InstCast::Fptosi, v, destType);
1511}
1512
1513Value *Nucleus::createSIToFP(Value *v, Type *destType)
1514{
1515 return createCast(Ice::InstCast::Sitofp, v, destType);
1516}
1517
1518Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1519{
1520 return createCast(Ice::InstCast::Fptrunc, v, destType);
1521}
1522
1523Value *Nucleus::createFPExt(Value *v, Type *destType)
1524{
1525 return createCast(Ice::InstCast::Fpext, v, destType);
1526}
1527
1528Value *Nucleus::createBitCast(Value *v, Type *destType)
1529{
1530 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1531 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1532 // emulate them by writing to the stack and reading back as the destination type.
1533 if(emulateMismatchedBitCast)
1534 {
1535 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1536 {
1537 Value *address = allocateStackVariable(destType);
1538 createStore(v, address, T(v->getType()));
1539 return createLoad(address, destType);
1540 }
1541 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1542 {
1543 Value *address = allocateStackVariable(T(v->getType()));
1544 createStore(v, address, T(v->getType()));
1545 return createLoad(address, destType);
1546 }
1547 }
1548
1549 return createCast(Ice::InstCast::Bitcast, v, destType);
1550}
1551
1552static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1553{
1554 ASSERT(lhs->getType() == rhs->getType());
1555
1556 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1557 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1558 ::basicBlock->appendInst(cmp);
1559
1560 return V(result);
1561}
1562
1563Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1564{
1565 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1566}
1567
1568Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1569{
1570 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1571}
1572
1573Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1574{
1575 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1576}
1577
1578Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1579{
1580 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1581}
1582
1583Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1584{
1585 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1586}
1587
1588Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1589{
1590 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1591}
1592
1593Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1594{
1595 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1596}
1597
1598Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1599{
1600 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1601}
1602
1603Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1604{
1605 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1606}
1607
1608Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1609{
1610 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1611}
1612
1613Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1614{
1615 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1616}
1617
1618static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1619{
1620 ASSERT(lhs->getType() == rhs->getType());
1621 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1622
1623 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1624 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1625 ::basicBlock->appendInst(cmp);
1626
1627 return V(result);
1628}
1629
1630Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1631{
1632 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1633}
1634
1635Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1636{
1637 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1638}
1639
1640Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1641{
1642 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1643}
1644
1645Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1646{
1647 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1648}
1649
1650Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1651{
1652 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1653}
1654
1655Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1656{
1657 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1658}
1659
1660Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1661{
1662 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1663}
1664
1665Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1666{
1667 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1668}
1669
1670Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1671{
1672 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1673}
1674
1675Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1676{
1677 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1678}
1679
1680Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1681{
1682 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1683}
1684
1685Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1686{
1687 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1688}
1689
1690Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1691{
1692 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1693}
1694
1695Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1696{
1697 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1698}
1699
1700Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1701{
1702 auto result = ::function->makeVariable(T(type));
1703 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1704 ::basicBlock->appendInst(extract);
1705
1706 return V(result);
1707}
1708
1709Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1710{
1711 auto result = ::function->makeVariable(vector->getType());
1712 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1713 ::basicBlock->appendInst(insert);
1714
1715 return V(result);
1716}
1717
1718Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1719{
1720 ASSERT(V1->getType() == V2->getType());
1721
1722 int size = Ice::typeNumElements(V1->getType());
1723 auto result = ::function->makeVariable(V1->getType());
1724 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1725
1726 for(int i = 0; i < size; i++)
1727 {
1728 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1729 }
1730
1731 ::basicBlock->appendInst(shuffle);
1732
1733 return V(result);
1734}
1735
1736Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1737{
1738 ASSERT(ifTrue->getType() == ifFalse->getType());
1739
1740 auto result = ::function->makeVariable(ifTrue->getType());
1741 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1742 ::basicBlock->appendInst(select);
1743
1744 return V(result);
1745}
1746
1747SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1748{
1749 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1750 ::basicBlock->appendInst(switchInst);
1751
Ben Clayton713b8d32019-12-17 20:37:56 +00001752 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001753}
1754
1755void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1756{
1757 switchCases->addBranch(label, label, branch);
1758}
1759
1760void Nucleus::createUnreachable()
1761{
1762 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1763 ::basicBlock->appendInst(unreachable);
1764}
1765
1766Type *Nucleus::getPointerType(Type *ElementType)
1767{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001768 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001769}
1770
1771Value *Nucleus::createNullValue(Type *Ty)
1772{
1773 if(Ice::isVectorType(T(Ty)))
1774 {
1775 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001776 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001777 return createConstantVector(c, Ty);
1778 }
1779 else
1780 {
1781 return V(::context->getConstantZero(T(Ty)));
1782 }
1783}
1784
1785Value *Nucleus::createConstantLong(int64_t i)
1786{
1787 return V(::context->getConstantInt64(i));
1788}
1789
1790Value *Nucleus::createConstantInt(int i)
1791{
1792 return V(::context->getConstantInt32(i));
1793}
1794
1795Value *Nucleus::createConstantInt(unsigned int i)
1796{
1797 return V(::context->getConstantInt32(i));
1798}
1799
1800Value *Nucleus::createConstantBool(bool b)
1801{
1802 return V(::context->getConstantInt1(b));
1803}
1804
1805Value *Nucleus::createConstantByte(signed char i)
1806{
1807 return V(::context->getConstantInt8(i));
1808}
1809
1810Value *Nucleus::createConstantByte(unsigned char i)
1811{
1812 return V(::context->getConstantInt8(i));
1813}
1814
1815Value *Nucleus::createConstantShort(short i)
1816{
1817 return V(::context->getConstantInt16(i));
1818}
1819
1820Value *Nucleus::createConstantShort(unsigned short i)
1821{
1822 return V(::context->getConstantInt16(i));
1823}
1824
1825Value *Nucleus::createConstantFloat(float x)
1826{
1827 return V(::context->getConstantFloat(x));
1828}
1829
1830Value *Nucleus::createNullPointer(Type *Ty)
1831{
Ben Clayton713b8d32019-12-17 20:37:56 +00001832 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05001833}
1834
Antonio Maiorano02a39532020-01-21 15:15:34 -05001835static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
1836{
1837 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
1838}
1839
Nicolas Capens157ba262019-12-10 17:49:14 -05001840Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1841{
1842 const int vectorSize = 16;
1843 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
1844 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05001845
1846 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00001847 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001848
1849 // TODO(148082873): Fix global variable constants when generating multiple functions
1850 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001851
1852 switch((int)reinterpret_cast<intptr_t>(type))
1853 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001854 case Ice::IceType_v4i32:
1855 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001856 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001857 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001858 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001859 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001860 }
1861 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001862 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001863 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001864 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001865 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001866 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001867 }
1868 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001869 case Ice::IceType_v8i16:
1870 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001871 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001872 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001873 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001874 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001875 }
1876 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001877 case Ice::IceType_v16i8:
1878 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001879 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001880 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001881 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001882 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001883 }
1884 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001885 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001886 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001887 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001888 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001889 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001890 }
1891 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001892 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001893 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001894 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001895 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001896 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001897 }
1898 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001899 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05001900 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001901 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001902 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001903 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001904 }
1905 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001906 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05001907 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001908 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001909 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001910 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001911 }
1912 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001913 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05001914 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001915 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001916 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001917 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001918 }
1919 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001920 default:
1921 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001922 }
1923
Antonio Maiorano02a39532020-01-21 15:15:34 -05001924 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05001925
Antonio Maiorano02a39532020-01-21 15:15:34 -05001926 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001927 return V(result);
1928}
1929
1930Value *Nucleus::createConstantVector(const double *constants, Type *type)
1931{
Ben Clayton713b8d32019-12-17 20:37:56 +00001932 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05001933}
1934
1935Type *Void::getType()
1936{
1937 return T(Ice::IceType_void);
1938}
1939
1940Type *Bool::getType()
1941{
1942 return T(Ice::IceType_i1);
1943}
1944
1945Type *Byte::getType()
1946{
1947 return T(Ice::IceType_i8);
1948}
1949
1950Type *SByte::getType()
1951{
1952 return T(Ice::IceType_i8);
1953}
1954
1955Type *Short::getType()
1956{
1957 return T(Ice::IceType_i16);
1958}
1959
1960Type *UShort::getType()
1961{
1962 return T(Ice::IceType_i16);
1963}
1964
1965Type *Byte4::getType()
1966{
1967 return T(Type_v4i8);
1968}
1969
1970Type *SByte4::getType()
1971{
1972 return T(Type_v4i8);
1973}
1974
Ben Clayton713b8d32019-12-17 20:37:56 +00001975namespace {
1976RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05001977{
Ben Clayton713b8d32019-12-17 20:37:56 +00001978 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05001979}
1980
Ben Clayton713b8d32019-12-17 20:37:56 +00001981RValue<Byte> Extract(RValue<Byte8> val, int i)
1982{
1983 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
1984}
1985
1986RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
1987{
1988 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
1989}
1990} // namespace
1991
Nicolas Capens157ba262019-12-10 17:49:14 -05001992RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
1993{
1994 if(emulateIntrinsics)
1995 {
1996 Byte8 result;
1997 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1998 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1999 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2000 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2001 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2002 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2003 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2004 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2005
2006 return result;
2007 }
2008 else
2009 {
2010 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002011 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002012 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2013 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2014 paddusb->addArg(x.value);
2015 paddusb->addArg(y.value);
2016 ::basicBlock->appendInst(paddusb);
2017
2018 return RValue<Byte8>(V(result));
2019 }
2020}
2021
2022RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2023{
2024 if(emulateIntrinsics)
2025 {
2026 Byte8 result;
2027 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2028 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2029 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2030 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2031 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2032 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2033 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2034 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2035
2036 return result;
2037 }
2038 else
2039 {
2040 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002041 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002042 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2043 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2044 psubusw->addArg(x.value);
2045 psubusw->addArg(y.value);
2046 ::basicBlock->appendInst(psubusw);
2047
2048 return RValue<Byte8>(V(result));
2049 }
2050}
2051
2052RValue<SByte> Extract(RValue<SByte8> val, int i)
2053{
2054 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
2055}
2056
2057RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2058{
2059 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2060}
2061
2062RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2063{
2064 if(emulateIntrinsics)
2065 {
2066 SByte8 result;
2067 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2068 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2069 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2070 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2071 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2072 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2073 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2074 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2075
2076 return result;
2077 }
2078 else
2079 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002080#if defined(__i386__) || defined(__x86_64__)
2081 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2082 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2083 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002084
Ben Clayton713b8d32019-12-17 20:37:56 +00002085 return As<SByte8>(hi | lo);
2086#else
2087 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2088#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002089 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002090}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002091
Nicolas Capens157ba262019-12-10 17:49:14 -05002092RValue<Int> SignMask(RValue<Byte8> x)
2093{
2094 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002095 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002096 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2097 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002098 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002099 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002100 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002101 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002102 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002103 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2104 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2105 movmsk->addArg(x.value);
2106 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002107
Nicolas Capens157ba262019-12-10 17:49:14 -05002108 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002109 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002110}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002111
2112// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2113// {
Nicolas Capens2f970b62016-11-08 14:28:59 -05002114// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002115// }
2116
Nicolas Capens157ba262019-12-10 17:49:14 -05002117RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2118{
2119 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2120}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002121
Nicolas Capens157ba262019-12-10 17:49:14 -05002122Type *Byte8::getType()
2123{
2124 return T(Type_v8i8);
2125}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002126
Nicolas Capens598f8d82016-09-26 15:09:10 -04002127// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2128// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002129// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002130// }
2131
2132// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2133// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002134// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002135// }
2136
Nicolas Capens157ba262019-12-10 17:49:14 -05002137RValue<SByte> SaturateSigned(RValue<Short> x)
2138{
2139 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2140}
2141
2142RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2143{
2144 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002145 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002146 SByte8 result;
2147 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2148 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2149 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2150 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2151 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2152 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2153 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2154 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002155
Nicolas Capens157ba262019-12-10 17:49:14 -05002156 return result;
2157 }
2158 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002159 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002160 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002161 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002162 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2163 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2164 paddsb->addArg(x.value);
2165 paddsb->addArg(y.value);
2166 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002167
Nicolas Capens157ba262019-12-10 17:49:14 -05002168 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002169 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002170}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002171
Nicolas Capens157ba262019-12-10 17:49:14 -05002172RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2173{
2174 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002175 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002176 SByte8 result;
2177 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2178 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2179 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2180 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2181 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2182 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2183 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2184 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002185
Nicolas Capens157ba262019-12-10 17:49:14 -05002186 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002187 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002188 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002189 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002190 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002191 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002192 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2193 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2194 psubsb->addArg(x.value);
2195 psubsb->addArg(y.value);
2196 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002197
Nicolas Capens157ba262019-12-10 17:49:14 -05002198 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002199 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002200}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002201
Nicolas Capens157ba262019-12-10 17:49:14 -05002202RValue<Int> SignMask(RValue<SByte8> x)
2203{
2204 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002205 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002206 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2207 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002208 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002209 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002210 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002211 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002212 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002213 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2214 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2215 movmsk->addArg(x.value);
2216 ::basicBlock->appendInst(movmsk);
2217
2218 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002219 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002220}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002221
Nicolas Capens157ba262019-12-10 17:49:14 -05002222RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2223{
2224 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2225}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002226
Nicolas Capens157ba262019-12-10 17:49:14 -05002227RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2228{
2229 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2230}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002231
Nicolas Capens157ba262019-12-10 17:49:14 -05002232Type *SByte8::getType()
2233{
2234 return T(Type_v8i8);
2235}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002236
Nicolas Capens157ba262019-12-10 17:49:14 -05002237Type *Byte16::getType()
2238{
2239 return T(Ice::IceType_v16i8);
2240}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002241
Nicolas Capens157ba262019-12-10 17:49:14 -05002242Type *SByte16::getType()
2243{
2244 return T(Ice::IceType_v16i8);
2245}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002246
Nicolas Capens157ba262019-12-10 17:49:14 -05002247Type *Short2::getType()
2248{
2249 return T(Type_v2i16);
2250}
Nicolas Capensd4227962016-11-09 14:24:25 -05002251
Nicolas Capens157ba262019-12-10 17:49:14 -05002252Type *UShort2::getType()
2253{
2254 return T(Type_v2i16);
2255}
Nicolas Capensd4227962016-11-09 14:24:25 -05002256
Nicolas Capens157ba262019-12-10 17:49:14 -05002257Short4::Short4(RValue<Int4> cast)
2258{
Ben Clayton713b8d32019-12-17 20:37:56 +00002259 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002260 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2261 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2262
2263 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
2264 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2265
2266 storeValue(short4);
2267}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002268
2269// Short4::Short4(RValue<Float> cast)
2270// {
2271// }
2272
Nicolas Capens157ba262019-12-10 17:49:14 -05002273Short4::Short4(RValue<Float4> cast)
2274{
Ben Claytonce54c592020-02-07 11:30:51 +00002275 UNIMPLEMENTED_NO_BUG("Short4::Short4(RValue<Float4> cast)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002276}
2277
2278RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2279{
2280 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002281 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002282 Short4 result;
2283 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2284 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2285 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2286 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002287
2288 return result;
2289 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002290 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002291 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002292 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2293 }
2294}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002295
Nicolas Capens157ba262019-12-10 17:49:14 -05002296RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2297{
2298 if(emulateIntrinsics)
2299 {
2300 Short4 result;
2301 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2302 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2303 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2304 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2305
2306 return result;
2307 }
2308 else
2309 {
2310 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2311 }
2312}
2313
2314RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2315{
2316 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2317 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2318 ::basicBlock->appendInst(cmp);
2319
2320 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2321 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2322 ::basicBlock->appendInst(select);
2323
2324 return RValue<Short4>(V(result));
2325}
2326
2327RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2328{
2329 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2330 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2331 ::basicBlock->appendInst(cmp);
2332
2333 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2334 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2335 ::basicBlock->appendInst(select);
2336
2337 return RValue<Short4>(V(result));
2338}
2339
2340RValue<Short> SaturateSigned(RValue<Int> x)
2341{
2342 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2343}
2344
2345RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2346{
2347 if(emulateIntrinsics)
2348 {
2349 Short4 result;
2350 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2351 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2352 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2353 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2354
2355 return result;
2356 }
2357 else
2358 {
2359 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002360 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002361 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2362 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2363 paddsw->addArg(x.value);
2364 paddsw->addArg(y.value);
2365 ::basicBlock->appendInst(paddsw);
2366
2367 return RValue<Short4>(V(result));
2368 }
2369}
2370
2371RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2372{
2373 if(emulateIntrinsics)
2374 {
2375 Short4 result;
2376 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2377 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2378 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2379 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2380
2381 return result;
2382 }
2383 else
2384 {
2385 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002386 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002387 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2388 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2389 psubsw->addArg(x.value);
2390 psubsw->addArg(y.value);
2391 ::basicBlock->appendInst(psubsw);
2392
2393 return RValue<Short4>(V(result));
2394 }
2395}
2396
2397RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2398{
2399 if(emulateIntrinsics)
2400 {
2401 Short4 result;
2402 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2403 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2404 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2405 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2406
2407 return result;
2408 }
2409 else
2410 {
2411 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002412 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002413 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2414 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2415 pmulhw->addArg(x.value);
2416 pmulhw->addArg(y.value);
2417 ::basicBlock->appendInst(pmulhw);
2418
2419 return RValue<Short4>(V(result));
2420 }
2421}
2422
2423RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2424{
2425 if(emulateIntrinsics)
2426 {
2427 Int2 result;
2428 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2429 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2430
2431 return result;
2432 }
2433 else
2434 {
2435 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002436 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002437 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2438 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2439 pmaddwd->addArg(x.value);
2440 pmaddwd->addArg(y.value);
2441 ::basicBlock->appendInst(pmaddwd);
2442
2443 return As<Int2>(V(result));
2444 }
2445}
2446
2447RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2448{
2449 if(emulateIntrinsics)
2450 {
2451 SByte8 result;
2452 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2453 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2454 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2455 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2456 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2457 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2458 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2459 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2460
2461 return result;
2462 }
2463 else
2464 {
2465 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002466 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002467 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2468 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2469 pack->addArg(x.value);
2470 pack->addArg(y.value);
2471 ::basicBlock->appendInst(pack);
2472
2473 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2474 }
2475}
2476
2477RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2478{
2479 if(emulateIntrinsics)
2480 {
2481 Byte8 result;
2482 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2483 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2484 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2485 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2486 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2487 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2488 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2489 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2490
2491 return result;
2492 }
2493 else
2494 {
2495 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002496 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002497 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2498 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2499 pack->addArg(x.value);
2500 pack->addArg(y.value);
2501 ::basicBlock->appendInst(pack);
2502
2503 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2504 }
2505}
2506
2507RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2508{
2509 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2510}
2511
2512RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2513{
2514 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2515}
2516
2517Type *Short4::getType()
2518{
2519 return T(Type_v4i16);
2520}
2521
2522UShort4::UShort4(RValue<Float4> cast, bool saturate)
2523{
2524 if(saturate)
2525 {
2526 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002527 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002528 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2529 // PackUnsigned takes care of 0x0000 saturation.
2530 Int4 int4(Min(cast, Float4(0xFFFF)));
2531 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002532 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002533 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002534 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002535 // ARM saturates the 32-bit integer result on overflow/undeflow.
2536 Int4 int4(cast);
2537 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002538 }
2539 else
2540 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002541 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002542 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002543 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002544 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002545 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002546 *this = Short4(Int4(cast));
2547 }
2548}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002549
Nicolas Capens157ba262019-12-10 17:49:14 -05002550RValue<UShort> Extract(RValue<UShort4> val, int i)
2551{
2552 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2553}
2554
2555RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2556{
2557 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2558}
2559
2560RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2561{
2562 if(emulateIntrinsics)
2563 {
2564 UShort4 result;
2565 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2566 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2567 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2568 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2569
2570 return result;
2571 }
2572 else
2573 {
2574 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2575 }
2576}
2577
2578RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2579{
2580 if(emulateIntrinsics)
2581 {
2582 UShort4 result;
2583 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2584 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2585 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2586 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2587
2588 return result;
2589 }
2590 else
2591 {
2592 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2593 }
2594}
2595
2596RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2597{
2598 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2599 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2600 ::basicBlock->appendInst(cmp);
2601
2602 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2603 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2604 ::basicBlock->appendInst(select);
2605
2606 return RValue<UShort4>(V(result));
2607}
2608
2609RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2610{
2611 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2612 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2613 ::basicBlock->appendInst(cmp);
2614
2615 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2616 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2617 ::basicBlock->appendInst(select);
2618
2619 return RValue<UShort4>(V(result));
2620}
2621
2622RValue<UShort> SaturateUnsigned(RValue<Int> x)
2623{
2624 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2625}
2626
2627RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2628{
2629 if(emulateIntrinsics)
2630 {
2631 UShort4 result;
2632 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2633 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2634 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2635 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2636
2637 return result;
2638 }
2639 else
2640 {
2641 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002642 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002643 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2644 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2645 paddusw->addArg(x.value);
2646 paddusw->addArg(y.value);
2647 ::basicBlock->appendInst(paddusw);
2648
2649 return RValue<UShort4>(V(result));
2650 }
2651}
2652
2653RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2654{
2655 if(emulateIntrinsics)
2656 {
2657 UShort4 result;
2658 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2659 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2660 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2661 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2662
2663 return result;
2664 }
2665 else
2666 {
2667 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002668 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002669 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2670 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2671 psubusw->addArg(x.value);
2672 psubusw->addArg(y.value);
2673 ::basicBlock->appendInst(psubusw);
2674
2675 return RValue<UShort4>(V(result));
2676 }
2677}
2678
2679RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2680{
2681 if(emulateIntrinsics)
2682 {
2683 UShort4 result;
2684 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2685 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2686 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2687 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2688
2689 return result;
2690 }
2691 else
2692 {
2693 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002694 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002695 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2696 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2697 pmulhuw->addArg(x.value);
2698 pmulhuw->addArg(y.value);
2699 ::basicBlock->appendInst(pmulhuw);
2700
2701 return RValue<UShort4>(V(result));
2702 }
2703}
2704
2705RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2706{
2707 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2708
2709 // Scalarized implementation.
2710 Int4 result;
2711 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2712 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2713 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2714 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2715
2716 return result;
2717}
2718
2719RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2720{
2721 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2722
2723 if(false) // Partial product based implementation.
2724 {
2725 auto xh = x >> 16;
2726 auto yh = y >> 16;
2727 auto xl = x & UInt4(0x0000FFFF);
2728 auto yl = y & UInt4(0x0000FFFF);
2729 auto xlyh = xl * yh;
2730 auto xhyl = xh * yl;
2731 auto xlyhh = xlyh >> 16;
2732 auto xhylh = xhyl >> 16;
2733 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2734 auto xhyll = xhyl & UInt4(0x0000FFFF);
2735 auto xlylh = (xl * yl) >> 16;
2736 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2737
2738 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002739 }
2740
Nicolas Capens157ba262019-12-10 17:49:14 -05002741 // Scalarized implementation.
2742 Int4 result;
2743 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
2744 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
2745 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
2746 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
2747
2748 return As<UInt4>(result);
2749}
2750
2751RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2752{
Ben Claytonce54c592020-02-07 11:30:51 +00002753 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002754 return UShort4(0);
2755}
2756
2757Type *UShort4::getType()
2758{
2759 return T(Type_v4i16);
2760}
2761
2762RValue<Short> Extract(RValue<Short8> val, int i)
2763{
2764 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2765}
2766
2767RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2768{
2769 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2770}
2771
2772RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2773{
2774 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002775 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002776 Short8 result;
2777 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2778 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2779 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2780 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
2781 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
2782 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
2783 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
2784 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002785
Nicolas Capens157ba262019-12-10 17:49:14 -05002786 return result;
2787 }
2788 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002789 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002790 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002791 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002792}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002793
Nicolas Capens157ba262019-12-10 17:49:14 -05002794RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
2795{
2796 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002797 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002798 Short8 result;
2799 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2800 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2801 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2802 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2803 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
2804 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
2805 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
2806 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002807
Nicolas Capens157ba262019-12-10 17:49:14 -05002808 return result;
2809 }
2810 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002811 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002812 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002813 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002814}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002815
Nicolas Capens157ba262019-12-10 17:49:14 -05002816RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
2817{
Ben Claytonce54c592020-02-07 11:30:51 +00002818 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002819 return Int4(0);
2820}
2821
2822RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
2823{
Ben Claytonce54c592020-02-07 11:30:51 +00002824 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002825 return Short8(0);
2826}
2827
2828Type *Short8::getType()
2829{
2830 return T(Ice::IceType_v8i16);
2831}
2832
2833RValue<UShort> Extract(RValue<UShort8> val, int i)
2834{
2835 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2836}
2837
2838RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
2839{
2840 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
2841}
2842
2843RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
2844{
2845 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002846 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002847 UShort8 result;
2848 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2849 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2850 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2851 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2852 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
2853 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
2854 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
2855 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002856
Nicolas Capens157ba262019-12-10 17:49:14 -05002857 return result;
2858 }
2859 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002860 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002861 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002862 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002863}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002864
Nicolas Capens157ba262019-12-10 17:49:14 -05002865RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
2866{
2867 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002868 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002869 UShort8 result;
2870 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2871 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2872 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2873 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2874 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
2875 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
2876 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
2877 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002878
Nicolas Capens157ba262019-12-10 17:49:14 -05002879 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002880 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002881 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002882 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002883 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002884 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002885}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002886
Nicolas Capens157ba262019-12-10 17:49:14 -05002887RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
2888{
Ben Claytonce54c592020-02-07 11:30:51 +00002889 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002890 return UShort8(0);
2891}
2892
Nicolas Capens157ba262019-12-10 17:49:14 -05002893Type *UShort8::getType()
2894{
2895 return T(Ice::IceType_v8i16);
2896}
2897
Ben Clayton713b8d32019-12-17 20:37:56 +00002898RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002899{
2900 RValue<Int> res = val;
2901 val += 1;
2902 return res;
2903}
2904
Ben Clayton713b8d32019-12-17 20:37:56 +00002905const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002906{
2907 val += 1;
2908 return val;
2909}
2910
Ben Clayton713b8d32019-12-17 20:37:56 +00002911RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002912{
2913 RValue<Int> res = val;
2914 val -= 1;
2915 return res;
2916}
2917
Ben Clayton713b8d32019-12-17 20:37:56 +00002918const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002919{
2920 val -= 1;
2921 return val;
2922}
2923
2924RValue<Int> RoundInt(RValue<Float> cast)
2925{
2926 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002927 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002928 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2929 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002930 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002931 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002932 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002933 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002934 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002935 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2936 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2937 nearbyint->addArg(cast.value);
2938 ::basicBlock->appendInst(nearbyint);
2939
2940 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002941 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002942}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002943
Nicolas Capens157ba262019-12-10 17:49:14 -05002944Type *Int::getType()
2945{
2946 return T(Ice::IceType_i32);
2947}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002948
Nicolas Capens157ba262019-12-10 17:49:14 -05002949Type *Long::getType()
2950{
2951 return T(Ice::IceType_i64);
2952}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002953
Nicolas Capens157ba262019-12-10 17:49:14 -05002954UInt::UInt(RValue<Float> cast)
2955{
2956 // Smallest positive value representable in UInt, but not in Int
2957 const unsigned int ustart = 0x80000000u;
2958 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002959
Nicolas Capens157ba262019-12-10 17:49:14 -05002960 // If the value is negative, store 0, otherwise store the result of the conversion
2961 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00002962 // Check if the value can be represented as an Int
2963 IfThenElse(cast >= ustartf,
2964 // If the value is too large, subtract ustart and re-add it after conversion.
2965 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2966 // Otherwise, just convert normally
2967 Int(cast)))
2968 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05002969}
Nicolas Capensa8086512016-11-07 17:32:17 -05002970
Ben Clayton713b8d32019-12-17 20:37:56 +00002971RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002972{
2973 RValue<UInt> res = val;
2974 val += 1;
2975 return res;
2976}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002977
Ben Clayton713b8d32019-12-17 20:37:56 +00002978const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002979{
2980 val += 1;
2981 return val;
2982}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002983
Ben Clayton713b8d32019-12-17 20:37:56 +00002984RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002985{
2986 RValue<UInt> res = val;
2987 val -= 1;
2988 return res;
2989}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002990
Ben Clayton713b8d32019-12-17 20:37:56 +00002991const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002992{
2993 val -= 1;
2994 return val;
2995}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002996
Nicolas Capens598f8d82016-09-26 15:09:10 -04002997// RValue<UInt> RoundUInt(RValue<Float> cast)
2998// {
Ben Claytoneb50d252019-04-15 13:50:01 -04002999// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003000// }
3001
Nicolas Capens157ba262019-12-10 17:49:14 -05003002Type *UInt::getType()
3003{
3004 return T(Ice::IceType_i32);
3005}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003006
3007// Int2::Int2(RValue<Int> cast)
3008// {
3009// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
3010// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
3011//
3012// Constant *shuffle[2];
3013// shuffle[0] = Nucleus::createConstantInt(0);
3014// shuffle[1] = Nucleus::createConstantInt(0);
3015//
3016// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
3017//
3018// storeValue(replicate);
3019// }
3020
Nicolas Capens157ba262019-12-10 17:49:14 -05003021RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3022{
3023 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003024 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003025 Int2 result;
3026 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3027 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003028
Nicolas Capens157ba262019-12-10 17:49:14 -05003029 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003030 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003031 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003032 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003033 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003034 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003035}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003036
Nicolas Capens157ba262019-12-10 17:49:14 -05003037RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3038{
3039 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003040 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003041 Int2 result;
3042 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3043 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3044
3045 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003046 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003047 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003048 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003049 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003050 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003051}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003052
Nicolas Capens157ba262019-12-10 17:49:14 -05003053Type *Int2::getType()
3054{
3055 return T(Type_v2i32);
3056}
3057
3058RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3059{
3060 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003061 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003062 UInt2 result;
3063 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3064 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003065
Nicolas Capens157ba262019-12-10 17:49:14 -05003066 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003067 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003068 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003069 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003070 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003071 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003072}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003073
Nicolas Capens157ba262019-12-10 17:49:14 -05003074RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3075{
3076 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003077 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003078 UInt2 result;
3079 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3080 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003081
Nicolas Capens157ba262019-12-10 17:49:14 -05003082 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003083 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003084 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003085 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003086 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003087 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003088}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003089
Nicolas Capens157ba262019-12-10 17:49:14 -05003090Type *UInt2::getType()
3091{
3092 return T(Type_v2i32);
3093}
3094
Ben Clayton713b8d32019-12-17 20:37:56 +00003095Int4::Int4(RValue<Byte4> cast)
3096 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003097{
3098 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3099 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3100
3101 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003102 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003103 Value *b = Nucleus::createBitCast(a, Byte16::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003104 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003105
Ben Clayton713b8d32019-12-17 20:37:56 +00003106 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003107 Value *d = Nucleus::createBitCast(c, Short8::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003108 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003109
3110 Value *f = Nucleus::createBitCast(e, Int4::getType());
3111 storeValue(f);
3112}
3113
Ben Clayton713b8d32019-12-17 20:37:56 +00003114Int4::Int4(RValue<SByte4> cast)
3115 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003116{
3117 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3118 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3119
Ben Clayton713b8d32019-12-17 20:37:56 +00003120 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003121 Value *b = Nucleus::createBitCast(a, Byte16::getType());
3122 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3123
Ben Clayton713b8d32019-12-17 20:37:56 +00003124 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003125 Value *d = Nucleus::createBitCast(c, Short8::getType());
3126 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3127
3128 *this = As<Int4>(e) >> 24;
3129}
3130
Ben Clayton713b8d32019-12-17 20:37:56 +00003131Int4::Int4(RValue<Short4> cast)
3132 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003133{
Ben Clayton713b8d32019-12-17 20:37:56 +00003134 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003135 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3136
3137 *this = As<Int4>(c) >> 16;
3138}
3139
Ben Clayton713b8d32019-12-17 20:37:56 +00003140Int4::Int4(RValue<UShort4> cast)
3141 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003142{
Ben Clayton713b8d32019-12-17 20:37:56 +00003143 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003144 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
3145 Value *d = Nucleus::createBitCast(c, Int4::getType());
3146 storeValue(d);
3147}
3148
Ben Clayton713b8d32019-12-17 20:37:56 +00003149Int4::Int4(RValue<Int> rhs)
3150 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003151{
3152 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
3153
Ben Clayton713b8d32019-12-17 20:37:56 +00003154 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003155 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3156
3157 storeValue(replicate);
3158}
3159
3160RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3161{
3162 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003163 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003164 Int4 result;
3165 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3166 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3167 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3168 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003169
Nicolas Capens157ba262019-12-10 17:49:14 -05003170 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003171 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003172 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003173 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003174 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003175 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003176}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003177
Nicolas Capens157ba262019-12-10 17:49:14 -05003178RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3179{
3180 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003181 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003182 Int4 result;
3183 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3184 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3185 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3186 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003187
Nicolas Capens157ba262019-12-10 17:49:14 -05003188 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003189 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003190 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003191 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003192 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003193 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003194}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003195
Nicolas Capens157ba262019-12-10 17:49:14 -05003196RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3197{
3198 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
3199}
3200
3201RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3202{
3203 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
3204}
3205
3206RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3207{
3208 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
3209}
3210
3211RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3212{
3213 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
3214}
3215
3216RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3217{
3218 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
3219}
3220
3221RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3222{
3223 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
3224}
3225
3226RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3227{
3228 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3229 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3230 ::basicBlock->appendInst(cmp);
3231
3232 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3233 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3234 ::basicBlock->appendInst(select);
3235
3236 return RValue<Int4>(V(result));
3237}
3238
3239RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3240{
3241 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3242 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3243 ::basicBlock->appendInst(cmp);
3244
3245 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3246 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3247 ::basicBlock->appendInst(select);
3248
3249 return RValue<Int4>(V(result));
3250}
3251
3252RValue<Int4> RoundInt(RValue<Float4> cast)
3253{
3254 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003255 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003256 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3257 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003258 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003259 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003260 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003261 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003262 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003263 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3264 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3265 nearbyint->addArg(cast.value);
3266 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003267
3268 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003269 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003270}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003271
Nicolas Capens157ba262019-12-10 17:49:14 -05003272RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3273{
3274 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003275 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003276 Short8 result;
3277 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3278 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3279 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3280 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3281 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3282 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3283 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3284 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003285
Nicolas Capens157ba262019-12-10 17:49:14 -05003286 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003287 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003288 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003289 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003290 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003291 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003292 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3293 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3294 pack->addArg(x.value);
3295 pack->addArg(y.value);
3296 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003297
Nicolas Capens157ba262019-12-10 17:49:14 -05003298 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003299 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003300}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003301
Nicolas Capens157ba262019-12-10 17:49:14 -05003302RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3303{
3304 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003305 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003306 RValue<Int4> sx = As<Int4>(x);
3307 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003308
Nicolas Capens157ba262019-12-10 17:49:14 -05003309 RValue<Int4> sy = As<Int4>(y);
3310 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003311
Nicolas Capens157ba262019-12-10 17:49:14 -05003312 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003313 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003314 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003315 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003316 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003317 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003318 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3319 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3320 pack->addArg(x.value);
3321 pack->addArg(y.value);
3322 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003323
Nicolas Capens157ba262019-12-10 17:49:14 -05003324 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003325 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003326}
Nicolas Capens33438a62017-09-27 11:47:35 -04003327
Nicolas Capens157ba262019-12-10 17:49:14 -05003328RValue<Int> SignMask(RValue<Int4> x)
3329{
3330 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003331 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003332 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3333 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003334 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003335 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003336 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003337 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003338 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003339 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3340 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3341 movmsk->addArg(x.value);
3342 ::basicBlock->appendInst(movmsk);
3343
3344 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003345 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003346}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003347
Nicolas Capens157ba262019-12-10 17:49:14 -05003348Type *Int4::getType()
3349{
3350 return T(Ice::IceType_v4i32);
3351}
3352
Ben Clayton713b8d32019-12-17 20:37:56 +00003353UInt4::UInt4(RValue<Float4> cast)
3354 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003355{
3356 // Smallest positive value representable in UInt, but not in Int
3357 const unsigned int ustart = 0x80000000u;
3358 const float ustartf = float(ustart);
3359
3360 // Check if the value can be represented as an Int
3361 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3362 // If the value is too large, subtract ustart and re-add it after conversion.
3363 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003364 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003365 (~uiValue & Int4(cast));
3366 // If the value is negative, store 0, otherwise store the result of the conversion
3367 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3368}
3369
Ben Clayton713b8d32019-12-17 20:37:56 +00003370UInt4::UInt4(RValue<UInt> rhs)
3371 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003372{
3373 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3374
Ben Clayton713b8d32019-12-17 20:37:56 +00003375 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003376 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3377
3378 storeValue(replicate);
3379}
3380
3381RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3382{
3383 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003384 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003385 UInt4 result;
3386 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3387 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3388 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3389 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003390
Nicolas Capens157ba262019-12-10 17:49:14 -05003391 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003392 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003393 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003394 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003395 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003396 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003397}
Ben Clayton88816fa2019-05-15 17:08:14 +01003398
Nicolas Capens157ba262019-12-10 17:49:14 -05003399RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3400{
3401 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003402 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003403 UInt4 result;
3404 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3405 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3406 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3407 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003408
Nicolas Capens157ba262019-12-10 17:49:14 -05003409 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003410 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003411 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003412 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003413 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003414 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003415}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003416
Nicolas Capens157ba262019-12-10 17:49:14 -05003417RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3418{
3419 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3420}
3421
3422RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3423{
3424 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3425}
3426
3427RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3428{
3429 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3430}
3431
3432RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3433{
3434 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3435}
3436
3437RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3438{
3439 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3440}
3441
3442RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3443{
3444 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3445}
3446
3447RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3448{
3449 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3450 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3451 ::basicBlock->appendInst(cmp);
3452
3453 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3454 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3455 ::basicBlock->appendInst(select);
3456
3457 return RValue<UInt4>(V(result));
3458}
3459
3460RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3461{
3462 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3463 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3464 ::basicBlock->appendInst(cmp);
3465
3466 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3467 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3468 ::basicBlock->appendInst(select);
3469
3470 return RValue<UInt4>(V(result));
3471}
3472
3473Type *UInt4::getType()
3474{
3475 return T(Ice::IceType_v4i32);
3476}
3477
3478Type *Half::getType()
3479{
3480 return T(Ice::IceType_i16);
3481}
3482
3483RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3484{
3485 return 1.0f / x;
3486}
3487
3488RValue<Float> RcpSqrt_pp(RValue<Float> x)
3489{
3490 return Rcp_pp(Sqrt(x));
3491}
3492
3493RValue<Float> Sqrt(RValue<Float> x)
3494{
3495 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003496 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003497 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3498 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3499 sqrt->addArg(x.value);
3500 ::basicBlock->appendInst(sqrt);
3501
3502 return RValue<Float>(V(result));
3503}
3504
3505RValue<Float> Round(RValue<Float> x)
3506{
3507 return Float4(Round(Float4(x))).x;
3508}
3509
3510RValue<Float> Trunc(RValue<Float> x)
3511{
3512 return Float4(Trunc(Float4(x))).x;
3513}
3514
3515RValue<Float> Frac(RValue<Float> x)
3516{
3517 return Float4(Frac(Float4(x))).x;
3518}
3519
3520RValue<Float> Floor(RValue<Float> x)
3521{
3522 return Float4(Floor(Float4(x))).x;
3523}
3524
3525RValue<Float> Ceil(RValue<Float> x)
3526{
3527 return Float4(Ceil(Float4(x))).x;
3528}
3529
3530Type *Float::getType()
3531{
3532 return T(Ice::IceType_f32);
3533}
3534
3535Type *Float2::getType()
3536{
3537 return T(Type_v2f32);
3538}
3539
Ben Clayton713b8d32019-12-17 20:37:56 +00003540Float4::Float4(RValue<Float> rhs)
3541 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003542{
3543 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3544
Ben Clayton713b8d32019-12-17 20:37:56 +00003545 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003546 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3547
3548 storeValue(replicate);
3549}
3550
3551RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3552{
3553 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3554 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3555 ::basicBlock->appendInst(cmp);
3556
3557 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3558 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3559 ::basicBlock->appendInst(select);
3560
3561 return RValue<Float4>(V(result));
3562}
3563
3564RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3565{
3566 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3567 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3568 ::basicBlock->appendInst(cmp);
3569
3570 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3571 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3572 ::basicBlock->appendInst(select);
3573
3574 return RValue<Float4>(V(result));
3575}
3576
3577RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3578{
3579 return Float4(1.0f) / x;
3580}
3581
3582RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3583{
3584 return Rcp_pp(Sqrt(x));
3585}
3586
3587RValue<Float4> Sqrt(RValue<Float4> x)
3588{
3589 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003590 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003591 Float4 result;
3592 result.x = Sqrt(Float(Float4(x).x));
3593 result.y = Sqrt(Float(Float4(x).y));
3594 result.z = Sqrt(Float(Float4(x).z));
3595 result.w = Sqrt(Float(Float4(x).w));
3596
3597 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003598 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003599 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003600 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003601 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003602 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003603 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3604 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3605 sqrt->addArg(x.value);
3606 ::basicBlock->appendInst(sqrt);
3607
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003608 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003609 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003610}
Nicolas Capens157ba262019-12-10 17:49:14 -05003611
3612RValue<Int> SignMask(RValue<Float4> x)
3613{
3614 if(emulateIntrinsics || CPUID::ARM)
3615 {
3616 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3617 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3618 }
3619 else
3620 {
3621 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003622 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003623 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3624 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3625 movmsk->addArg(x.value);
3626 ::basicBlock->appendInst(movmsk);
3627
3628 return RValue<Int>(V(result));
3629 }
3630}
3631
3632RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3633{
3634 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3635}
3636
3637RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3638{
3639 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3640}
3641
3642RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3643{
3644 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3645}
3646
3647RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3648{
3649 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3650}
3651
3652RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3653{
3654 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3655}
3656
3657RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3658{
3659 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
3660}
3661
3662RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3663{
3664 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3665}
3666
3667RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3668{
3669 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3670}
3671
3672RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3673{
3674 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3675}
3676
3677RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3678{
3679 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3680}
3681
3682RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3683{
3684 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3685}
3686
3687RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3688{
3689 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3690}
3691
3692RValue<Float4> Round(RValue<Float4> x)
3693{
3694 if(emulateIntrinsics || CPUID::ARM)
3695 {
3696 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3697 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
3698 }
3699 else if(CPUID::SSE4_1)
3700 {
3701 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003702 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003703 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3704 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3705 round->addArg(x.value);
3706 round->addArg(::context->getConstantInt32(0));
3707 ::basicBlock->appendInst(round);
3708
3709 return RValue<Float4>(V(result));
3710 }
3711 else
3712 {
3713 return Float4(RoundInt(x));
3714 }
3715}
3716
3717RValue<Float4> Trunc(RValue<Float4> x)
3718{
3719 if(CPUID::SSE4_1)
3720 {
3721 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003722 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003723 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3724 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3725 round->addArg(x.value);
3726 round->addArg(::context->getConstantInt32(3));
3727 ::basicBlock->appendInst(round);
3728
3729 return RValue<Float4>(V(result));
3730 }
3731 else
3732 {
3733 return Float4(Int4(x));
3734 }
3735}
3736
3737RValue<Float4> Frac(RValue<Float4> x)
3738{
3739 Float4 frc;
3740
3741 if(CPUID::SSE4_1)
3742 {
3743 frc = x - Floor(x);
3744 }
3745 else
3746 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003747 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05003748
Ben Clayton713b8d32019-12-17 20:37:56 +00003749 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05003750 }
3751
3752 // x - floor(x) can be 1.0 for very small negative x.
3753 // Clamp against the value just below 1.0.
3754 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
3755}
3756
3757RValue<Float4> Floor(RValue<Float4> x)
3758{
3759 if(CPUID::SSE4_1)
3760 {
3761 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003762 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003763 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3764 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3765 round->addArg(x.value);
3766 round->addArg(::context->getConstantInt32(1));
3767 ::basicBlock->appendInst(round);
3768
3769 return RValue<Float4>(V(result));
3770 }
3771 else
3772 {
3773 return x - Frac(x);
3774 }
3775}
3776
3777RValue<Float4> Ceil(RValue<Float4> x)
3778{
3779 if(CPUID::SSE4_1)
3780 {
3781 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003782 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003783 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3784 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3785 round->addArg(x.value);
3786 round->addArg(::context->getConstantInt32(2));
3787 ::basicBlock->appendInst(round);
3788
3789 return RValue<Float4>(V(result));
3790 }
3791 else
3792 {
3793 return -Floor(-x);
3794 }
3795}
3796
3797Type *Float4::getType()
3798{
3799 return T(Ice::IceType_v4f32);
3800}
3801
3802RValue<Long> Ticks()
3803{
Ben Claytonce54c592020-02-07 11:30:51 +00003804 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05003805 return Long(Int(0));
3806}
3807
Ben Clayton713b8d32019-12-17 20:37:56 +00003808RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05003809{
Antonio Maiorano02a39532020-01-21 15:15:34 -05003810 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05003811}
3812
Ben Clayton713b8d32019-12-17 20:37:56 +00003813RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05003814{
Antonio Maiorano02a39532020-01-21 15:15:34 -05003815 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05003816}
3817
Ben Clayton713b8d32019-12-17 20:37:56 +00003818Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05003819{
3820 Ice::Variable *ret = nullptr;
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003821 if(retTy != nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05003822 {
3823 ret = ::function->makeVariable(T(retTy));
3824 }
3825 auto call = Ice::InstCall::create(::function, args.size(), ret, V(fptr.value), false);
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003826 for(auto arg : args)
Nicolas Capens157ba262019-12-10 17:49:14 -05003827 {
3828 call->addArg(V(arg));
3829 }
3830 ::basicBlock->appendInst(call);
3831 return V(ret);
3832}
3833
3834void Breakpoint()
3835{
Ben Clayton713b8d32019-12-17 20:37:56 +00003836 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003837 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3838 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3839 ::basicBlock->appendInst(trap);
3840}
3841
Ben Clayton713b8d32019-12-17 20:37:56 +00003842void Nucleus::createFence(std::memory_order memoryOrder)
3843{
Antonio Maiorano370cba52019-12-31 11:36:07 -05003844 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3845 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3846 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3847 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
3848 inst->addArg(order);
3849 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00003850}
Antonio Maiorano370cba52019-12-31 11:36:07 -05003851
Ben Clayton713b8d32019-12-17 20:37:56 +00003852Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
3853{
Ben Claytonce54c592020-02-07 11:30:51 +00003854 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00003855 return nullptr;
3856}
3857void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
3858{
Ben Claytonce54c592020-02-07 11:30:51 +00003859 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00003860}
Nicolas Capens157ba262019-12-10 17:49:14 -05003861
3862RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3863{
3864 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3865}
3866
3867RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3868{
3869 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3870}
3871
3872void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3873{
3874 return emulated::Scatter(base, val, offsets, mask, alignment);
3875}
3876
3877void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3878{
3879 return emulated::Scatter(base, val, offsets, mask, alignment);
3880}
3881
3882RValue<Float> Exp2(RValue<Float> x)
3883{
3884 return emulated::Exp2(x);
3885}
3886
3887RValue<Float> Log2(RValue<Float> x)
3888{
3889 return emulated::Log2(x);
3890}
3891
3892RValue<Float4> Sin(RValue<Float4> x)
3893{
3894 return emulated::Sin(x);
3895}
3896
3897RValue<Float4> Cos(RValue<Float4> x)
3898{
3899 return emulated::Cos(x);
3900}
3901
3902RValue<Float4> Tan(RValue<Float4> x)
3903{
3904 return emulated::Tan(x);
3905}
3906
3907RValue<Float4> Asin(RValue<Float4> x)
3908{
3909 return emulated::Asin(x);
3910}
3911
3912RValue<Float4> Acos(RValue<Float4> x)
3913{
3914 return emulated::Acos(x);
3915}
3916
3917RValue<Float4> Atan(RValue<Float4> x)
3918{
3919 return emulated::Atan(x);
3920}
3921
3922RValue<Float4> Sinh(RValue<Float4> x)
3923{
3924 return emulated::Sinh(x);
3925}
3926
3927RValue<Float4> Cosh(RValue<Float4> x)
3928{
3929 return emulated::Cosh(x);
3930}
3931
3932RValue<Float4> Tanh(RValue<Float4> x)
3933{
3934 return emulated::Tanh(x);
3935}
3936
3937RValue<Float4> Asinh(RValue<Float4> x)
3938{
3939 return emulated::Asinh(x);
3940}
3941
3942RValue<Float4> Acosh(RValue<Float4> x)
3943{
3944 return emulated::Acosh(x);
3945}
3946
3947RValue<Float4> Atanh(RValue<Float4> x)
3948{
3949 return emulated::Atanh(x);
3950}
3951
3952RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3953{
3954 return emulated::Atan2(x, y);
3955}
3956
3957RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3958{
3959 return emulated::Pow(x, y);
3960}
3961
3962RValue<Float4> Exp(RValue<Float4> x)
3963{
3964 return emulated::Exp(x);
3965}
3966
3967RValue<Float4> Log(RValue<Float4> x)
3968{
3969 return emulated::Log(x);
3970}
3971
3972RValue<Float4> Exp2(RValue<Float4> x)
3973{
3974 return emulated::Exp2(x);
3975}
3976
3977RValue<Float4> Log2(RValue<Float4> x)
3978{
3979 return emulated::Log2(x);
3980}
3981
3982RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
3983{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003984 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003985 {
Ben Claytonce54c592020-02-07 11:30:51 +00003986 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00003987 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003988 }
3989 else
3990 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003991 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05003992 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3993 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3994 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3995 ctlz->addArg(x.value);
3996 ::basicBlock->appendInst(ctlz);
3997
3998 return RValue<UInt>(V(result));
3999 }
4000}
4001
4002RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4003{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004004 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004005 {
Ben Claytonce54c592020-02-07 11:30:51 +00004006 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004007 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004008 }
4009 else
4010 {
4011 // TODO: implement vectorized version in Subzero
4012 UInt4 result;
4013 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4014 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4015 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4016 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4017 return result;
4018 }
4019}
4020
4021RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4022{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004023 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004024 {
Ben Claytonce54c592020-02-07 11:30:51 +00004025 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004026 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004027 }
4028 else
4029 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004030 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004031 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4032 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4033 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4034 ctlz->addArg(x.value);
4035 ::basicBlock->appendInst(ctlz);
4036
4037 return RValue<UInt>(V(result));
4038 }
4039}
4040
4041RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4042{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004043 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004044 {
Ben Claytonce54c592020-02-07 11:30:51 +00004045 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004046 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004047 }
4048 else
4049 {
4050 // TODO: implement vectorized version in Subzero
4051 UInt4 result;
4052 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4053 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4054 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4055 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4056 return result;
4057 }
4058}
4059
Antonio Maiorano370cba52019-12-31 11:36:07 -05004060RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4061{
4062 return emulated::MinAtomic(x, y, memoryOrder);
4063}
4064
4065RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4066{
4067 return emulated::MinAtomic(x, y, memoryOrder);
4068}
4069
4070RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4071{
4072 return emulated::MaxAtomic(x, y, memoryOrder);
4073}
4074
4075RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4076{
4077 return emulated::MaxAtomic(x, y, memoryOrder);
4078}
4079
// Intentionally a no-op in this backend.
void EmitDebugLocation()
{
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004081void EmitDebugVariable(Value *value) {}
// Intentionally a no-op in this backend.
void FlushDebug()
{
}
4083
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004084namespace {
4085namespace coro {
4086
4087using FiberHandle = void *;
4088
4089// Instance data per generated coroutine
4090// This is the "handle" type used for Coroutine functions
4091// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
4092struct CoroutineData
Nicolas Capens157ba262019-12-10 17:49:14 -05004093{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004094 FiberHandle mainFiber{};
4095 FiberHandle routineFiber{};
4096 bool convertedFiber = false;
4097
4098 // Variables used by coroutines
4099 bool done = false;
4100 void *promisePtr = nullptr;
4101};
4102
4103CoroutineData *createCoroutineData()
4104{
4105 return new CoroutineData{};
4106}
4107
4108void destroyCoroutineData(CoroutineData *coroData)
4109{
4110 delete coroData;
4111}
4112
4113void convertThreadToMainFiber(Nucleus::CoroutineHandle handle)
4114{
4115#if defined(_WIN32)
4116 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4117
4118 coroData->mainFiber = ::ConvertThreadToFiber(nullptr);
4119
4120 if(coroData->mainFiber)
4121 {
4122 coroData->convertedFiber = true;
4123 }
4124 else
4125 {
4126 // We're probably already on a fiber, so just grab it and remember that we didn't
4127 // convert it, so not to convert back to thread.
4128 coroData->mainFiber = GetCurrentFiber();
4129 coroData->convertedFiber = false;
4130 }
4131 ASSERT(coroData->mainFiber);
4132#else
Ben Claytonce54c592020-02-07 11:30:51 +00004133 UNIMPLEMENTED_NO_BUG("convertThreadToMainFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004134#endif
4135}
4136
4137void convertMainFiberToThread(Nucleus::CoroutineHandle handle)
4138{
4139#if defined(_WIN32)
4140 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4141
4142 ASSERT(coroData->mainFiber);
4143
4144 if(coroData->convertedFiber)
4145 {
4146 ::ConvertFiberToThread();
4147 coroData->mainFiber = nullptr;
4148 }
4149#else
Ben Claytonce54c592020-02-07 11:30:51 +00004150 UNIMPLEMENTED_NO_BUG("convertMainFiberToThread not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004151#endif
4152}
4153using FiberFunc = std::function<void()>;
4154
4155void createRoutineFiber(Nucleus::CoroutineHandle handle, FiberFunc *fiberFunc)
4156{
4157#if defined(_WIN32)
4158 struct Invoker
4159 {
4160 FiberFunc func;
4161
4162 static VOID __stdcall fiberEntry(LPVOID lpParameter)
4163 {
4164 auto *func = reinterpret_cast<FiberFunc *>(lpParameter);
4165 (*func)();
4166 }
4167 };
4168
4169 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4170
4171 constexpr SIZE_T StackSize = 2 * 1024 * 1024;
4172 coroData->routineFiber = ::CreateFiber(StackSize, &Invoker::fiberEntry, fiberFunc);
4173 ASSERT(coroData->routineFiber);
4174#else
Ben Claytonce54c592020-02-07 11:30:51 +00004175 UNIMPLEMENTED_NO_BUG("createRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004176#endif
4177}
4178
4179void deleteRoutineFiber(Nucleus::CoroutineHandle handle)
4180{
4181#if defined(_WIN32)
4182 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4183 ASSERT(coroData->routineFiber);
4184 ::DeleteFiber(coroData->routineFiber);
4185 coroData->routineFiber = nullptr;
4186#else
Ben Claytonce54c592020-02-07 11:30:51 +00004187 UNIMPLEMENTED_NO_BUG("deleteRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004188#endif
4189}
4190
4191void switchToMainFiber(Nucleus::CoroutineHandle handle)
4192{
4193#if defined(_WIN32)
4194 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4195
4196 // Win32
4197 ASSERT(coroData->mainFiber);
4198 ::SwitchToFiber(coroData->mainFiber);
4199#else
Ben Claytonce54c592020-02-07 11:30:51 +00004200 UNIMPLEMENTED_NO_BUG("switchToMainFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004201#endif
4202}
4203
4204void switchToRoutineFiber(Nucleus::CoroutineHandle handle)
4205{
4206#if defined(_WIN32)
4207 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4208
4209 // Win32
4210 ASSERT(coroData->routineFiber);
4211 ::SwitchToFiber(coroData->routineFiber);
4212#else
Ben Claytonce54c592020-02-07 11:30:51 +00004213 UNIMPLEMENTED_NO_BUG("switchToRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004214#endif
4215}
4216
4217namespace detail {
4218thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4219} // namespace detail
4220
4221void setHandleParam(Nucleus::CoroutineHandle handle)
4222{
4223 ASSERT(!detail::coroHandle);
4224 detail::coroHandle = handle;
4225}
4226
4227Nucleus::CoroutineHandle getHandleParam()
4228{
4229 ASSERT(detail::coroHandle);
4230 auto handle = detail::coroHandle;
4231 detail::coroHandle = {};
4232 return handle;
4233}
4234
4235void setDone(Nucleus::CoroutineHandle handle)
4236{
4237 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4238 ASSERT(!coroData->done); // Should be called once
4239 coroData->done = true;
4240}
4241
4242bool isDone(Nucleus::CoroutineHandle handle)
4243{
4244 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4245 return coroData->done;
4246}
4247
4248void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4249{
4250 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4251 coroData->promisePtr = promisePtr;
4252}
4253
4254void *getPromisePtr(Nucleus::CoroutineHandle handle)
4255{
4256 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4257 return coroData->promisePtr;
4258}
4259
4260} // namespace coro
4261} // namespace
4262
4263// Used to generate coroutines.
4264// Lifetime: from yield to acquireCoroutine
4265class CoroutineGenerator
4266{
4267public:
4268 CoroutineGenerator()
4269 {
4270 }
4271
4272 // Inserts instructions at the top of the current function to make it a coroutine.
4273 void generateCoroutineBegin()
4274 {
4275 // Begin building the main coroutine_begin() function.
4276 // We insert these instructions at the top of the entry node,
4277 // before existing reactor-generated instructions.
4278
4279 // CoroutineHandle coroutine_begin(<Arguments>)
4280 // {
4281 // this->handle = coro::getHandleParam();
4282 //
4283 // YieldType promise;
4284 // coro::setPromisePtr(handle, &promise); // For await
4285 //
4286 // ... <REACTOR CODE> ...
4287 //
4288
4289 // Save original entry block and current block, and create a new entry block and make it current.
4290 // This new block will be used to inject code above the begin routine's existing code. We make
4291 // this block branch to the original entry block as the last instruction.
4292 auto origEntryBB = ::function->getEntryNode();
4293 auto origCurrBB = ::basicBlock;
4294 auto newBB = ::function->makeNode();
4295 sz::replaceEntryNode(::function, newBB);
4296 ::basicBlock = newBB;
4297
4298 // this->handle = coro::getHandleParam();
4299 this->handle = sz::Call(::function, ::basicBlock, coro::getHandleParam);
4300
4301 // YieldType promise;
4302 // coro::setPromisePtr(handle, &promise); // For await
4303 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
4304 sz::Call(::function, ::basicBlock, coro::setPromisePtr, this->handle, this->promise);
4305
4306 // Branch to original entry block
4307 auto br = Ice::InstBr::create(::function, origEntryBB);
4308 ::basicBlock->appendInst(br);
4309
4310 // Restore current block for future instructions
4311 ::basicBlock = origCurrBB;
4312 }
4313
4314 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4315 void generateYield(Value *val)
4316 {
4317 // ... <REACTOR CODE> ...
4318 //
4319 // promise = val;
4320 // coro::switchToMainFiber(handle);
4321 //
4322 // ... <REACTOR CODE> ...
4323
4324 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
4325 sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
4326 }
4327
4328 // Adds instructions at the end of the current main coroutine function to end the coroutine.
4329 void generateCoroutineEnd()
4330 {
4331 // ... <REACTOR CODE> ...
4332 //
4333 // coro::setDone(handle);
4334 // coro::switchToMainFiber();
4335 // // Unreachable
4336 // }
4337 //
4338
4339 sz::Call(::function, ::basicBlock, coro::setDone, this->handle);
4340
4341 // A Win32 Fiber function must not end, otherwise it tears down the thread it's running on.
4342 // So we add code to switch back to the main thread.
4343 sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
4344 }
4345
4346 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4347
4348 // Generates the await function for the current coroutine.
4349 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4350 static FunctionUniquePtr generateAwaitFunction()
4351 {
4352 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4353 // {
4354 // if (coro::isDone())
4355 // {
4356 // return false;
4357 // }
4358 // else // resume
4359 // {
4360 // YieldType* promise = coro::getPromisePtr(handle);
4361 // *out = *promise;
4362 // coro::switchToRoutineFiber(handle);
4363 // return true;
4364 // }
4365 // }
4366
4367 // Subzero doesn't support bool types (IceType_i1) as return type
4368 const Ice::Type ReturnType = Ice::IceType_i32;
4369 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4370 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4371
4372 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4373 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4374
4375 Ice::Variable *handle = awaitFunc->getArgs()[0];
4376 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4377
4378 auto doneBlock = awaitFunc->makeNode();
4379 {
4380 // return false;
4381 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4382 doneBlock->appendInst(ret);
4383 }
4384
4385 auto resumeBlock = awaitFunc->makeNode();
4386 {
4387 // YieldType* promise = coro::getPromisePtr(handle);
4388 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4389
4390 // *out = *promise;
4391 // Load promise value
4392 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4393 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4394 resumeBlock->appendInst(load);
4395 // Then store it in output param
4396 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4397 resumeBlock->appendInst(store);
4398
4399 // coro::switchToRoutineFiber(handle);
4400 sz::Call(awaitFunc, resumeBlock, coro::switchToRoutineFiber, handle);
4401
4402 // return true;
4403 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4404 resumeBlock->appendInst(ret);
4405 }
4406
4407 // if (coro::isDone())
4408 // {
4409 // <doneBlock>
4410 // }
4411 // else // resume
4412 // {
4413 // <resumeBlock>
4414 // }
4415 Ice::CfgNode *bb = awaitFunc->getEntryNode();
4416 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone);
4417 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4418 bb->appendInst(br);
4419
4420 return FunctionUniquePtr{ awaitFunc };
4421 }
4422
4423 // Generates the destroy function for the current coroutine.
4424 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4425 static FunctionUniquePtr generateDestroyFunction()
4426 {
4427 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4428 // {
4429 // coro::convertMainFiberToThread(coroData);
4430 // coro::deleteRoutineFiber(handle);
4431 // coro::destroyCoroutineData(handle);
4432 // return;
4433 // }
4434
4435 const Ice::Type ReturnType = Ice::IceType_void;
4436 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4437
4438 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4439 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4440
4441 Ice::Variable *handle = destroyFunc->getArgs()[0];
4442
4443 auto *bb = destroyFunc->getEntryNode();
4444
4445 // coro::convertMainFiberToThread(coroData);
4446 sz::Call(destroyFunc, bb, coro::convertMainFiberToThread, handle);
4447
4448 // coro::deleteRoutineFiber(handle);
4449 sz::Call(destroyFunc, bb, coro::deleteRoutineFiber, handle);
4450
4451 // coro::destroyCoroutineData(handle);
4452 sz::Call(destroyFunc, bb, coro::destroyCoroutineData, handle);
4453
4454 // return;
4455 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4456 bb->appendInst(ret);
4457
4458 return FunctionUniquePtr{ destroyFunc };
4459 }
4460
4461private:
4462 Ice::Variable *handle{};
4463 Ice::Variable *promise{};
4464};
4465
4466static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4467{
4468 // This doubles up as our coroutine handle
4469 auto coroData = coro::createCoroutineData();
4470
4471 // Convert current thread to a fiber so we can create new fibers and switch to them
4472 coro::convertThreadToMainFiber(coroData);
4473
4474 coro::FiberFunc fiberFunc = [&]() {
4475 // Store handle in TLS so that the coroutine can grab it right away, before
4476 // any fiber switch occurs.
4477 coro::setHandleParam(coroData);
4478
4479 // Invoke the begin function in the context of the routine fiber
4480 beginFunc();
4481
4482 // Either it yielded, or finished. In either case, we switch back to the main fiber.
4483 // We don't ever return from this function, or the current thread will be destroyed.
4484 coro::switchToMainFiber(coroData);
4485 };
4486
4487 coro::createRoutineFiber(coroData, &fiberFunc);
4488
4489 // Fiber will now start running, executing the saved beginFunc
4490 coro::switchToRoutineFiber(coroData);
4491
4492 return coroData;
4493}
4494
4495void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4496{
4497 // Start by creating a regular function
4498 createFunction(yieldType, params);
4499
4500 // Save in case yield() is called
4501 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4502 ::coroYieldType = yieldType;
4503}
4504
4505void Nucleus::yield(Value *val)
4506{
4507 Variable::materializeAll();
4508
4509 // On first yield, we start generating coroutine functions
4510 if(!::coroGen)
4511 {
4512 ::coroGen = std::make_shared<CoroutineGenerator>();
4513 ::coroGen->generateCoroutineBegin();
4514 }
4515
4516 ASSERT(::coroGen);
4517 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004518}
4519
Ben Clayton713b8d32019-12-17 20:37:56 +00004520static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4521{
4522 return false;
4523}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004524
4525static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4526{
4527}
Nicolas Capens157ba262019-12-10 17:49:14 -05004528
4529std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4530{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004531 if(::coroGen)
4532 {
4533 // Finish generating coroutine functions
4534 {
4535 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4536 ::coroGen->generateCoroutineEnd();
4537 createRetVoidIfNoRet();
4538 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004539
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004540 auto awaitFunc = ::coroGen->generateAwaitFunction();
4541 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004542
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004543 // At this point, we no longer need the CoroutineGenerator.
4544 ::coroGen.reset();
4545 ::coroYieldType = nullptr;
4546
4547 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4548 { name, "await", "destroy" },
4549 cfgEdit);
4550
4551 return routine;
4552 }
4553 else
4554 {
4555 {
4556 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4557 createRetVoidIfNoRet();
4558 }
4559
4560 ::coroYieldType = nullptr;
4561
4562 // Not an actual coroutine (no yields), so return stubs for await and destroy
4563 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4564
4565 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4566 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4567 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4568 return routine;
4569 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004570}
4571
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004572Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004573{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004574 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4575
4576 if(isCoroutine)
4577 {
4578 return rr::invokeCoroutineBegin(func);
4579 }
4580 else
4581 {
4582 // For regular routines, just invoke the begin func directly
4583 return func();
4584 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004585}
Nicolas Capens157ba262019-12-10 17:49:14 -05004586
4587} // namespace rr