blob: 911ec6754a8bf9bc984a8c940efa1416f9a1ce37 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Antonio Maiorano62427e02020-02-13 09:18:05 -050017#include "Print.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000018#include "Reactor.hpp"
Antonio Maioranoaae33732020-02-14 14:52:34 -050019#include "ReactorDebugInfo.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040020
Nicolas Capens1a3ce872018-10-10 10:42:36 -040021#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000022#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040023
Nicolas Capens598f8d82016-09-26 15:09:10 -040024#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040025#include "src/IceCfgNode.h"
26#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000027#include "src/IceELFStreamer.h"
28#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040029#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031
Ben Clayton713b8d32019-12-17 20:37:56 +000032#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040033#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040035
36#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000037# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040038#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040039
Nicolas Capensbd65da92017-01-05 16:31:06 -050040#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000041# ifndef WIN32_LEAN_AND_MEAN
42# define WIN32_LEAN_AND_MEAN
43# endif // !WIN32_LEAN_AND_MEAN
44# ifndef NOMINMAX
45# define NOMINMAX
46# endif // !NOMINMAX
47# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050048#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040049
Ben Clayton683bad82020-02-10 23:57:09 +000050#include <array>
Nicolas Capens598f8d82016-09-26 15:09:10 -040051#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000052#include <limits>
53#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040054
// Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050057namespace {
Antonio Maiorano02a39532020-01-21 15:15:34 -050058namespace sz {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -050059void replaceEntryNode(Ice::Cfg *function, Ice::CfgNode *newEntryNode)
60{
61 ASSERT_MSG(function->getEntryNode() != nullptr, "Function should have an entry node");
62
63 if(function->getEntryNode() == newEntryNode)
64 {
65 return;
66 }
67
68 // Make this the new entry node
69 function->setEntryNode(newEntryNode);
70
71 // Reorder nodes so that new entry block comes first. This is required
72 // by Cfg::renumberInstructions, which expects the first node in the list
73 // to be the entry node.
74 {
75 auto nodes = function->getNodes();
76
77 // TODO(amaiorano): Fast path if newEntryNode is last? Can avoid linear search.
78
79 auto iter = std::find(nodes.begin(), nodes.end(), newEntryNode);
80 ASSERT_MSG(iter != nodes.end(), "New node should be in the function's node list");
81
82 nodes.erase(iter);
83 nodes.insert(nodes.begin(), newEntryNode);
84
85 // swapNodes replaces its nodes with the input one, and renumbers them,
86 // so our new entry node will be 0, and the previous will be 1.
87 function->swapNodes(nodes);
88 }
89}
90
91Ice::Cfg *createFunction(Ice::GlobalContext *context, Ice::Type returnType, const std::vector<Ice::Type> &paramTypes)
92{
93 uint32_t sequenceNumber = 0;
94 auto function = Ice::Cfg::create(context, sequenceNumber).release();
95
96 Ice::CfgLocalAllocatorScope allocScope{ function };
97
98 for(auto type : paramTypes)
99 {
100 Ice::Variable *arg = function->makeVariable(type);
101 function->addArg(arg);
102 }
103
104 Ice::CfgNode *node = function->makeNode();
105 function->setEntryNode(node);
106
107 return function;
108}
109
110Ice::Type getPointerType(Ice::Type elementType)
111{
112 if(sizeof(void *) == 8)
113 {
114 return Ice::IceType_i64;
115 }
116 else
117 {
118 return Ice::IceType_i32;
119 }
120}
121
122Ice::Variable *allocateStackVariable(Ice::Cfg *function, Ice::Type type, int arraySize = 0)
123{
124 int typeSize = Ice::typeWidthInBytes(type);
125 int totalSize = typeSize * (arraySize ? arraySize : 1);
126
127 auto bytes = Ice::ConstantInteger32::create(function->getContext(), Ice::IceType_i32, totalSize);
128 auto address = function->makeVariable(getPointerType(type));
129 auto alloca = Ice::InstAlloca::create(function, address, bytes, typeSize);
130 function->getEntryNode()->getInsts().push_front(alloca);
131
132 return address;
133}
134
135Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500136{
137 if(sizeof(void *) == 8)
138 {
139 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
140 }
141 else
142 {
143 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
144 }
145}
146
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500147// Wrapper for calls on C functions with Ice types
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500148Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Type retTy, void const *fptr, const std::vector<Ice::Operand *> &iceArgs, bool isVariadic)
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500149{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500150 // Subzero doesn't support boolean return values. Replace with an i32.
151 if(retTy == Ice::IceType_i1)
152 {
153 retTy = Ice::IceType_i32;
154 }
155
156 Ice::Variable *ret = nullptr;
157 if(retTy != Ice::IceType_void)
158 {
159 ret = function->makeVariable(retTy);
160 }
161
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500162 auto call = Ice::InstCall::create(function, iceArgs.size(), ret, getConstantPointer(function->getContext(), fptr), false, false, isVariadic);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500163 for(auto arg : iceArgs)
164 {
165 call->addArg(arg);
166 }
167
168 basicBlock->appendInst(call);
169 return ret;
170}
171
Antonio Maiorano62427e02020-02-13 09:18:05 -0500172// Wrapper for calls on C functions with Ice types
173template<typename Return, typename... CArgs, typename... RArgs>
174Ice::Variable *Call(Ice::Cfg *function, Ice::CfgNode *basicBlock, Return(fptr)(CArgs...), RArgs &&... args)
175{
176 Ice::Type retTy = T(rr::CToReactorT<Return>::getType());
177 std::vector<Ice::Operand *> iceArgs{ std::forward<RArgs>(args)... };
Antonio Maioranoad3e42a2020-02-26 14:23:09 -0500178 return Call(function, basicBlock, retTy, reinterpret_cast<void const *>(fptr), iceArgs, false);
Antonio Maiorano62427e02020-02-13 09:18:05 -0500179}
180
Antonio Maiorano02a39532020-01-21 15:15:34 -0500181// Returns a non-const variable copy of const v
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500182Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500183{
184 Ice::Variable *result = function->makeVariable(v->getType());
185 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
186 basicBlock->appendInst(cast);
187 return result;
188}
189
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500190Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
Antonio Maiorano02a39532020-01-21 15:15:34 -0500191{
192 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
193 // absolute address. We circumvent this by casting to a non-const variable, and loading
194 // from that.
195 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
196 {
197 ptr = sz::createUnconstCast(function, basicBlock, cptr);
198 }
199
200 Ice::Variable *result = function->makeVariable(type);
201 auto load = Ice::InstLoad::create(function, result, ptr, align);
202 basicBlock->appendInst(load);
203
204 return result;
205}
206
207} // namespace sz
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500208} // namespace
209
Ben Clayton713b8d32019-12-17 20:37:56 +0000210namespace rr {
211class ELFMemoryStreamer;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500212class CoroutineGenerator;
213} // namespace rr
Nicolas Capens157ba262019-12-10 17:49:14 -0500214
215namespace {
216
217// Default configuration settings. Must be accessed under mutex lock.
218std::mutex defaultConfigLock;
219rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000220{
Nicolas Capens157ba262019-12-10 17:49:14 -0500221 // This uses a static in a function to avoid the cost of a global static
222 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
223 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000224 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500225 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000226}
227
Nicolas Capens157ba262019-12-10 17:49:14 -0500228Ice::GlobalContext *context = nullptr;
229Ice::Cfg *function = nullptr;
230Ice::CfgNode *basicBlock = nullptr;
231Ice::CfgLocalAllocatorScope *allocator = nullptr;
232rr::ELFMemoryStreamer *routine = nullptr;
233
234std::mutex codegenMutex;
235
236Ice::ELFFileStreamer *elfFile = nullptr;
237Ice::Fdstream *out = nullptr;
238
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500239// Coroutine globals
240rr::Type *coroYieldType = nullptr;
241std::shared_ptr<rr::CoroutineGenerator> coroGen;
242
Nicolas Capens157ba262019-12-10 17:49:14 -0500243} // Anonymous namespace
244
245namespace {
246
// Define the GCC/Clang-style x86 architecture macros when only the
// _M_IX86 / _M_AMD64 / _M_X64 spellings are available, so the rest of the
// file can test a single set of macros.
#if defined(_M_IX86) && !defined(__i386__)
#	define __i386__ 1
#endif

#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
#	define __x86_64__ 1
#endif
254
Antonio Maiorano370cba52019-12-31 11:36:07 -0500255Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400256{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500257 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100258 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500259 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000260 case rr::Optimization::Level::None: return Ice::Opt_m1;
261 case rr::Optimization::Level::Less: return Ice::Opt_m1;
262 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500263 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
264 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100265 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500266 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400267}
268
Antonio Maiorano370cba52019-12-31 11:36:07 -0500269Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
270{
271 switch(memoryOrder)
272 {
273 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
274 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
275 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
276 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
277 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
278 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
279 }
280 return Ice::Intrinsics::MemoryOrderInvalid;
281}
282
// Host CPU feature flags, evaluated once when the static members are
// initialized.
class CPUID
{
public:
	const static bool ARM;     // True when compiled for an ARM target.
	const static bool SSE4_1;  // True when the x86 host reports SSE4.1.

private:
	// Executes the CPUID instruction for leaf 'info' and stores EAX, EBX, ECX
	// and EDX in registers[0..3]. On non-x86 targets all four are set to zero.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Compile-time architecture check; unknown architectures fail the build.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Queries CPUID leaf 1 and tests bit 19 of ECX. Always false off x86.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		const int featureMask = 0x00080000;  // bit 19

		int registers[4];
		cpuid(registers, 1);

		const int ecx = registers[2];
		return (ecx & featureMask) != 0;
#else
		return false;
#endif
	}
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
335const bool emulateIntrinsics = false;
336const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400337
Nicolas Capens157ba262019-12-10 17:49:14 -0500338constexpr bool subzeroDumpEnabled = false;
339constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500340
341#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500342static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
343static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500344#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500345
346} // anonymous namespace
347
348namespace rr {
349
// Returns the name of this Reactor backend.
std::string BackendName()
{
	return std::string{ "Subzero" };
}
354
Ben Clayton713b8d32019-12-17 20:37:56 +0000355const Capabilities Caps = {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500356#if defined(_WIN32)
357 true, // CoroutinesSupported
358#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000359 false, // CoroutinesSupported
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500360#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500361};
362
363enum EmulatedType
364{
365 EmulatedShift = 16,
366 EmulatedV2 = 2 << EmulatedShift,
367 EmulatedV4 = 4 << EmulatedShift,
368 EmulatedV8 = 8 << EmulatedShift,
369 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
370
371 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
372 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
373 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000374 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
375 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500376 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
377};
378
Ben Clayton713b8d32019-12-17 20:37:56 +0000379class Value : public Ice::Operand
380{};
381class SwitchCases : public Ice::InstSwitch
382{};
383class BasicBlock : public Ice::CfgNode
384{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500385
386Ice::Type T(Type *t)
387{
388 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
389 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500390}
391
Nicolas Capens157ba262019-12-10 17:49:14 -0500392Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400393{
Ben Clayton713b8d32019-12-17 20:37:56 +0000394 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500395}
396
397Type *T(EmulatedType t)
398{
Ben Clayton713b8d32019-12-17 20:37:56 +0000399 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500400}
401
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500402std::vector<Ice::Type> T(const std::vector<Type *> &types)
403{
404 std::vector<Ice::Type> result;
405 result.reserve(types.size());
406 for(auto &t : types)
407 {
408 result.push_back(T(t));
409 }
410 return result;
411}
412
Nicolas Capens157ba262019-12-10 17:49:14 -0500413Value *V(Ice::Operand *v)
414{
Ben Clayton713b8d32019-12-17 20:37:56 +0000415 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500416}
417
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500418Ice::Operand *V(Value *v)
419{
Antonio Maiorano38c065d2020-01-30 09:51:37 -0500420 return reinterpret_cast<Ice::Operand *>(v);
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500421}
422
Antonio Maiorano62427e02020-02-13 09:18:05 -0500423std::vector<Ice::Operand *> V(const std::vector<Value *> &values)
424{
425 std::vector<Ice::Operand *> result;
426 result.reserve(values.size());
427 for(auto &v : values)
428 {
429 result.push_back(V(v));
430 }
431 return result;
432}
433
Nicolas Capens157ba262019-12-10 17:49:14 -0500434BasicBlock *B(Ice::CfgNode *b)
435{
Ben Clayton713b8d32019-12-17 20:37:56 +0000436 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500437}
438
439static size_t typeSize(Type *type)
440{
441 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400442 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500443 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800444 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000445 case Type_v2i32: return 8;
446 case Type_v4i16: return 8;
447 case Type_v2i16: return 4;
448 case Type_v8i8: return 8;
449 case Type_v4i8: return 4;
450 case Type_v2f32: return 8;
451 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500452 }
453 }
454
455 return Ice::typeWidthInBytes(T(type));
456}
457
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500458static void createRetVoidIfNoRet()
459{
460 if(::basicBlock->getInsts().empty() || ::basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
461 {
462 Nucleus::createRetVoid();
463 }
464}
465
Ben Clayton713b8d32019-12-17 20:37:56 +0000466using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
467using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500468
469inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
470{
Ben Clayton713b8d32019-12-17 20:37:56 +0000471 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500472}
473
474inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
475{
476 return &sectionHeader(elfHeader)[index];
477}
478
479static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
480{
481 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
482
483 uint32_t index = relocation.getSymbol();
484 int table = relocationTable.sh_link;
485 void *symbolValue = nullptr;
486
487 if(index != SHN_UNDEF)
488 {
489 if(table == SHN_UNDEF) return nullptr;
490 const SectionHeader *symbolTable = elfSection(elfHeader, table);
491
492 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
493 if(index >= symtab_entries)
494 {
495 ASSERT(index < symtab_entries && "Symbol Index out of range");
496 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800497 }
498
Nicolas Capens157ba262019-12-10 17:49:14 -0500499 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000500 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500501 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800502
Nicolas Capens157ba262019-12-10 17:49:14 -0500503 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400504 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500505 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000506 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400507 }
508 else
509 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500510 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400511 }
Nicolas Capens66478362016-10-13 15:36:36 -0400512 }
513
Nicolas Capens157ba262019-12-10 17:49:14 -0500514 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000515 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500516
517 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400518 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400519 switch(relocation.getType())
520 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000521 case R_ARM_NONE:
522 // No relocation
523 break;
524 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500525 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000526 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500527 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
528 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
529 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400530 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000531 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500532 {
533 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
534 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
535 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400536 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000537 default:
538 ASSERT(false && "Unsupported relocation type");
539 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400540 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500541 }
542 else
543 {
544 switch(relocation.getType())
545 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000546 case R_386_NONE:
547 // No relocation
548 break;
549 case R_386_32:
550 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
551 break;
552 case R_386_PC32:
553 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
554 break;
555 default:
556 ASSERT(false && "Unsupported relocation type");
557 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500558 }
Nicolas Capens66478362016-10-13 15:36:36 -0400559 }
560
Nicolas Capens157ba262019-12-10 17:49:14 -0500561 return symbolValue;
562}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400563
Nicolas Capens157ba262019-12-10 17:49:14 -0500564static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
565{
566 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
567
568 uint32_t index = relocation.getSymbol();
569 int table = relocationTable.sh_link;
570 void *symbolValue = nullptr;
571
572 if(index != SHN_UNDEF)
573 {
574 if(table == SHN_UNDEF) return nullptr;
575 const SectionHeader *symbolTable = elfSection(elfHeader, table);
576
577 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
578 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400579 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500580 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400581 return nullptr;
582 }
583
Nicolas Capens157ba262019-12-10 17:49:14 -0500584 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000585 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500586 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400587
Nicolas Capens157ba262019-12-10 17:49:14 -0500588 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400589 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500590 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000591 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500592 }
593 else
594 {
595 return nullptr;
596 }
597 }
Nicolas Capens66478362016-10-13 15:36:36 -0400598
Nicolas Capens157ba262019-12-10 17:49:14 -0500599 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000600 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
601 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400602
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 switch(relocation.getType())
604 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000605 case R_X86_64_NONE:
606 // No relocation
607 break;
608 case R_X86_64_64:
609 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
610 break;
611 case R_X86_64_PC32:
612 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
613 break;
614 case R_X86_64_32S:
615 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
616 break;
617 default:
618 ASSERT(false && "Unsupported relocation type");
619 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500620 }
621
622 return symbolValue;
623}
624
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500625void *loadImage(uint8_t *const elfImage, size_t &codeSize, const char *functionName = nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -0500626{
Ben Clayton713b8d32019-12-17 20:37:56 +0000627 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500628
629 if(!elfHeader->checkMagic())
630 {
631 return nullptr;
632 }
633
634 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000635 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
636#if defined(__i386__)
637 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
638#elif defined(__x86_64__)
639 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
640#elif defined(__arm__)
641 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
642#elif defined(__aarch64__)
643 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
644#elif defined(__mips__)
645 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
646#else
647# error "Unsupported platform"
648#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500649
Ben Clayton713b8d32019-12-17 20:37:56 +0000650 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500651 void *entry = nullptr;
652
653 for(int i = 0; i < elfHeader->e_shnum; i++)
654 {
655 if(sectionHeader[i].sh_type == SHT_PROGBITS)
656 {
657 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
658 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500659 auto getCurrSectionName = [&]() {
660 auto sectionNameOffset = sectionHeader[elfHeader->e_shstrndx].sh_offset + sectionHeader[i].sh_name;
661 return reinterpret_cast<const char *>(elfImage + sectionNameOffset);
662 };
663 if(functionName && strstr(getCurrSectionName(), functionName) == nullptr)
664 {
665 continue;
666 }
667
Nicolas Capens157ba262019-12-10 17:49:14 -0500668 entry = elfImage + sectionHeader[i].sh_offset;
669 codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400670 }
671 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500672 else if(sectionHeader[i].sh_type == SHT_REL)
673 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000674 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400675
Nicolas Capens157ba262019-12-10 17:49:14 -0500676 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
677 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000678 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500679 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
680 }
681 }
682 else if(sectionHeader[i].sh_type == SHT_RELA)
683 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000684 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500685
686 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
687 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000688 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500689 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
690 }
691 }
692 }
693
694 return entry;
695}
696
697template<typename T>
698struct ExecutableAllocator
699{
700 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000701 template<class U>
702 ExecutableAllocator(const ExecutableAllocator<U> &other)
703 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500704
705 using value_type = T;
706 using size_type = std::size_t;
707
708 T *allocate(size_type n)
709 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000710 return (T *)allocateMemoryPages(
711 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500712 }
713
714 void deallocate(T *p, size_type n)
715 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800716 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500717 }
718};
719
720class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
721{
722 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
723 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
724
725public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000726 ELFMemoryStreamer()
727 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500728 {
729 position = 0;
730 buffer.reserve(0x1000);
731 }
732
733 ~ELFMemoryStreamer() override
734 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500735 }
736
737 void write8(uint8_t Value) override
738 {
739 if(position == (uint64_t)buffer.size())
740 {
741 buffer.push_back(Value);
742 position++;
743 }
744 else if(position < (uint64_t)buffer.size())
745 {
746 buffer[position] = Value;
747 position++;
748 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000749 else
750 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500751 }
752
753 void writeBytes(llvm::StringRef Bytes) override
754 {
755 std::size_t oldSize = buffer.size();
756 buffer.resize(oldSize + Bytes.size());
757 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
758 position += Bytes.size();
759 }
760
761 uint64_t tell() const override { return position; }
762
763 void seek(uint64_t Off) override { position = Off; }
764
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500765 const void *getEntryByName(const char *name)
Nicolas Capens157ba262019-12-10 17:49:14 -0500766 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500767 size_t codeSize = 0;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500768 const void *entry = loadImage(&buffer[0], codeSize, name);
Nicolas Capens157ba262019-12-10 17:49:14 -0500769
770#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500771 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
772#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000773 __builtin___clear_cache((char *)entry, (char *)entry + codeSize);
Nicolas Capens157ba262019-12-10 17:49:14 -0500774#endif
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500775
Nicolas Capens598f8d82016-09-26 15:09:10 -0400776 return entry;
777 }
778
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500779 void finalize()
780 {
781 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
782
783 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
784 }
785
Ben Clayton713b8d32019-12-17 20:37:56 +0000786 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400787 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500788 ASSERT(func);
789 funcs[index] = func;
790 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400791
Nicolas Capens157ba262019-12-10 17:49:14 -0500792 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400793 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500794 ASSERT(funcs[index]);
795 return funcs[index];
796 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400797
Antonio Maiorano02a39532020-01-21 15:15:34 -0500798 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500799 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500800 // TODO(b/148086935): Replace with a buffer allocator.
801 size_t space = size + alignment;
802 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
803 void *ptr = buf.get();
804 void *alignedPtr = std::align(alignment, size, ptr, space);
805 ASSERT(alignedPtr);
806 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500807 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500808 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500809 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400810
Nicolas Capens157ba262019-12-10 17:49:14 -0500811private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000812 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500813 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
814 std::size_t position;
815 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500816};
817
#ifdef ENABLE_RR_PRINT
// Emits, into the current basic block, a call to ::printf with vals as arguments.
void VPrintf(const std::vector<Value *> &vals)
{
	const void *printfAddress = reinterpret_cast<const void *>(::printf);
	sz::Call(::function, ::basicBlock, Ice::IceType_i32, printfAddress, V(vals), true);
}
#endif  // ENABLE_RR_PRINT
824
Nicolas Capens157ba262019-12-10 17:49:14 -0500825Nucleus::Nucleus()
826{
Ben Clayton713b8d32019-12-17 20:37:56 +0000827 ::codegenMutex.lock(); // Reactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500828
829 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
830 Ice::ClFlags::getParsedClFlags(Flags);
831
Ben Clayton713b8d32019-12-17 20:37:56 +0000832#if defined(__arm__)
833 Flags.setTargetArch(Ice::Target_ARM32);
834 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
835#elif defined(__mips__)
836 Flags.setTargetArch(Ice::Target_MIPS32);
837 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
838#else // x86
839 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
840 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
841#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500842 Flags.setOutFileType(Ice::FT_Elf);
843 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
844 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
845 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
846 Flags.setDisableHybridAssembly(true);
847
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500848 // Emit functions into separate sections in the ELF so we can find them by name
849 Flags.setFunctionSections(true);
850
Nicolas Capens157ba262019-12-10 17:49:14 -0500851 static llvm::raw_os_ostream cout(std::cout);
852 static llvm::raw_os_ostream cerr(std::cerr);
853
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500854 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500855 {
856 // Decorate text asm with liveness info
857 Flags.setDecorateAsm(true);
858 }
859
Ben Clayton713b8d32019-12-17 20:37:56 +0000860 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500861 {
862 std::error_code errorCode;
863 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
864 ::elfFile = new Ice::ELFFileStreamer(*out);
865 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
866 }
867 else
868 {
869 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
870 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
871 ::routine = elfMemory;
872 }
873}
874
875Nucleus::~Nucleus()
876{
877 delete ::routine;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500878 ::routine = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500879
880 delete ::allocator;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500881 ::allocator = nullptr;
882
Nicolas Capens157ba262019-12-10 17:49:14 -0500883 delete ::function;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500884 ::function = nullptr;
885
Nicolas Capens157ba262019-12-10 17:49:14 -0500886 delete ::context;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500887 ::context = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500888
889 delete ::elfFile;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500890 ::elfFile = nullptr;
891
Nicolas Capens157ba262019-12-10 17:49:14 -0500892 delete ::out;
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500893 ::out = nullptr;
894
895 ::basicBlock = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500896
897 ::codegenMutex.unlock();
898}
899
900void Nucleus::setDefaultConfig(const Config &cfg)
901{
902 std::unique_lock<std::mutex> lock(::defaultConfigLock);
903 ::defaultConfig() = cfg;
904}
905
906void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
907{
908 std::unique_lock<std::mutex> lock(::defaultConfigLock);
909 auto &config = ::defaultConfig();
910 config = cfgEdit.apply(config);
911}
912
913Config Nucleus::getDefaultConfig()
914{
915 std::unique_lock<std::mutex> lock(::defaultConfigLock);
916 return ::defaultConfig();
917}
918
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500919// This function lowers and produces executable binary code in memory for the input functions,
920// and returns a Routine with the entry points to these functions.
921template<size_t Count>
922static std::shared_ptr<Routine> acquireRoutine(Ice::Cfg *const (&functions)[Count], const char *const (&names)[Count], const Config::Edit &cfgEdit)
Nicolas Capens157ba262019-12-10 17:49:14 -0500923{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500924 // This logic is modeled after the IceCompiler, as well as GlobalContext::translateFunctions
925 // and GlobalContext::emitItems.
926
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500927 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500928 {
929 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500930 ::context->getStrDump().SetUnbuffered();
Nicolas Capens157ba262019-12-10 17:49:14 -0500931 }
932
933 ::context->emitFileHeader();
934
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500935 // Translate
936
937 for(size_t i = 0; i < Count; ++i)
Nicolas Capens157ba262019-12-10 17:49:14 -0500938 {
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500939 Ice::Cfg *currFunc = functions[i];
940
941 // Install function allocator in TLS for Cfg-specific container allocators
942 Ice::CfgLocalAllocatorScope allocScope(currFunc);
943
944 currFunc->setFunctionName(Ice::GlobalString::createWithString(::context, names[i]));
945
946 rr::optimize(currFunc);
947
948 currFunc->computeInOutEdges();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500949 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500950
951 currFunc->translate();
Antonio Maioranob3d9a2a2020-02-14 14:38:04 -0500952 ASSERT_MSG(!currFunc->hasError(), "%s", currFunc->getError().c_str());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500953
954 currFunc->getAssembler<>()->setInternal(currFunc->getInternal());
955
956 if(subzeroEmitTextAsm)
957 {
958 currFunc->emit();
959 }
960
961 currFunc->emitIAS();
Nicolas Capens157ba262019-12-10 17:49:14 -0500962 }
963
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500964 // Emit items
965
966 ::context->lowerGlobals("");
967
Nicolas Capens157ba262019-12-10 17:49:14 -0500968 auto objectWriter = ::context->getObjectWriter();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500969
970 for(size_t i = 0; i < Count; ++i)
971 {
972 Ice::Cfg *currFunc = functions[i];
973
974 // Accumulate globals from functions to emit into the "last" section at the end
975 auto globals = currFunc->getGlobalInits();
976 if(globals && !globals->empty())
977 {
978 ::context->getGlobals()->merge(globals.get());
979 }
980
981 auto assembler = currFunc->releaseAssembler();
982 assembler->alignFunction();
983 objectWriter->writeFunctionCode(currFunc->getFunctionName(), currFunc->getInternal(), assembler.get());
984 }
985
Nicolas Capens157ba262019-12-10 17:49:14 -0500986 ::context->lowerGlobals("last");
987 ::context->lowerConstants();
988 ::context->lowerJumpTables();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500989
Nicolas Capens157ba262019-12-10 17:49:14 -0500990 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500991 ::context->emitTargetRODataSections();
Nicolas Capens157ba262019-12-10 17:49:14 -0500992 objectWriter->writeNonUserSections();
993
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -0500994 // Done compiling functions, get entry pointers to each of them
995 for(size_t i = 0; i < Count; ++i)
996 {
997 const void *entry = ::routine->getEntryByName(names[i]);
998 ::routine->setEntry(i, entry);
999 }
1000
1001 ::routine->finalize();
Nicolas Capens157ba262019-12-10 17:49:14 -05001002
1003 Routine *handoffRoutine = ::routine;
1004 ::routine = nullptr;
1005
1006 return std::shared_ptr<Routine>(handoffRoutine);
1007}
1008
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001009std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
1010{
1011 createRetVoidIfNoRet();
1012 return rr::acquireRoutine({ ::function }, { name }, cfgEdit);
1013}
1014
Nicolas Capens157ba262019-12-10 17:49:14 -05001015Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
1016{
1017 Ice::Type type = T(t);
1018 int typeSize = Ice::typeWidthInBytes(type);
1019 int totalSize = typeSize * (arraySize ? arraySize : 1);
1020
1021 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
1022 auto address = ::function->makeVariable(T(getPointerType(t)));
1023 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
1024 ::function->getEntryNode()->getInsts().push_front(alloca);
1025
1026 return V(address);
1027}
1028
1029BasicBlock *Nucleus::createBasicBlock()
1030{
1031 return B(::function->makeNode());
1032}
1033
1034BasicBlock *Nucleus::getInsertBlock()
1035{
1036 return B(::basicBlock);
1037}
1038
1039void Nucleus::setInsertBlock(BasicBlock *basicBlock)
1040{
Ben Clayton713b8d32019-12-17 20:37:56 +00001041 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -05001042
1043 Variable::materializeAll();
1044
1045 ::basicBlock = basicBlock;
1046}
1047
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001048void Nucleus::createFunction(Type *returnType, const std::vector<Type *> &paramTypes)
Nicolas Capens157ba262019-12-10 17:49:14 -05001049{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001050 ASSERT(::function == nullptr);
1051 ASSERT(::allocator == nullptr);
1052 ASSERT(::basicBlock == nullptr);
1053
1054 ::function = sz::createFunction(::context, T(returnType), T(paramTypes));
1055
1056 // NOTE: The scoped allocator sets the TLS allocator to the one in the function. This global one
1057 // becomes invalid if another one is created; for example, when creating await and destroy functions
1058 // for coroutines, in which case, we must make sure to create a new scoped allocator for ::function again.
1059 // TODO: Get rid of this as a global, and create scoped allocs in every Nucleus function instead.
Nicolas Capens157ba262019-12-10 17:49:14 -05001060 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
1061
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001062 ::basicBlock = ::function->getEntryNode();
Nicolas Capens157ba262019-12-10 17:49:14 -05001063}
1064
1065Value *Nucleus::getArgument(unsigned int index)
1066{
1067 return V(::function->getArgs()[index]);
1068}
1069
1070void Nucleus::createRetVoid()
1071{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001072 RR_DEBUG_INFO_UPDATE_LOC();
1073
Nicolas Capens157ba262019-12-10 17:49:14 -05001074 // Code generated after this point is unreachable, so any variables
1075 // being read can safely return an undefined value. We have to avoid
1076 // materializing variables after the terminator ret instruction.
1077 Variable::killUnmaterialized();
1078
1079 Ice::InstRet *ret = Ice::InstRet::create(::function);
1080 ::basicBlock->appendInst(ret);
1081}
1082
1083void Nucleus::createRet(Value *v)
1084{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001085 RR_DEBUG_INFO_UPDATE_LOC();
1086
Nicolas Capens157ba262019-12-10 17:49:14 -05001087 // Code generated after this point is unreachable, so any variables
1088 // being read can safely return an undefined value. We have to avoid
1089 // materializing variables after the terminator ret instruction.
1090 Variable::killUnmaterialized();
1091
1092 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
1093 ::basicBlock->appendInst(ret);
1094}
1095
1096void Nucleus::createBr(BasicBlock *dest)
1097{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001098 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001099 Variable::materializeAll();
1100
1101 auto br = Ice::InstBr::create(::function, dest);
1102 ::basicBlock->appendInst(br);
1103}
1104
1105void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1106{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001107 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001108 Variable::materializeAll();
1109
1110 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
1111 ::basicBlock->appendInst(br);
1112}
1113
1114static bool isCommutative(Ice::InstArithmetic::OpKind op)
1115{
1116 switch(op)
1117 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001118 case Ice::InstArithmetic::Add:
1119 case Ice::InstArithmetic::Fadd:
1120 case Ice::InstArithmetic::Mul:
1121 case Ice::InstArithmetic::Fmul:
1122 case Ice::InstArithmetic::And:
1123 case Ice::InstArithmetic::Or:
1124 case Ice::InstArithmetic::Xor:
1125 return true;
1126 default:
1127 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -05001128 }
1129}
1130
1131static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
1132{
1133 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
1134
1135 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
1136
1137 Ice::Variable *result = ::function->makeVariable(lhs->getType());
1138 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
1139 ::basicBlock->appendInst(arithmetic);
1140
1141 return V(result);
1142}
1143
1144Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1145{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001146 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001147 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
1148}
1149
1150Value *Nucleus::createSub(Value *lhs, Value *rhs)
1151{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001152 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001153 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
1154}
1155
1156Value *Nucleus::createMul(Value *lhs, Value *rhs)
1157{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001158 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001159 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
1160}
1161
1162Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1163{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001164 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001165 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
1166}
1167
1168Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1169{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001170 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001171 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
1172}
1173
1174Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1175{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001176 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001177 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
1178}
1179
1180Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1181{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001182 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001183 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
1184}
1185
1186Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1187{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001188 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001189 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
1190}
1191
1192Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1193{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001194 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001195 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
1196}
1197
1198Value *Nucleus::createURem(Value *lhs, Value *rhs)
1199{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001200 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001201 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
1202}
1203
1204Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1205{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001206 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001207 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
1208}
1209
1210Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1211{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001212 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001213 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
1214 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Ben Claytonce54c592020-02-07 11:30:51 +00001215 UNIMPLEMENTED("b/148139679 Nucleus::createFRem");
Antonio Maiorano5ef91b82020-01-21 15:10:22 -05001216 return nullptr;
1217}
1218
1219RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
1220{
1221 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -05001222}
1223
1224Value *Nucleus::createShl(Value *lhs, Value *rhs)
1225{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001226 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001227 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
1228}
1229
1230Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1231{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001232 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001233 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
1234}
1235
1236Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1237{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001238 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001239 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
1240}
1241
1242Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1243{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001244 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001245 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
1246}
1247
1248Value *Nucleus::createOr(Value *lhs, Value *rhs)
1249{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001250 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001251 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
1252}
1253
1254Value *Nucleus::createXor(Value *lhs, Value *rhs)
1255{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001256 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001257 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1258}
1259
1260Value *Nucleus::createNeg(Value *v)
1261{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001262 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001263 return createSub(createNullValue(T(v->getType())), v);
1264}
1265
1266Value *Nucleus::createFNeg(Value *v)
1267{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001268 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00001269 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1270 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001271
1272 return createFSub(negativeZero, v);
1273}
1274
1275Value *Nucleus::createNot(Value *v)
1276{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001277 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001278 if(Ice::isScalarIntegerType(v->getType()))
1279 {
1280 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1281 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001282 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001283 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001284 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001285 return createXor(v, createConstantVector(c, T(v->getType())));
1286 }
1287}
1288
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001289static void validateAtomicAndMemoryOrderArgs(bool atomic, std::memory_order memoryOrder)
1290{
1291#if defined(__i386__) || defined(__x86_64__)
1292 // We're good, atomics and strictest memory order (except seq_cst) are guaranteed.
1293 // Note that sequential memory ordering could be guaranteed by using x86's LOCK prefix.
1294 // Note also that relaxed memory order could be implemented using MOVNTPS and friends.
1295#else
1296 if(atomic)
1297 {
1298 UNIMPLEMENTED("b/150475088 Atomic load/store not implemented for current platform");
1299 }
1300 if(memoryOrder != std::memory_order_relaxed)
1301 {
1302 UNIMPLEMENTED("b/150475088 Memory order other than memory_order_relaxed not implemented for current platform");
1303 }
1304#endif
1305
1306 // Vulkan doesn't allow sequential memory order
1307 ASSERT(memoryOrder != std::memory_order_seq_cst);
1308}
1309
Nicolas Capens157ba262019-12-10 17:49:14 -05001310Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001312 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001313 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001314
1315 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001316 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001317
Ben Clayton713b8d32019-12-17 20:37:56 +00001318 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001319 {
1320 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001321 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001322 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001323 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001324 auto pointer = RValue<Pointer<Byte>>(ptr);
1325 Int x = *Pointer<Int>(pointer);
1326
1327 Int4 vector;
1328 vector = Insert(vector, x, 0);
1329
Antonio Maiorano02a39532020-01-21 15:15:34 -05001330 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001331 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1332 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001333 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001334 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001335 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001336 ASSERT_MSG(!atomic, "Emulated 64-bit loads are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001337 auto pointer = RValue<Pointer<Byte>>(ptr);
1338 Int x = *Pointer<Int>(pointer);
1339 Int y = *Pointer<Int>(pointer + 4);
1340
1341 Int4 vector;
1342 vector = Insert(vector, x, 0);
1343 vector = Insert(vector, y, 1);
1344
Antonio Maiorano02a39532020-01-21 15:15:34 -05001345 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001346 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1347 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001348 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001349 else
1350 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001351 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001352 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001353 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001354 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001355 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001356 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001357 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1358 load->addArg(ptr);
1359 load->addArg(::context->getConstantInt32(typeSize(type)));
1360 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001361 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001362 }
1363 else
1364 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001365 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001366 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001367
Antonio Maiorano02a39532020-01-21 15:15:34 -05001368 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001369 return V(result);
1370}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001371
Nicolas Capens157ba262019-12-10 17:49:14 -05001372Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1373{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001374 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001375 validateAtomicAndMemoryOrderArgs(atomic, memoryOrder);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001376
Ben Clayton713b8d32019-12-17 20:37:56 +00001377#if __has_feature(memory_sanitizer)
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001378 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
Ben Clayton713b8d32019-12-17 20:37:56 +00001379 if(align != 0)
1380 {
1381 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1382 call->addArg(ptr);
1383 call->addArg(::context->getConstantInt64(typeSize(type)));
1384 ::basicBlock->appendInst(call);
1385 }
1386#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001387
Nicolas Capens157ba262019-12-10 17:49:14 -05001388 int valueType = (int)reinterpret_cast<intptr_t>(type);
1389
Ben Clayton713b8d32019-12-17 20:37:56 +00001390 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001391 {
1392 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001393 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001394 if(typeSize(type) == 4)
1395 {
1396 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1397 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1398 ::basicBlock->appendInst(bitcast);
1399
1400 RValue<Int4> v(V(vector));
1401
1402 auto pointer = RValue<Pointer<Byte>>(ptr);
1403 Int x = Extract(v, 0);
1404 *Pointer<Int>(pointer) = x;
1405 }
1406 else if(typeSize(type) == 8)
1407 {
Antonio Maiorano2e6cd9c2020-02-28 15:48:22 -05001408 ASSERT_MSG(!atomic, "Emulated 64-bit stores are not atomic");
Nicolas Capens157ba262019-12-10 17:49:14 -05001409 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1410 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1411 ::basicBlock->appendInst(bitcast);
1412
1413 RValue<Int4> v(V(vector));
1414
1415 auto pointer = RValue<Pointer<Byte>>(ptr);
1416 Int x = Extract(v, 0);
1417 *Pointer<Int>(pointer) = x;
1418 Int y = Extract(v, 1);
1419 *Pointer<Int>(pointer + 4) = y;
1420 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001421 else
1422 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001423 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001424 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001425 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001426 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001427 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1428 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1429 store->addArg(value);
1430 store->addArg(ptr);
1431 store->addArg(::context->getConstantInt32(typeSize(type)));
1432 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001433 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001434 }
1435 else
1436 {
1437 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001438
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001439 auto store = Ice::InstStore::create(::function, V(value), V(ptr), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001440 ::basicBlock->appendInst(store);
1441 }
1442
1443 return value;
1444}
1445
1446Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1447{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001448 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001449 ASSERT(index->getType() == Ice::IceType_i32);
1450
1451 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1452 {
1453 int32_t offset = constant->getValue() * (int)typeSize(type);
1454
1455 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001456 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001457 return ptr;
1458 }
1459
Nicolas Capens157ba262019-12-10 17:49:14 -05001460 return createAdd(ptr, createConstantInt(offset));
1461 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001462
Nicolas Capens157ba262019-12-10 17:49:14 -05001463 if(!Ice::isByteSizedType(T(type)))
1464 {
1465 index = createMul(index, createConstantInt((int)typeSize(type)));
1466 }
1467
Ben Clayton713b8d32019-12-17 20:37:56 +00001468 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001469 {
1470 if(unsignedIndex)
1471 {
1472 index = createZExt(index, T(Ice::IceType_i64));
1473 }
1474 else
1475 {
1476 index = createSExt(index, T(Ice::IceType_i64));
1477 }
1478 }
1479
1480 return createAdd(ptr, index);
1481}
1482
Antonio Maiorano370cba52019-12-31 11:36:07 -05001483static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1484{
1485 Ice::Variable *result = ::function->makeVariable(value->getType());
1486
1487 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1488 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1489 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1490 auto op = ::context->getConstantInt32(rmwOp);
1491 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1492 inst->addArg(op);
1493 inst->addArg(ptr);
1494 inst->addArg(value);
1495 inst->addArg(order);
1496 ::basicBlock->appendInst(inst);
1497
1498 return V(result);
1499}
1500
Nicolas Capens157ba262019-12-10 17:49:14 -05001501Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1502{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001503 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001504 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001505}
1506
1507Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1508{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001509 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001510 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001511}
1512
1513Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1514{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001515 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001516 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001517}
1518
1519Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1520{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001521 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001522 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001523}
1524
1525Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1526{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001527 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001528 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001529}
1530
1531Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1532{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001533 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001534 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001535}
1536
1537Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1538{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001539 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05001540 Ice::Variable *result = ::function->makeVariable(value->getType());
1541
1542 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1543 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1544 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1545 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1546 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1547 inst->addArg(ptr);
1548 inst->addArg(compare);
1549 inst->addArg(value);
1550 inst->addArg(orderEq);
1551 inst->addArg(orderNeq);
1552 ::basicBlock->appendInst(inst);
1553
1554 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001555}
1556
1557static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1558{
1559 if(v->getType() == T(destType))
1560 {
1561 return v;
1562 }
1563
1564 Ice::Variable *result = ::function->makeVariable(T(destType));
1565 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1566 ::basicBlock->appendInst(cast);
1567
1568 return V(result);
1569}
1570
1571Value *Nucleus::createTrunc(Value *v, Type *destType)
1572{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001573 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001574 return createCast(Ice::InstCast::Trunc, v, destType);
1575}
1576
1577Value *Nucleus::createZExt(Value *v, Type *destType)
1578{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001579 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001580 return createCast(Ice::InstCast::Zext, v, destType);
1581}
1582
1583Value *Nucleus::createSExt(Value *v, Type *destType)
1584{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001585 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001586 return createCast(Ice::InstCast::Sext, v, destType);
1587}
1588
1589Value *Nucleus::createFPToUI(Value *v, Type *destType)
1590{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001591 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001592 return createCast(Ice::InstCast::Fptoui, v, destType);
1593}
1594
1595Value *Nucleus::createFPToSI(Value *v, Type *destType)
1596{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001597 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001598 return createCast(Ice::InstCast::Fptosi, v, destType);
1599}
1600
1601Value *Nucleus::createSIToFP(Value *v, Type *destType)
1602{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001603 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001604 return createCast(Ice::InstCast::Sitofp, v, destType);
1605}
1606
1607Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1608{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001609 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001610 return createCast(Ice::InstCast::Fptrunc, v, destType);
1611}
1612
1613Value *Nucleus::createFPExt(Value *v, Type *destType)
1614{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001615 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001616 return createCast(Ice::InstCast::Fpext, v, destType);
1617}
1618
1619Value *Nucleus::createBitCast(Value *v, Type *destType)
1620{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001621 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001622 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1623 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1624 // emulate them by writing to the stack and reading back as the destination type.
1625 if(emulateMismatchedBitCast)
1626 {
1627 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1628 {
1629 Value *address = allocateStackVariable(destType);
1630 createStore(v, address, T(v->getType()));
1631 return createLoad(address, destType);
1632 }
1633 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1634 {
1635 Value *address = allocateStackVariable(T(v->getType()));
1636 createStore(v, address, T(v->getType()));
1637 return createLoad(address, destType);
1638 }
1639 }
1640
1641 return createCast(Ice::InstCast::Bitcast, v, destType);
1642}
1643
1644static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1645{
1646 ASSERT(lhs->getType() == rhs->getType());
1647
1648 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1649 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1650 ::basicBlock->appendInst(cmp);
1651
1652 return V(result);
1653}
1654
1655Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1656{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001657 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001658 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1659}
1660
1661Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1662{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001663 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001664 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1665}
1666
1667Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1668{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001669 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001670 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1671}
1672
1673Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1674{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001675 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001676 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1677}
1678
1679Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1680{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001681 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001682 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1683}
1684
1685Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1686{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001687 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001688 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1689}
1690
1691Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1692{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001693 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001694 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1695}
1696
1697Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1698{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001699 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001700 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1701}
1702
1703Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1704{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001705 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001706 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1707}
1708
1709Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1710{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001711 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001712 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1713}
1714
1715Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1716{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001717 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001718 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1719}
1720
1721static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1722{
1723 ASSERT(lhs->getType() == rhs->getType());
1724 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1725
1726 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1727 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1728 ::basicBlock->appendInst(cmp);
1729
1730 return V(result);
1731}
1732
1733Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1734{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001735 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001736 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1737}
1738
1739Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1740{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001741 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001742 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1743}
1744
1745Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1746{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001747 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001748 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1749}
1750
1751Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1752{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001753 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001754 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1755}
1756
1757Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1758{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001759 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001760 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1761}
1762
1763Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1764{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001765 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001766 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1767}
1768
1769Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1770{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001771 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001772 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1773}
1774
1775Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1776{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001777 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001778 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1779}
1780
1781Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1782{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001783 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001784 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1785}
1786
1787Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1788{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001789 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001790 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1791}
1792
1793Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1794{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001795 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001796 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1797}
1798
1799Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1800{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001801 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001802 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1803}
1804
1805Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1806{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001807 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001808 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1809}
1810
1811Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1812{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001813 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001814 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1815}
1816
1817Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1818{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001819 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001820 auto result = ::function->makeVariable(T(type));
Antonio Maiorano62427e02020-02-13 09:18:05 -05001821 auto extract = Ice::InstExtractElement::create(::function, result, V(vector), ::context->getConstantInt32(index));
Nicolas Capens157ba262019-12-10 17:49:14 -05001822 ::basicBlock->appendInst(extract);
1823
1824 return V(result);
1825}
1826
1827Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1828{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001829 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001830 auto result = ::function->makeVariable(vector->getType());
1831 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1832 ::basicBlock->appendInst(insert);
1833
1834 return V(result);
1835}
1836
1837Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1838{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001839 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001840 ASSERT(V1->getType() == V2->getType());
1841
1842 int size = Ice::typeNumElements(V1->getType());
1843 auto result = ::function->makeVariable(V1->getType());
1844 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1845
1846 for(int i = 0; i < size; i++)
1847 {
1848 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1849 }
1850
1851 ::basicBlock->appendInst(shuffle);
1852
1853 return V(result);
1854}
1855
1856Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1857{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001858 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001859 ASSERT(ifTrue->getType() == ifFalse->getType());
1860
1861 auto result = ::function->makeVariable(ifTrue->getType());
1862 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1863 ::basicBlock->appendInst(select);
1864
1865 return V(result);
1866}
1867
1868SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1869{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001870 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001871 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1872 ::basicBlock->appendInst(switchInst);
1873
Ben Clayton713b8d32019-12-17 20:37:56 +00001874 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001875}
1876
1877void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1878{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001879 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001880 switchCases->addBranch(label, label, branch);
1881}
1882
1883void Nucleus::createUnreachable()
1884{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001885 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001886 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1887 ::basicBlock->appendInst(unreachable);
1888}
1889
Antonio Maiorano62427e02020-02-13 09:18:05 -05001890Type *Nucleus::getType(Value *value)
1891{
1892 return T(V(value)->getType());
1893}
1894
1895Type *Nucleus::getContainedType(Type *vectorType)
1896{
1897 Ice::Type vecTy = T(vectorType);
1898 switch(vecTy)
1899 {
1900 case Ice::IceType_v4i1: return T(Ice::IceType_i1);
1901 case Ice::IceType_v8i1: return T(Ice::IceType_i1);
1902 case Ice::IceType_v16i1: return T(Ice::IceType_i1);
1903 case Ice::IceType_v16i8: return T(Ice::IceType_i8);
1904 case Ice::IceType_v8i16: return T(Ice::IceType_i16);
1905 case Ice::IceType_v4i32: return T(Ice::IceType_i32);
1906 case Ice::IceType_v4f32: return T(Ice::IceType_f32);
1907 default:
1908 ASSERT_MSG(false, "getContainedType: input type is not a vector type");
1909 return {};
1910 }
1911}
1912
Nicolas Capens157ba262019-12-10 17:49:14 -05001913Type *Nucleus::getPointerType(Type *ElementType)
1914{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05001915 return T(sz::getPointerType(T(ElementType)));
Nicolas Capens157ba262019-12-10 17:49:14 -05001916}
1917
Antonio Maiorano62427e02020-02-13 09:18:05 -05001918static constexpr Ice::Type getNaturalIntType()
1919{
1920 constexpr size_t intSize = sizeof(int);
1921 static_assert(intSize == 4 || intSize == 8, "");
1922 return intSize == 4 ? Ice::IceType_i32 : Ice::IceType_i64;
1923}
1924
1925Type *Nucleus::getPrintfStorageType(Type *valueType)
1926{
1927 Ice::Type valueTy = T(valueType);
1928 switch(valueTy)
1929 {
1930 case Ice::IceType_i32:
1931 return T(getNaturalIntType());
1932
1933 case Ice::IceType_f32:
1934 return T(Ice::IceType_f64);
1935
1936 default:
1937 UNIMPLEMENTED_NO_BUG("getPrintfStorageType: add more cases as needed");
1938 return {};
1939 }
1940}
1941
Nicolas Capens157ba262019-12-10 17:49:14 -05001942Value *Nucleus::createNullValue(Type *Ty)
1943{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001944 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001945 if(Ice::isVectorType(T(Ty)))
1946 {
1947 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001948 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001949 return createConstantVector(c, Ty);
1950 }
1951 else
1952 {
1953 return V(::context->getConstantZero(T(Ty)));
1954 }
1955}
1956
1957Value *Nucleus::createConstantLong(int64_t i)
1958{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001959 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001960 return V(::context->getConstantInt64(i));
1961}
1962
1963Value *Nucleus::createConstantInt(int i)
1964{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001965 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001966 return V(::context->getConstantInt32(i));
1967}
1968
1969Value *Nucleus::createConstantInt(unsigned int i)
1970{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001971 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001972 return V(::context->getConstantInt32(i));
1973}
1974
1975Value *Nucleus::createConstantBool(bool b)
1976{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001977 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001978 return V(::context->getConstantInt1(b));
1979}
1980
1981Value *Nucleus::createConstantByte(signed char i)
1982{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001983 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001984 return V(::context->getConstantInt8(i));
1985}
1986
1987Value *Nucleus::createConstantByte(unsigned char i)
1988{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001989 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001990 return V(::context->getConstantInt8(i));
1991}
1992
1993Value *Nucleus::createConstantShort(short i)
1994{
Antonio Maioranoaae33732020-02-14 14:52:34 -05001995 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05001996 return V(::context->getConstantInt16(i));
1997}
1998
1999Value *Nucleus::createConstantShort(unsigned short i)
2000{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002001 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002002 return V(::context->getConstantInt16(i));
2003}
2004
2005Value *Nucleus::createConstantFloat(float x)
2006{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002007 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002008 return V(::context->getConstantFloat(x));
2009}
2010
2011Value *Nucleus::createNullPointer(Type *Ty)
2012{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002013 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00002014 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05002015}
2016
Antonio Maiorano02a39532020-01-21 15:15:34 -05002017static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
2018{
2019 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
2020}
2021
Nicolas Capens157ba262019-12-10 17:49:14 -05002022Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
2023{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002024 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002025 const int vectorSize = 16;
2026 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
2027 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05002028
2029 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00002030 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05002031
2032 // TODO(148082873): Fix global variable constants when generating multiple functions
2033 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05002034
2035 switch((int)reinterpret_cast<intptr_t>(type))
2036 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002037 case Ice::IceType_v4i32:
2038 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002039 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002040 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002041 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002042 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002043 }
2044 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002045 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002046 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002047 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002048 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002049 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002050 }
2051 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002052 case Ice::IceType_v8i16:
2053 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002054 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002055 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002056 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002057 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002058 }
2059 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002060 case Ice::IceType_v16i8:
2061 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05002062 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002063 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002064 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002065 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002066 }
2067 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002068 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002069 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002070 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002071 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002072 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002073 }
2074 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002075 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05002076 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002077 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002078 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002079 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002080 }
2081 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002082 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05002083 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002084 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002085 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002086 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002087 }
2088 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002089 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002090 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002091 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002092 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002093 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002094 }
2095 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002096 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05002097 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002098 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05002099 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05002100 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002101 }
2102 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00002103 default:
2104 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05002105 }
2106
Antonio Maiorano02a39532020-01-21 15:15:34 -05002107 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05002108
Antonio Maiorano02a39532020-01-21 15:15:34 -05002109 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05002110 return V(result);
2111}
2112
2113Value *Nucleus::createConstantVector(const double *constants, Type *type)
2114{
Ben Clayton713b8d32019-12-17 20:37:56 +00002115 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05002116}
2117
Antonio Maiorano62427e02020-02-13 09:18:05 -05002118Value *Nucleus::createConstantString(const char *v)
2119{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002120 // NOTE: Do not call RR_DEBUG_INFO_UPDATE_LOC() here to avoid recursion when called from rr::Printv
Antonio Maiorano62427e02020-02-13 09:18:05 -05002121 return V(IceConstantData(v, strlen(v) + 1));
2122}
2123
Nicolas Capens157ba262019-12-10 17:49:14 -05002124Type *Void::getType()
2125{
2126 return T(Ice::IceType_void);
2127}
2128
2129Type *Bool::getType()
2130{
2131 return T(Ice::IceType_i1);
2132}
2133
2134Type *Byte::getType()
2135{
2136 return T(Ice::IceType_i8);
2137}
2138
2139Type *SByte::getType()
2140{
2141 return T(Ice::IceType_i8);
2142}
2143
2144Type *Short::getType()
2145{
2146 return T(Ice::IceType_i16);
2147}
2148
2149Type *UShort::getType()
2150{
2151 return T(Ice::IceType_i16);
2152}
2153
2154Type *Byte4::getType()
2155{
2156 return T(Type_v4i8);
2157}
2158
2159Type *SByte4::getType()
2160{
2161 return T(Type_v4i8);
2162}
2163
Ben Clayton713b8d32019-12-17 20:37:56 +00002164namespace {
2165RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05002166{
Ben Clayton713b8d32019-12-17 20:37:56 +00002167 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05002168}
2169
Ben Clayton713b8d32019-12-17 20:37:56 +00002170RValue<Byte> Extract(RValue<Byte8> val, int i)
2171{
2172 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
2173}
2174
2175RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2176{
2177 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
2178}
2179} // namespace
2180
Nicolas Capens157ba262019-12-10 17:49:14 -05002181RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2182{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002183 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002184 if(emulateIntrinsics)
2185 {
2186 Byte8 result;
2187 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2188 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2189 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2190 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2191 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2192 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2193 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2194 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
2195
2196 return result;
2197 }
2198 else
2199 {
2200 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002201 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002202 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2203 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2204 paddusb->addArg(x.value);
2205 paddusb->addArg(y.value);
2206 ::basicBlock->appendInst(paddusb);
2207
2208 return RValue<Byte8>(V(result));
2209 }
2210}
2211
2212RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2213{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002214 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002215 if(emulateIntrinsics)
2216 {
2217 Byte8 result;
2218 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2219 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2220 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2221 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2222 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2223 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2224 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2225 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
2226
2227 return result;
2228 }
2229 else
2230 {
2231 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002232 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002233 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2234 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2235 psubusw->addArg(x.value);
2236 psubusw->addArg(y.value);
2237 ::basicBlock->appendInst(psubusw);
2238
2239 return RValue<Byte8>(V(result));
2240 }
2241}
2242
2243RValue<SByte> Extract(RValue<SByte8> val, int i)
2244{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002245 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002246 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
2247}
2248
2249RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2250{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002251 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002252 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2253}
2254
2255RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2256{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002257 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002258 if(emulateIntrinsics)
2259 {
2260 SByte8 result;
2261 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2262 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2263 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2264 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2265 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2266 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2267 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2268 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2269
2270 return result;
2271 }
2272 else
2273 {
Ben Clayton713b8d32019-12-17 20:37:56 +00002274#if defined(__i386__) || defined(__x86_64__)
2275 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2276 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
2277 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05002278
Ben Clayton713b8d32019-12-17 20:37:56 +00002279 return As<SByte8>(hi | lo);
2280#else
2281 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2282#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04002283 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002284}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002285
Nicolas Capens157ba262019-12-10 17:49:14 -05002286RValue<Int> SignMask(RValue<Byte8> x)
2287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002289 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002290 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002291 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2292 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002293 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002294 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01002295 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002296 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002297 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002298 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2299 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2300 movmsk->addArg(x.value);
2301 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01002302
Nicolas Capens157ba262019-12-10 17:49:14 -05002303 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01002304 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002305}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002306
2307// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2308// {
Nicolas Capens2f970b62016-11-08 14:28:59 -05002309// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002310// }
2311
Nicolas Capens157ba262019-12-10 17:49:14 -05002312RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2313{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002314 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002315 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2316}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002317
Nicolas Capens157ba262019-12-10 17:49:14 -05002318Type *Byte8::getType()
2319{
2320 return T(Type_v8i8);
2321}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002322
Nicolas Capens598f8d82016-09-26 15:09:10 -04002323// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2324// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002325// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002326// }
2327
2328// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2329// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05002330// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002331// }
2332
Nicolas Capens157ba262019-12-10 17:49:14 -05002333RValue<SByte> SaturateSigned(RValue<Short> x)
2334{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002335 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002336 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
2337}
2338
2339RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2340{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002341 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002342 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04002343 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002344 SByte8 result;
2345 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
2346 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
2347 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
2348 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
2349 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
2350 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
2351 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
2352 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04002353
Nicolas Capens157ba262019-12-10 17:49:14 -05002354 return result;
2355 }
2356 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002357 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002358 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002359 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002360 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2361 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2362 paddsb->addArg(x.value);
2363 paddsb->addArg(y.value);
2364 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002365
Nicolas Capens157ba262019-12-10 17:49:14 -05002366 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002367 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002368}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002369
Nicolas Capens157ba262019-12-10 17:49:14 -05002370RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2371{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002372 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002373 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002374 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002375 SByte8 result;
2376 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
2377 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
2378 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
2379 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
2380 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
2381 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
2382 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
2383 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002384
Nicolas Capens157ba262019-12-10 17:49:14 -05002385 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002386 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002387 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002388 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002389 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002390 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002391 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2392 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2393 psubsb->addArg(x.value);
2394 psubsb->addArg(y.value);
2395 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002396
Nicolas Capens157ba262019-12-10 17:49:14 -05002397 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002398 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002399}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002400
Nicolas Capens157ba262019-12-10 17:49:14 -05002401RValue<Int> SignMask(RValue<SByte8> x)
2402{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002403 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002404 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002405 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002406 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2407 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002408 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002409 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002410 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002411 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002412 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002413 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2414 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2415 movmsk->addArg(x.value);
2416 ::basicBlock->appendInst(movmsk);
2417
2418 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002419 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002420}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002421
Nicolas Capens157ba262019-12-10 17:49:14 -05002422RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2423{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002424 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002425 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2426}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002427
Nicolas Capens157ba262019-12-10 17:49:14 -05002428RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2429{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002430 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002431 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2432}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002433
Nicolas Capens157ba262019-12-10 17:49:14 -05002434Type *SByte8::getType()
2435{
2436 return T(Type_v8i8);
2437}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002438
Nicolas Capens157ba262019-12-10 17:49:14 -05002439Type *Byte16::getType()
2440{
2441 return T(Ice::IceType_v16i8);
2442}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002443
Nicolas Capens157ba262019-12-10 17:49:14 -05002444Type *SByte16::getType()
2445{
2446 return T(Ice::IceType_v16i8);
2447}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002448
Nicolas Capens157ba262019-12-10 17:49:14 -05002449Type *Short2::getType()
2450{
2451 return T(Type_v2i16);
2452}
Nicolas Capensd4227962016-11-09 14:24:25 -05002453
Nicolas Capens157ba262019-12-10 17:49:14 -05002454Type *UShort2::getType()
2455{
2456 return T(Type_v2i16);
2457}
Nicolas Capensd4227962016-11-09 14:24:25 -05002458
Nicolas Capens157ba262019-12-10 17:49:14 -05002459Short4::Short4(RValue<Int4> cast)
2460{
Ben Clayton713b8d32019-12-17 20:37:56 +00002461 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002462 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2463 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2464
2465 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
2466 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2467
2468 storeValue(short4);
2469}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002470
2471// Short4::Short4(RValue<Float> cast)
2472// {
2473// }
2474
Nicolas Capens157ba262019-12-10 17:49:14 -05002475Short4::Short4(RValue<Float4> cast)
2476{
Ben Claytonce54c592020-02-07 11:30:51 +00002477 UNIMPLEMENTED_NO_BUG("Short4::Short4(RValue<Float4> cast)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002478}
2479
2480RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2481{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002482 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002483 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002484 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002485 Short4 result;
2486 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2487 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2488 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2489 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002490
2491 return result;
2492 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002493 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002494 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002495 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2496 }
2497}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002498
Nicolas Capens157ba262019-12-10 17:49:14 -05002499RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2500{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002501 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002502 if(emulateIntrinsics)
2503 {
2504 Short4 result;
2505 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2506 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2507 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2508 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2509
2510 return result;
2511 }
2512 else
2513 {
2514 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2515 }
2516}
2517
2518RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2519{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002520 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002521 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2522 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2523 ::basicBlock->appendInst(cmp);
2524
2525 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2526 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2527 ::basicBlock->appendInst(select);
2528
2529 return RValue<Short4>(V(result));
2530}
2531
2532RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2533{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002534 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002535 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2536 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2537 ::basicBlock->appendInst(cmp);
2538
2539 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2540 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2541 ::basicBlock->appendInst(select);
2542
2543 return RValue<Short4>(V(result));
2544}
2545
2546RValue<Short> SaturateSigned(RValue<Int> x)
2547{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002548 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002549 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2550}
2551
2552RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2553{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002554 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002555 if(emulateIntrinsics)
2556 {
2557 Short4 result;
2558 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2559 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2560 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2561 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2562
2563 return result;
2564 }
2565 else
2566 {
2567 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002568 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002569 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2570 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2571 paddsw->addArg(x.value);
2572 paddsw->addArg(y.value);
2573 ::basicBlock->appendInst(paddsw);
2574
2575 return RValue<Short4>(V(result));
2576 }
2577}
2578
2579RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2580{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002581 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002582 if(emulateIntrinsics)
2583 {
2584 Short4 result;
2585 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2586 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2587 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2588 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2589
2590 return result;
2591 }
2592 else
2593 {
2594 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002595 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002596 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2597 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2598 psubsw->addArg(x.value);
2599 psubsw->addArg(y.value);
2600 ::basicBlock->appendInst(psubsw);
2601
2602 return RValue<Short4>(V(result));
2603 }
2604}
2605
2606RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2607{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002608 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002609 if(emulateIntrinsics)
2610 {
2611 Short4 result;
2612 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2613 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2614 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2615 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2616
2617 return result;
2618 }
2619 else
2620 {
2621 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002622 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002623 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2624 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2625 pmulhw->addArg(x.value);
2626 pmulhw->addArg(y.value);
2627 ::basicBlock->appendInst(pmulhw);
2628
2629 return RValue<Short4>(V(result));
2630 }
2631}
2632
2633RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2634{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002635 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002636 if(emulateIntrinsics)
2637 {
2638 Int2 result;
2639 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2640 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2641
2642 return result;
2643 }
2644 else
2645 {
2646 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002647 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002648 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2649 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2650 pmaddwd->addArg(x.value);
2651 pmaddwd->addArg(y.value);
2652 ::basicBlock->appendInst(pmaddwd);
2653
2654 return As<Int2>(V(result));
2655 }
2656}
2657
2658RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2659{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002660 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002661 if(emulateIntrinsics)
2662 {
2663 SByte8 result;
2664 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2665 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2666 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2667 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2668 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2669 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2670 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2671 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2672
2673 return result;
2674 }
2675 else
2676 {
2677 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002678 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002679 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2680 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2681 pack->addArg(x.value);
2682 pack->addArg(y.value);
2683 ::basicBlock->appendInst(pack);
2684
2685 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2686 }
2687}
2688
2689RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2690{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002691 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002692 if(emulateIntrinsics)
2693 {
2694 Byte8 result;
2695 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2696 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2697 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2698 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2699 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2700 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2701 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2702 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2703
2704 return result;
2705 }
2706 else
2707 {
2708 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002709 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002710 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2711 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2712 pack->addArg(x.value);
2713 pack->addArg(y.value);
2714 ::basicBlock->appendInst(pack);
2715
2716 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2717 }
2718}
2719
2720RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2721{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002722 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002723 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2724}
2725
2726RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2727{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002728 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002729 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2730}
2731
2732Type *Short4::getType()
2733{
2734 return T(Type_v4i16);
2735}
2736
2737UShort4::UShort4(RValue<Float4> cast, bool saturate)
2738{
2739 if(saturate)
2740 {
2741 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002742 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002743 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2744 // PackUnsigned takes care of 0x0000 saturation.
2745 Int4 int4(Min(cast, Float4(0xFFFF)));
2746 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002747 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002748 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002749 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002750 // ARM saturates the 32-bit integer result on overflow/undeflow.
2751 Int4 int4(cast);
2752 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002753 }
2754 else
2755 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002756 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002757 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002758 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002759 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002760 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002761 *this = Short4(Int4(cast));
2762 }
2763}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002764
Nicolas Capens157ba262019-12-10 17:49:14 -05002765RValue<UShort> Extract(RValue<UShort4> val, int i)
2766{
2767 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2768}
2769
2770RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2771{
2772 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2773}
2774
2775RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2776{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002777 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002778 if(emulateIntrinsics)
Antonio Maioranoaae33732020-02-14 14:52:34 -05002779
Nicolas Capens157ba262019-12-10 17:49:14 -05002780 {
2781 UShort4 result;
2782 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2783 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2784 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2785 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2786
2787 return result;
2788 }
2789 else
2790 {
2791 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2792 }
2793}
2794
2795RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2796{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002797 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002798 if(emulateIntrinsics)
2799 {
2800 UShort4 result;
2801 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2802 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2803 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2804 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2805
2806 return result;
2807 }
2808 else
2809 {
2810 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2811 }
2812}
2813
2814RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2815{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002816 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002817 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2818 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2819 ::basicBlock->appendInst(cmp);
2820
2821 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2822 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2823 ::basicBlock->appendInst(select);
2824
2825 return RValue<UShort4>(V(result));
2826}
2827
2828RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2829{
2830 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2831 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2832 ::basicBlock->appendInst(cmp);
2833
2834 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2835 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2836 ::basicBlock->appendInst(select);
2837
2838 return RValue<UShort4>(V(result));
2839}
2840
2841RValue<UShort> SaturateUnsigned(RValue<Int> x)
2842{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002843 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002844 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2845}
2846
2847RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2848{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002849 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002850 if(emulateIntrinsics)
2851 {
2852 UShort4 result;
2853 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2854 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2855 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2856 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2857
2858 return result;
2859 }
2860 else
2861 {
2862 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002863 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002864 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2865 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2866 paddusw->addArg(x.value);
2867 paddusw->addArg(y.value);
2868 ::basicBlock->appendInst(paddusw);
2869
2870 return RValue<UShort4>(V(result));
2871 }
2872}
2873
2874RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2875{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002876 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002877 if(emulateIntrinsics)
2878 {
2879 UShort4 result;
2880 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2881 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2882 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2883 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2884
2885 return result;
2886 }
2887 else
2888 {
2889 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002890 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002891 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2892 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2893 psubusw->addArg(x.value);
2894 psubusw->addArg(y.value);
2895 ::basicBlock->appendInst(psubusw);
2896
2897 return RValue<UShort4>(V(result));
2898 }
2899}
2900
2901RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2902{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002903 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002904 if(emulateIntrinsics)
2905 {
2906 UShort4 result;
2907 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2908 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2909 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2910 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2911
2912 return result;
2913 }
2914 else
2915 {
2916 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002917 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002918 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2919 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2920 pmulhuw->addArg(x.value);
2921 pmulhuw->addArg(y.value);
2922 ::basicBlock->appendInst(pmulhuw);
2923
2924 return RValue<UShort4>(V(result));
2925 }
2926}
2927
2928RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2929{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002930 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002931 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2932
2933 // Scalarized implementation.
2934 Int4 result;
2935 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2936 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2937 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2938 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2939
2940 return result;
2941}
2942
2943RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2944{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002945 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002946 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2947
2948 if(false) // Partial product based implementation.
2949 {
2950 auto xh = x >> 16;
2951 auto yh = y >> 16;
2952 auto xl = x & UInt4(0x0000FFFF);
2953 auto yl = y & UInt4(0x0000FFFF);
2954 auto xlyh = xl * yh;
2955 auto xhyl = xh * yl;
2956 auto xlyhh = xlyh >> 16;
2957 auto xhylh = xhyl >> 16;
2958 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2959 auto xhyll = xhyl & UInt4(0x0000FFFF);
2960 auto xlylh = (xl * yl) >> 16;
2961 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2962
2963 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002964 }
2965
Nicolas Capens157ba262019-12-10 17:49:14 -05002966 // Scalarized implementation.
2967 Int4 result;
2968 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
2969 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
2970 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
2971 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
2972
2973 return As<UInt4>(result);
2974}
2975
2976RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2977{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002978 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00002979 UNIMPLEMENTED_NO_BUG("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05002980 return UShort4(0);
2981}
2982
2983Type *UShort4::getType()
2984{
2985 return T(Type_v4i16);
2986}
2987
2988RValue<Short> Extract(RValue<Short8> val, int i)
2989{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002990 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002991 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2992}
2993
2994RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2995{
Antonio Maioranoaae33732020-02-14 14:52:34 -05002996 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05002997 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2998}
2999
3000RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3001{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003002 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003003 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003004 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003005 Short8 result;
3006 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3007 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3008 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3009 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3010 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3011 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3012 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3013 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003014
Nicolas Capens157ba262019-12-10 17:49:14 -05003015 return result;
3016 }
3017 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003018 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003019 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003020 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003021}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003022
Nicolas Capens157ba262019-12-10 17:49:14 -05003023RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3024{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003025 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003026 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003027 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003028 Short8 result;
3029 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3030 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3031 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3032 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3033 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3034 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3035 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3036 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003037
Nicolas Capens157ba262019-12-10 17:49:14 -05003038 return result;
3039 }
3040 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003041 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003042 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003043 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003044}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003045
Nicolas Capens157ba262019-12-10 17:49:14 -05003046RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3047{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003048 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003049 UNIMPLEMENTED_NO_BUG("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003050 return Int4(0);
3051}
3052
3053RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3054{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003055 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003056 UNIMPLEMENTED_NO_BUG("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003057 return Short8(0);
3058}
3059
3060Type *Short8::getType()
3061{
3062 return T(Ice::IceType_v8i16);
3063}
3064
3065RValue<UShort> Extract(RValue<UShort8> val, int i)
3066{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003067 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003068 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
3069}
3070
3071RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3072{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003073 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003074 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
3075}
3076
3077RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3078{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003079 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003080 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003081 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003082 UShort8 result;
3083 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3084 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3085 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3086 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3087 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3088 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3089 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3090 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003091
Nicolas Capens157ba262019-12-10 17:49:14 -05003092 return result;
3093 }
3094 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003095 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003096 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003097 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003098}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003099
Nicolas Capens157ba262019-12-10 17:49:14 -05003100RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3101{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003102 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003103 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003104 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003105 UShort8 result;
3106 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3107 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3108 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3109 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3110 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
3111 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
3112 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
3113 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003114
Nicolas Capens157ba262019-12-10 17:49:14 -05003115 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003116 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003117 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003118 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003119 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003120 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003121}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003122
Nicolas Capens157ba262019-12-10 17:49:14 -05003123RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3124{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003125 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00003126 UNIMPLEMENTED_NO_BUG("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
Nicolas Capens157ba262019-12-10 17:49:14 -05003127 return UShort8(0);
3128}
3129
Nicolas Capens157ba262019-12-10 17:49:14 -05003130Type *UShort8::getType()
3131{
3132 return T(Ice::IceType_v8i16);
3133}
3134
Ben Clayton713b8d32019-12-17 20:37:56 +00003135RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003136{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003137 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003138 RValue<Int> res = val;
3139 val += 1;
3140 return res;
3141}
3142
Ben Clayton713b8d32019-12-17 20:37:56 +00003143const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003144{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003145 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003146 val += 1;
3147 return val;
3148}
3149
Ben Clayton713b8d32019-12-17 20:37:56 +00003150RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003151{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003152 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003153 RValue<Int> res = val;
3154 val -= 1;
3155 return res;
3156}
3157
Ben Clayton713b8d32019-12-17 20:37:56 +00003158const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003159{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003160 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003161 val -= 1;
3162 return val;
3163}
3164
3165RValue<Int> RoundInt(RValue<Float> cast)
3166{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003167 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003168 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003169 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003170 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3171 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003172 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003173 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003174 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003175 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003176 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003177 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3178 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3179 nearbyint->addArg(cast.value);
3180 ::basicBlock->appendInst(nearbyint);
3181
3182 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003183 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003184}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003185
Nicolas Capens157ba262019-12-10 17:49:14 -05003186Type *Int::getType()
3187{
3188 return T(Ice::IceType_i32);
3189}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003190
Nicolas Capens157ba262019-12-10 17:49:14 -05003191Type *Long::getType()
3192{
3193 return T(Ice::IceType_i64);
3194}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003195
Nicolas Capens157ba262019-12-10 17:49:14 -05003196UInt::UInt(RValue<Float> cast)
3197{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003198 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003199 // Smallest positive value representable in UInt, but not in Int
3200 const unsigned int ustart = 0x80000000u;
3201 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003202
Nicolas Capens157ba262019-12-10 17:49:14 -05003203 // If the value is negative, store 0, otherwise store the result of the conversion
3204 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00003205 // Check if the value can be represented as an Int
3206 IfThenElse(cast >= ustartf,
3207 // If the value is too large, subtract ustart and re-add it after conversion.
3208 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3209 // Otherwise, just convert normally
3210 Int(cast)))
3211 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05003212}
Nicolas Capensa8086512016-11-07 17:32:17 -05003213
Ben Clayton713b8d32019-12-17 20:37:56 +00003214RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003215{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003216 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003217 RValue<UInt> res = val;
3218 val += 1;
3219 return res;
3220}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003221
Ben Clayton713b8d32019-12-17 20:37:56 +00003222const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05003223{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003224 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003225 val += 1;
3226 return val;
3227}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003228
Ben Clayton713b8d32019-12-17 20:37:56 +00003229RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003230{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003231 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003232 RValue<UInt> res = val;
3233 val -= 1;
3234 return res;
3235}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003236
Ben Clayton713b8d32019-12-17 20:37:56 +00003237const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05003238{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003239 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003240 val -= 1;
3241 return val;
3242}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003243
Nicolas Capens598f8d82016-09-26 15:09:10 -04003244// RValue<UInt> RoundUInt(RValue<Float> cast)
3245// {
Ben Claytoneb50d252019-04-15 13:50:01 -04003246// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003247// }
3248
Nicolas Capens157ba262019-12-10 17:49:14 -05003249Type *UInt::getType()
3250{
3251 return T(Ice::IceType_i32);
3252}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003253
3254// Int2::Int2(RValue<Int> cast)
3255// {
3256// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
3257// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
3258//
3259// Constant *shuffle[2];
3260// shuffle[0] = Nucleus::createConstantInt(0);
3261// shuffle[1] = Nucleus::createConstantInt(0);
3262//
3263// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
3264//
3265// storeValue(replicate);
3266// }
3267
Nicolas Capens157ba262019-12-10 17:49:14 -05003268RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
3269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003271 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003272 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003273 Int2 result;
3274 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3275 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003276
Nicolas Capens157ba262019-12-10 17:49:14 -05003277 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003278 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003279 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003280 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003281 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003282 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003283}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003284
Nicolas Capens157ba262019-12-10 17:49:14 -05003285RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
3286{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003287 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003288 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003289 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003290 Int2 result;
3291 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3292 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3293
3294 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003295 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003296 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003297 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003298 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003299 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003300}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003301
Nicolas Capens157ba262019-12-10 17:49:14 -05003302Type *Int2::getType()
3303{
3304 return T(Type_v2i32);
3305}
3306
3307RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
3308{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003309 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003310 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003311 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003312 UInt2 result;
3313 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3314 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003315
Nicolas Capens157ba262019-12-10 17:49:14 -05003316 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003317 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003318 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003319 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003320 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003321 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003322}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003323
Nicolas Capens157ba262019-12-10 17:49:14 -05003324RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
3325{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003326 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003327 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003328 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003329 UInt2 result;
3330 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3331 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05003332
Nicolas Capens157ba262019-12-10 17:49:14 -05003333 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003334 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003335 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003336 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003337 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003338 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003339}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003340
Nicolas Capens157ba262019-12-10 17:49:14 -05003341Type *UInt2::getType()
3342{
3343 return T(Type_v2i32);
3344}
3345
Ben Clayton713b8d32019-12-17 20:37:56 +00003346Int4::Int4(RValue<Byte4> cast)
3347 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003348{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003349 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003350 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3351 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3352
3353 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00003354 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003355 Value *b = Nucleus::createBitCast(a, Byte16::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003356 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), swizzle);
Nicolas Capens157ba262019-12-10 17:49:14 -05003357
Ben Clayton713b8d32019-12-17 20:37:56 +00003358 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003359 Value *d = Nucleus::createBitCast(c, Short8::getType());
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05003360 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), swizzle2);
Nicolas Capens157ba262019-12-10 17:49:14 -05003361
3362 Value *f = Nucleus::createBitCast(e, Int4::getType());
3363 storeValue(f);
3364}
3365
Ben Clayton713b8d32019-12-17 20:37:56 +00003366Int4::Int4(RValue<SByte4> cast)
3367 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003368{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003369 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003370 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
3371 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
3372
Ben Clayton713b8d32019-12-17 20:37:56 +00003373 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003374 Value *b = Nucleus::createBitCast(a, Byte16::getType());
3375 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
3376
Ben Clayton713b8d32019-12-17 20:37:56 +00003377 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003378 Value *d = Nucleus::createBitCast(c, Short8::getType());
3379 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
3380
3381 *this = As<Int4>(e) >> 24;
3382}
3383
Ben Clayton713b8d32019-12-17 20:37:56 +00003384Int4::Int4(RValue<Short4> cast)
3385 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003386{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003387 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003388 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003389 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
3390
3391 *this = As<Int4>(c) >> 16;
3392}
3393
Ben Clayton713b8d32019-12-17 20:37:56 +00003394Int4::Int4(RValue<UShort4> cast)
3395 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003396{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003397 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00003398 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003399 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
3400 Value *d = Nucleus::createBitCast(c, Int4::getType());
3401 storeValue(d);
3402}
3403
Ben Clayton713b8d32019-12-17 20:37:56 +00003404Int4::Int4(RValue<Int> rhs)
3405 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003406{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003407 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003408 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
3409
Ben Clayton713b8d32019-12-17 20:37:56 +00003410 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003411 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3412
3413 storeValue(replicate);
3414}
3415
3416RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
3417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003418 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003419 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003420 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003421 Int4 result;
3422 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
3423 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
3424 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
3425 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003426
Nicolas Capens157ba262019-12-10 17:49:14 -05003427 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003428 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003429 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003430 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003431 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003432 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003433}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003434
Nicolas Capens157ba262019-12-10 17:49:14 -05003435RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
3436{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003437 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003438 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003439 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003440 Int4 result;
3441 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
3442 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
3443 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
3444 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05003445
Nicolas Capens157ba262019-12-10 17:49:14 -05003446 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003447 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003448 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003449 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003450 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003451 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003452}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003453
Nicolas Capens157ba262019-12-10 17:49:14 -05003454RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3455{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003456 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003457 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
3458}
3459
3460RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3461{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003462 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003463 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
3464}
3465
3466RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3467{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003468 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003469 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
3470}
3471
3472RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3473{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003474 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003475 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
3476}
3477
3478RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3479{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003480 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003481 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
3482}
3483
3484RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3485{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003486 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003487 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
3488}
3489
3490RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3491{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003492 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003493 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3494 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3495 ::basicBlock->appendInst(cmp);
3496
3497 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3498 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3499 ::basicBlock->appendInst(select);
3500
3501 return RValue<Int4>(V(result));
3502}
3503
3504RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3505{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003506 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003507 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3508 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3509 ::basicBlock->appendInst(cmp);
3510
3511 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3512 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3513 ::basicBlock->appendInst(select);
3514
3515 return RValue<Int4>(V(result));
3516}
3517
3518RValue<Int4> RoundInt(RValue<Float4> cast)
3519{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003520 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003521 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003522 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003523 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3524 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003525 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003526 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003527 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003528 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003529 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003530 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3531 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3532 nearbyint->addArg(cast.value);
3533 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003534
3535 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003536 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003537}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003538
Nicolas Capens157ba262019-12-10 17:49:14 -05003539RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3540{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003541 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003542 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003543 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003544 Short8 result;
3545 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3546 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3547 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3548 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3549 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3550 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3551 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3552 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003553
Nicolas Capens157ba262019-12-10 17:49:14 -05003554 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003555 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003556 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003557 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003558 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003559 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003560 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3561 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3562 pack->addArg(x.value);
3563 pack->addArg(y.value);
3564 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003565
Nicolas Capens157ba262019-12-10 17:49:14 -05003566 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003567 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003568}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003569
Nicolas Capens157ba262019-12-10 17:49:14 -05003570RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3571{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003572 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003573 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003574 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003575 RValue<Int4> sx = As<Int4>(x);
3576 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003577
Nicolas Capens157ba262019-12-10 17:49:14 -05003578 RValue<Int4> sy = As<Int4>(y);
3579 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003580
Nicolas Capens157ba262019-12-10 17:49:14 -05003581 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003582 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003583 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003584 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003585 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003586 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003587 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3588 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3589 pack->addArg(x.value);
3590 pack->addArg(y.value);
3591 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003592
Nicolas Capens157ba262019-12-10 17:49:14 -05003593 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003594 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003595}
Nicolas Capens33438a62017-09-27 11:47:35 -04003596
Nicolas Capens157ba262019-12-10 17:49:14 -05003597RValue<Int> SignMask(RValue<Int4> x)
3598{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003599 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003600 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003601 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003602 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3603 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003604 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003605 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003606 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003607 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003608 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003609 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3610 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3611 movmsk->addArg(x.value);
3612 ::basicBlock->appendInst(movmsk);
3613
3614 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003615 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003616}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003617
Nicolas Capens157ba262019-12-10 17:49:14 -05003618Type *Int4::getType()
3619{
3620 return T(Ice::IceType_v4i32);
3621}
3622
Ben Clayton713b8d32019-12-17 20:37:56 +00003623UInt4::UInt4(RValue<Float4> cast)
3624 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003625{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003626 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003627 // Smallest positive value representable in UInt, but not in Int
3628 const unsigned int ustart = 0x80000000u;
3629 const float ustartf = float(ustart);
3630
3631 // Check if the value can be represented as an Int
3632 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3633 // If the value is too large, subtract ustart and re-add it after conversion.
3634 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003635 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003636 (~uiValue & Int4(cast));
3637 // If the value is negative, store 0, otherwise store the result of the conversion
3638 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3639}
3640
Ben Clayton713b8d32019-12-17 20:37:56 +00003641UInt4::UInt4(RValue<UInt> rhs)
3642 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003643{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003644 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003645 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3646
Ben Clayton713b8d32019-12-17 20:37:56 +00003647 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003648 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3649
3650 storeValue(replicate);
3651}
3652
3653RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3654{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003655 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003656 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003657 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003658 UInt4 result;
3659 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3660 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3661 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3662 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003663
Nicolas Capens157ba262019-12-10 17:49:14 -05003664 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003665 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003666 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003667 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003668 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003669 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003670}
Ben Clayton88816fa2019-05-15 17:08:14 +01003671
Nicolas Capens157ba262019-12-10 17:49:14 -05003672RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3673{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003674 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003675 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003676 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003677 UInt4 result;
3678 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3679 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3680 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3681 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003682
Nicolas Capens157ba262019-12-10 17:49:14 -05003683 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003684 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003685 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003686 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003687 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003688 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003689}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003690
Nicolas Capens157ba262019-12-10 17:49:14 -05003691RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3692{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003693 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003694 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3695}
3696
3697RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3698{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003699 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003700 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3701}
3702
3703RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3704{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003705 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003706 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3707}
3708
3709RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3710{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003711 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003712 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3713}
3714
3715RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3716{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003717 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003718 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3719}
3720
3721RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3722{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003723 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003724 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3725}
3726
3727RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3728{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003729 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003730 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3731 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3732 ::basicBlock->appendInst(cmp);
3733
3734 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3735 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3736 ::basicBlock->appendInst(select);
3737
3738 return RValue<UInt4>(V(result));
3739}
3740
3741RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3742{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003743 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003744 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3745 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3746 ::basicBlock->appendInst(cmp);
3747
3748 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3749 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3750 ::basicBlock->appendInst(select);
3751
3752 return RValue<UInt4>(V(result));
3753}
3754
3755Type *UInt4::getType()
3756{
3757 return T(Ice::IceType_v4i32);
3758}
3759
3760Type *Half::getType()
3761{
3762 return T(Ice::IceType_i16);
3763}
3764
3765RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3766{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003767 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003768 return 1.0f / x;
3769}
3770
3771RValue<Float> RcpSqrt_pp(RValue<Float> x)
3772{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003773 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003774 return Rcp_pp(Sqrt(x));
3775}
3776
3777RValue<Float> Sqrt(RValue<Float> x)
3778{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003779 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003780 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003781 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003782 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3783 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3784 sqrt->addArg(x.value);
3785 ::basicBlock->appendInst(sqrt);
3786
3787 return RValue<Float>(V(result));
3788}
3789
3790RValue<Float> Round(RValue<Float> x)
3791{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003792 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003793 return Float4(Round(Float4(x))).x;
3794}
3795
3796RValue<Float> Trunc(RValue<Float> x)
3797{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003798 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003799 return Float4(Trunc(Float4(x))).x;
3800}
3801
3802RValue<Float> Frac(RValue<Float> x)
3803{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003804 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003805 return Float4(Frac(Float4(x))).x;
3806}
3807
3808RValue<Float> Floor(RValue<Float> x)
3809{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003810 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003811 return Float4(Floor(Float4(x))).x;
3812}
3813
3814RValue<Float> Ceil(RValue<Float> x)
3815{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003816 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003817 return Float4(Ceil(Float4(x))).x;
3818}
3819
3820Type *Float::getType()
3821{
3822 return T(Ice::IceType_f32);
3823}
3824
3825Type *Float2::getType()
3826{
3827 return T(Type_v2f32);
3828}
3829
Ben Clayton713b8d32019-12-17 20:37:56 +00003830Float4::Float4(RValue<Float> rhs)
3831 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003832{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003833 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003834 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3835
Ben Clayton713b8d32019-12-17 20:37:56 +00003836 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003837 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3838
3839 storeValue(replicate);
3840}
3841
3842RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3843{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003844 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003845 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3846 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3847 ::basicBlock->appendInst(cmp);
3848
3849 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3850 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3851 ::basicBlock->appendInst(select);
3852
3853 return RValue<Float4>(V(result));
3854}
3855
3856RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3857{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003858 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003859 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3860 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3861 ::basicBlock->appendInst(cmp);
3862
3863 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3864 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3865 ::basicBlock->appendInst(select);
3866
3867 return RValue<Float4>(V(result));
3868}
3869
3870RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3871{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003872 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003873 return Float4(1.0f) / x;
3874}
3875
3876RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3877{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003878 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003879 return Rcp_pp(Sqrt(x));
3880}
3881
3882RValue<Float4> Sqrt(RValue<Float4> x)
3883{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003884 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003885 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003886 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003887 Float4 result;
3888 result.x = Sqrt(Float(Float4(x).x));
3889 result.y = Sqrt(Float(Float4(x).y));
3890 result.z = Sqrt(Float(Float4(x).z));
3891 result.w = Sqrt(Float(Float4(x).w));
3892
3893 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003894 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003895 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003896 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003897 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003898 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003899 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3900 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3901 sqrt->addArg(x.value);
3902 ::basicBlock->appendInst(sqrt);
3903
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003904 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003905 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003906}
Nicolas Capens157ba262019-12-10 17:49:14 -05003907
3908RValue<Int> SignMask(RValue<Float4> x)
3909{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003910 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003911 if(emulateIntrinsics || CPUID::ARM)
3912 {
3913 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3914 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3915 }
3916 else
3917 {
3918 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003919 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003920 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3921 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3922 movmsk->addArg(x.value);
3923 ::basicBlock->appendInst(movmsk);
3924
3925 return RValue<Int>(V(result));
3926 }
3927}
3928
3929RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3930{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003931 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003932 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3933}
3934
3935RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3936{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003937 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003938 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3939}
3940
3941RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3942{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003943 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003944 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3945}
3946
3947RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3948{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003949 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003950 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3951}
3952
3953RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3954{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003955 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003956 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3957}
3958
3959RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3960{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003961 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003962 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
3963}
3964
3965RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3966{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003967 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003968 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3969}
3970
3971RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3972{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003973 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003974 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3975}
3976
3977RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3978{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003979 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003980 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3981}
3982
3983RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3984{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003985 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003986 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3987}
3988
3989RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3990{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003991 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003992 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3993}
3994
3995RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3996{
Antonio Maioranoaae33732020-02-14 14:52:34 -05003997 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05003998 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3999}
4000
4001RValue<Float4> Round(RValue<Float4> x)
4002{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004003 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004004 if(emulateIntrinsics || CPUID::ARM)
4005 {
4006 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4007 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
4008 }
4009 else if(CPUID::SSE4_1)
4010 {
4011 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004012 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004013 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4014 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4015 round->addArg(x.value);
4016 round->addArg(::context->getConstantInt32(0));
4017 ::basicBlock->appendInst(round);
4018
4019 return RValue<Float4>(V(result));
4020 }
4021 else
4022 {
4023 return Float4(RoundInt(x));
4024 }
4025}
4026
4027RValue<Float4> Trunc(RValue<Float4> x)
4028{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004029 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004030 if(CPUID::SSE4_1)
4031 {
4032 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004033 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004034 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4035 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4036 round->addArg(x.value);
4037 round->addArg(::context->getConstantInt32(3));
4038 ::basicBlock->appendInst(round);
4039
4040 return RValue<Float4>(V(result));
4041 }
4042 else
4043 {
4044 return Float4(Int4(x));
4045 }
4046}
4047
4048RValue<Float4> Frac(RValue<Float4> x)
4049{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004050 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004051 Float4 frc;
4052
4053 if(CPUID::SSE4_1)
4054 {
4055 frc = x - Floor(x);
4056 }
4057 else
4058 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004059 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05004060
Ben Clayton713b8d32019-12-17 20:37:56 +00004061 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05004062 }
4063
4064 // x - floor(x) can be 1.0 for very small negative x.
4065 // Clamp against the value just below 1.0.
4066 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
4067}
4068
4069RValue<Float4> Floor(RValue<Float4> x)
4070{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004071 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004072 if(CPUID::SSE4_1)
4073 {
4074 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004075 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004076 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4077 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4078 round->addArg(x.value);
4079 round->addArg(::context->getConstantInt32(1));
4080 ::basicBlock->appendInst(round);
4081
4082 return RValue<Float4>(V(result));
4083 }
4084 else
4085 {
4086 return x - Frac(x);
4087 }
4088}
4089
4090RValue<Float4> Ceil(RValue<Float4> x)
4091{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004092 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004093 if(CPUID::SSE4_1)
4094 {
4095 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00004096 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004097 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4098 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4099 round->addArg(x.value);
4100 round->addArg(::context->getConstantInt32(2));
4101 ::basicBlock->appendInst(round);
4102
4103 return RValue<Float4>(V(result));
4104 }
4105 else
4106 {
4107 return -Floor(-x);
4108 }
4109}
4110
4111Type *Float4::getType()
4112{
4113 return T(Ice::IceType_v4f32);
4114}
4115
4116RValue<Long> Ticks()
4117{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004118 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004119 UNIMPLEMENTED_NO_BUG("RValue<Long> Ticks()");
Nicolas Capens157ba262019-12-10 17:49:14 -05004120 return Long(Int(0));
4121}
4122
Ben Clayton713b8d32019-12-17 20:37:56 +00004123RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004124{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004125 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004126 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004127}
4128
Ben Clayton713b8d32019-12-17 20:37:56 +00004129RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05004130{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004131 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano02a39532020-01-21 15:15:34 -05004132 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05004133}
4134
Ben Clayton713b8d32019-12-17 20:37:56 +00004135Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05004136{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004137 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004138 Ice::Variable *ret = nullptr;
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004139 if(retTy != nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05004140 {
4141 ret = ::function->makeVariable(T(retTy));
4142 }
4143 auto call = Ice::InstCall::create(::function, args.size(), ret, V(fptr.value), false);
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004144 for(auto arg : args)
Nicolas Capens157ba262019-12-10 17:49:14 -05004145 {
4146 call->addArg(V(arg));
4147 }
4148 ::basicBlock->appendInst(call);
4149 return V(ret);
4150}
4151
4152void Breakpoint()
4153{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004154 RR_DEBUG_INFO_UPDATE_LOC();
Ben Clayton713b8d32019-12-17 20:37:56 +00004155 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05004156 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4157 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4158 ::basicBlock->appendInst(trap);
4159}
4160
Ben Clayton713b8d32019-12-17 20:37:56 +00004161void Nucleus::createFence(std::memory_order memoryOrder)
4162{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004163 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004164 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4165 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4166 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
4167 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
4168 inst->addArg(order);
4169 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00004170}
Antonio Maiorano370cba52019-12-31 11:36:07 -05004171
Ben Clayton713b8d32019-12-17 20:37:56 +00004172Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
4173{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004174 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004175 UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004176 return nullptr;
4177}
4178void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
4179{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004180 RR_DEBUG_INFO_UPDATE_LOC();
Ben Claytonce54c592020-02-07 11:30:51 +00004181 UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004182}
Nicolas Capens157ba262019-12-10 17:49:14 -05004183
4184RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4185{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004186 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004187 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4188}
4189
4190RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
4191{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004192 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004193 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
4194}
4195
4196void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4197{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004198 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004199 return emulated::Scatter(base, val, offsets, mask, alignment);
4200}
4201
4202void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
4203{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004204 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004205 return emulated::Scatter(base, val, offsets, mask, alignment);
4206}
4207
4208RValue<Float> Exp2(RValue<Float> x)
4209{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004210 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004211 return emulated::Exp2(x);
4212}
4213
4214RValue<Float> Log2(RValue<Float> x)
4215{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004216 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004217 return emulated::Log2(x);
4218}
4219
4220RValue<Float4> Sin(RValue<Float4> x)
4221{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004222 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004223 return emulated::Sin(x);
4224}
4225
4226RValue<Float4> Cos(RValue<Float4> x)
4227{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004228 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004229 return emulated::Cos(x);
4230}
4231
4232RValue<Float4> Tan(RValue<Float4> x)
4233{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004234 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004235 return emulated::Tan(x);
4236}
4237
4238RValue<Float4> Asin(RValue<Float4> x)
4239{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004240 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004241 return emulated::Asin(x);
4242}
4243
4244RValue<Float4> Acos(RValue<Float4> x)
4245{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004246 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004247 return emulated::Acos(x);
4248}
4249
4250RValue<Float4> Atan(RValue<Float4> x)
4251{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004252 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004253 return emulated::Atan(x);
4254}
4255
4256RValue<Float4> Sinh(RValue<Float4> x)
4257{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004258 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004259 return emulated::Sinh(x);
4260}
4261
4262RValue<Float4> Cosh(RValue<Float4> x)
4263{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004264 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004265 return emulated::Cosh(x);
4266}
4267
4268RValue<Float4> Tanh(RValue<Float4> x)
4269{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004270 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004271 return emulated::Tanh(x);
4272}
4273
4274RValue<Float4> Asinh(RValue<Float4> x)
4275{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004276 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004277 return emulated::Asinh(x);
4278}
4279
4280RValue<Float4> Acosh(RValue<Float4> x)
4281{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004282 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004283 return emulated::Acosh(x);
4284}
4285
4286RValue<Float4> Atanh(RValue<Float4> x)
4287{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004288 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004289 return emulated::Atanh(x);
4290}
4291
4292RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
4293{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004294 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004295 return emulated::Atan2(x, y);
4296}
4297
4298RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
4299{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004300 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004301 return emulated::Pow(x, y);
4302}
4303
4304RValue<Float4> Exp(RValue<Float4> x)
4305{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004306 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004307 return emulated::Exp(x);
4308}
4309
4310RValue<Float4> Log(RValue<Float4> x)
4311{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004312 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004313 return emulated::Log(x);
4314}
4315
4316RValue<Float4> Exp2(RValue<Float4> x)
4317{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004318 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004319 return emulated::Exp2(x);
4320}
4321
4322RValue<Float4> Log2(RValue<Float4> x)
4323{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004324 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens157ba262019-12-10 17:49:14 -05004325 return emulated::Log2(x);
4326}
4327
4328RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
4329{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004330 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004331 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004332 {
Ben Claytonce54c592020-02-07 11:30:51 +00004333 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004334 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004335 }
4336 else
4337 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004338 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004339 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4340 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4341 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4342 ctlz->addArg(x.value);
4343 ::basicBlock->appendInst(ctlz);
4344
4345 return RValue<UInt>(V(result));
4346 }
4347}
4348
4349RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
4350{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004351 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004352 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004353 {
Ben Claytonce54c592020-02-07 11:30:51 +00004354 UNIMPLEMENTED_NO_BUG("Subzero Ctlz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004355 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004356 }
4357 else
4358 {
4359 // TODO: implement vectorized version in Subzero
4360 UInt4 result;
4361 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
4362 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
4363 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
4364 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
4365 return result;
4366 }
4367}
4368
4369RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
4370{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004371 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004372 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004373 {
Ben Claytonce54c592020-02-07 11:30:51 +00004374 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004375 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004376 }
4377 else
4378 {
Ben Clayton713b8d32019-12-17 20:37:56 +00004379 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05004380 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
4381 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4382 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4383 ctlz->addArg(x.value);
4384 ::basicBlock->appendInst(ctlz);
4385
4386 return RValue<UInt>(V(result));
4387 }
4388}
4389
4390RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
4391{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004392 RR_DEBUG_INFO_UPDATE_LOC();
Nicolas Capens81bc9d92019-12-16 15:05:57 -05004393 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05004394 {
Ben Claytonce54c592020-02-07 11:30:51 +00004395 UNIMPLEMENTED_NO_BUG("Subzero Cttz()");
Ben Clayton713b8d32019-12-17 20:37:56 +00004396 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05004397 }
4398 else
4399 {
4400 // TODO: implement vectorized version in Subzero
4401 UInt4 result;
4402 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
4403 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
4404 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
4405 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
4406 return result;
4407 }
4408}
4409
Antonio Maiorano370cba52019-12-31 11:36:07 -05004410RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4411{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004412 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004413 return emulated::MinAtomic(x, y, memoryOrder);
4414}
4415
4416RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4417{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004418 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004419 return emulated::MinAtomic(x, y, memoryOrder);
4420}
4421
4422RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
4423{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004424 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004425 return emulated::MaxAtomic(x, y, memoryOrder);
4426}
4427
4428RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
4429{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004430 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano370cba52019-12-31 11:36:07 -05004431 return emulated::MaxAtomic(x, y, memoryOrder);
4432}
4433
// Emits a print of the caller's backtrace location, but only when both
// ENABLE_RR_DEBUG_INFO and ENABLE_RR_EMIT_PRINT_LOCATION are defined.
// Otherwise this is a no-op.
void EmitDebugLocation()
{
#if defined(ENABLE_RR_DEBUG_INFO) && defined(ENABLE_RR_EMIT_PRINT_LOCATION)
	emitPrintLocation(getCallerBacktrace());
#endif  // ENABLE_RR_DEBUG_INFO && ENABLE_RR_EMIT_PRINT_LOCATION
}
Ben Clayton713b8d32019-12-17 20:37:56 +00004442void EmitDebugVariable(Value *value) {}
// Intentionally empty in this backend.
void FlushDebug()
{
}
4444
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004445namespace {
4446namespace coro {
4447
using FiberHandle = void *;

// Per-coroutine instance state; a pointer to this struct is the "handle"
// passed to the coro:: helper functions.
// Lifetime: from yield to when CoroutineEntryDestroy generated function is called.
struct CoroutineData
{
	// Fiber bookkeeping.
	FiberHandle mainFiber = nullptr;
	FiberHandle routineFiber = nullptr;
	bool convertedFiber{ false };

	// State read and written by the generated coroutine code.
	bool done{ false };
	void *promisePtr{ nullptr };
};
4463
4464CoroutineData *createCoroutineData()
4465{
4466 return new CoroutineData{};
4467}
4468
4469void destroyCoroutineData(CoroutineData *coroData)
4470{
4471 delete coroData;
4472}
4473
4474void convertThreadToMainFiber(Nucleus::CoroutineHandle handle)
4475{
4476#if defined(_WIN32)
4477 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4478
4479 coroData->mainFiber = ::ConvertThreadToFiber(nullptr);
4480
4481 if(coroData->mainFiber)
4482 {
4483 coroData->convertedFiber = true;
4484 }
4485 else
4486 {
4487 // We're probably already on a fiber, so just grab it and remember that we didn't
4488 // convert it, so not to convert back to thread.
4489 coroData->mainFiber = GetCurrentFiber();
4490 coroData->convertedFiber = false;
4491 }
4492 ASSERT(coroData->mainFiber);
4493#else
Ben Claytonce54c592020-02-07 11:30:51 +00004494 UNIMPLEMENTED_NO_BUG("convertThreadToMainFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004495#endif
4496}
4497
4498void convertMainFiberToThread(Nucleus::CoroutineHandle handle)
4499{
4500#if defined(_WIN32)
4501 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4502
4503 ASSERT(coroData->mainFiber);
4504
4505 if(coroData->convertedFiber)
4506 {
4507 ::ConvertFiberToThread();
4508 coroData->mainFiber = nullptr;
4509 }
4510#else
Ben Claytonce54c592020-02-07 11:30:51 +00004511 UNIMPLEMENTED_NO_BUG("convertMainFiberToThread not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004512#endif
4513}
4514using FiberFunc = std::function<void()>;
4515
4516void createRoutineFiber(Nucleus::CoroutineHandle handle, FiberFunc *fiberFunc)
4517{
4518#if defined(_WIN32)
4519 struct Invoker
4520 {
4521 FiberFunc func;
4522
4523 static VOID __stdcall fiberEntry(LPVOID lpParameter)
4524 {
4525 auto *func = reinterpret_cast<FiberFunc *>(lpParameter);
4526 (*func)();
4527 }
4528 };
4529
4530 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4531
4532 constexpr SIZE_T StackSize = 2 * 1024 * 1024;
4533 coroData->routineFiber = ::CreateFiber(StackSize, &Invoker::fiberEntry, fiberFunc);
4534 ASSERT(coroData->routineFiber);
4535#else
Ben Claytonce54c592020-02-07 11:30:51 +00004536 UNIMPLEMENTED_NO_BUG("createRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004537#endif
4538}
4539
4540void deleteRoutineFiber(Nucleus::CoroutineHandle handle)
4541{
4542#if defined(_WIN32)
4543 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4544 ASSERT(coroData->routineFiber);
4545 ::DeleteFiber(coroData->routineFiber);
4546 coroData->routineFiber = nullptr;
4547#else
Ben Claytonce54c592020-02-07 11:30:51 +00004548 UNIMPLEMENTED_NO_BUG("deleteRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004549#endif
4550}
4551
4552void switchToMainFiber(Nucleus::CoroutineHandle handle)
4553{
4554#if defined(_WIN32)
4555 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4556
4557 // Win32
4558 ASSERT(coroData->mainFiber);
4559 ::SwitchToFiber(coroData->mainFiber);
4560#else
Ben Claytonce54c592020-02-07 11:30:51 +00004561 UNIMPLEMENTED_NO_BUG("switchToMainFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004562#endif
4563}
4564
4565void switchToRoutineFiber(Nucleus::CoroutineHandle handle)
4566{
4567#if defined(_WIN32)
4568 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4569
4570 // Win32
4571 ASSERT(coroData->routineFiber);
4572 ::SwitchToFiber(coroData->routineFiber);
4573#else
Ben Claytonce54c592020-02-07 11:30:51 +00004574 UNIMPLEMENTED_NO_BUG("switchToRoutineFiber not implemented for current platform");
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004575#endif
4576}
4577
4578namespace detail {
4579thread_local rr::Nucleus::CoroutineHandle coroHandle{};
4580} // namespace detail
4581
4582void setHandleParam(Nucleus::CoroutineHandle handle)
4583{
4584 ASSERT(!detail::coroHandle);
4585 detail::coroHandle = handle;
4586}
4587
4588Nucleus::CoroutineHandle getHandleParam()
4589{
4590 ASSERT(detail::coroHandle);
4591 auto handle = detail::coroHandle;
4592 detail::coroHandle = {};
4593 return handle;
4594}
4595
4596void setDone(Nucleus::CoroutineHandle handle)
4597{
4598 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4599 ASSERT(!coroData->done); // Should be called once
4600 coroData->done = true;
4601}
4602
4603bool isDone(Nucleus::CoroutineHandle handle)
4604{
4605 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4606 return coroData->done;
4607}
4608
4609void setPromisePtr(Nucleus::CoroutineHandle handle, void *promisePtr)
4610{
4611 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4612 coroData->promisePtr = promisePtr;
4613}
4614
4615void *getPromisePtr(Nucleus::CoroutineHandle handle)
4616{
4617 auto *coroData = reinterpret_cast<CoroutineData *>(handle);
4618 return coroData->promisePtr;
4619}
4620
4621} // namespace coro
4622} // namespace
4623
4624// Used to generate coroutines.
4625// Lifetime: from yield to acquireCoroutine
4626class CoroutineGenerator
4627{
4628public:
4629 CoroutineGenerator()
4630 {
4631 }
4632
4633 // Inserts instructions at the top of the current function to make it a coroutine.
4634 void generateCoroutineBegin()
4635 {
4636 // Begin building the main coroutine_begin() function.
4637 // We insert these instructions at the top of the entry node,
4638 // before existing reactor-generated instructions.
4639
4640 // CoroutineHandle coroutine_begin(<Arguments>)
4641 // {
4642 // this->handle = coro::getHandleParam();
4643 //
4644 // YieldType promise;
4645 // coro::setPromisePtr(handle, &promise); // For await
4646 //
4647 // ... <REACTOR CODE> ...
4648 //
4649
4650 // Save original entry block and current block, and create a new entry block and make it current.
4651 // This new block will be used to inject code above the begin routine's existing code. We make
4652 // this block branch to the original entry block as the last instruction.
4653 auto origEntryBB = ::function->getEntryNode();
4654 auto origCurrBB = ::basicBlock;
4655 auto newBB = ::function->makeNode();
4656 sz::replaceEntryNode(::function, newBB);
4657 ::basicBlock = newBB;
4658
4659 // this->handle = coro::getHandleParam();
4660 this->handle = sz::Call(::function, ::basicBlock, coro::getHandleParam);
4661
4662 // YieldType promise;
4663 // coro::setPromisePtr(handle, &promise); // For await
4664 this->promise = sz::allocateStackVariable(::function, T(::coroYieldType));
4665 sz::Call(::function, ::basicBlock, coro::setPromisePtr, this->handle, this->promise);
4666
4667 // Branch to original entry block
4668 auto br = Ice::InstBr::create(::function, origEntryBB);
4669 ::basicBlock->appendInst(br);
4670
4671 // Restore current block for future instructions
4672 ::basicBlock = origCurrBB;
4673 }
4674
4675 // Adds instructions for Yield() calls at the current location of the main coroutine function.
4676 void generateYield(Value *val)
4677 {
4678 // ... <REACTOR CODE> ...
4679 //
4680 // promise = val;
4681 // coro::switchToMainFiber(handle);
4682 //
4683 // ... <REACTOR CODE> ...
4684
4685 Nucleus::createStore(val, V(this->promise), ::coroYieldType);
4686 sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
4687 }
4688
4689 // Adds instructions at the end of the current main coroutine function to end the coroutine.
4690 void generateCoroutineEnd()
4691 {
4692 // ... <REACTOR CODE> ...
4693 //
4694 // coro::setDone(handle);
4695 // coro::switchToMainFiber();
4696 // // Unreachable
4697 // }
4698 //
4699
4700 sz::Call(::function, ::basicBlock, coro::setDone, this->handle);
4701
4702 // A Win32 Fiber function must not end, otherwise it tears down the thread it's running on.
4703 // So we add code to switch back to the main thread.
4704 sz::Call(::function, ::basicBlock, coro::switchToMainFiber, this->handle);
4705 }
4706
4707 using FunctionUniquePtr = std::unique_ptr<Ice::Cfg>;
4708
4709 // Generates the await function for the current coroutine.
4710 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4711 static FunctionUniquePtr generateAwaitFunction()
4712 {
4713 // bool coroutine_await(CoroutineHandle handle, YieldType* out)
4714 // {
4715 // if (coro::isDone())
4716 // {
4717 // return false;
4718 // }
4719 // else // resume
4720 // {
4721 // YieldType* promise = coro::getPromisePtr(handle);
4722 // *out = *promise;
4723 // coro::switchToRoutineFiber(handle);
4724 // return true;
4725 // }
4726 // }
4727
4728 // Subzero doesn't support bool types (IceType_i1) as return type
4729 const Ice::Type ReturnType = Ice::IceType_i32;
4730 const Ice::Type YieldPtrType = sz::getPointerType(T(::coroYieldType));
4731 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4732
4733 Ice::Cfg *awaitFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType, YieldPtrType });
4734 Ice::CfgLocalAllocatorScope scopedAlloc{ awaitFunc };
4735
4736 Ice::Variable *handle = awaitFunc->getArgs()[0];
4737 Ice::Variable *outPtr = awaitFunc->getArgs()[1];
4738
4739 auto doneBlock = awaitFunc->makeNode();
4740 {
4741 // return false;
4742 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(0));
4743 doneBlock->appendInst(ret);
4744 }
4745
4746 auto resumeBlock = awaitFunc->makeNode();
4747 {
4748 // YieldType* promise = coro::getPromisePtr(handle);
4749 Ice::Variable *promise = sz::Call(awaitFunc, resumeBlock, coro::getPromisePtr, handle);
4750
4751 // *out = *promise;
4752 // Load promise value
4753 Ice::Variable *promiseVal = awaitFunc->makeVariable(T(::coroYieldType));
4754 auto load = Ice::InstLoad::create(awaitFunc, promiseVal, promise);
4755 resumeBlock->appendInst(load);
4756 // Then store it in output param
4757 auto store = Ice::InstStore::create(awaitFunc, promiseVal, outPtr);
4758 resumeBlock->appendInst(store);
4759
4760 // coro::switchToRoutineFiber(handle);
4761 sz::Call(awaitFunc, resumeBlock, coro::switchToRoutineFiber, handle);
4762
4763 // return true;
4764 Ice::InstRet *ret = Ice::InstRet::create(awaitFunc, ::context->getConstantInt32(1));
4765 resumeBlock->appendInst(ret);
4766 }
4767
4768 // if (coro::isDone())
4769 // {
4770 // <doneBlock>
4771 // }
4772 // else // resume
4773 // {
4774 // <resumeBlock>
4775 // }
4776 Ice::CfgNode *bb = awaitFunc->getEntryNode();
4777 Ice::Variable *done = sz::Call(awaitFunc, bb, coro::isDone);
4778 auto br = Ice::InstBr::create(awaitFunc, done, doneBlock, resumeBlock);
4779 bb->appendInst(br);
4780
4781 return FunctionUniquePtr{ awaitFunc };
4782 }
4783
4784 // Generates the destroy function for the current coroutine.
4785 // Cannot use Nucleus functions that modify ::function and ::basicBlock.
4786 static FunctionUniquePtr generateDestroyFunction()
4787 {
4788 // void coroutine_destroy(Nucleus::CoroutineHandle handle)
4789 // {
4790 // coro::convertMainFiberToThread(coroData);
4791 // coro::deleteRoutineFiber(handle);
4792 // coro::destroyCoroutineData(handle);
4793 // return;
4794 // }
4795
4796 const Ice::Type ReturnType = Ice::IceType_void;
4797 const Ice::Type HandleType = sz::getPointerType(Ice::IceType_void);
4798
4799 Ice::Cfg *destroyFunc = sz::createFunction(::context, ReturnType, std::vector<Ice::Type>{ HandleType });
4800 Ice::CfgLocalAllocatorScope scopedAlloc{ destroyFunc };
4801
4802 Ice::Variable *handle = destroyFunc->getArgs()[0];
4803
4804 auto *bb = destroyFunc->getEntryNode();
4805
4806 // coro::convertMainFiberToThread(coroData);
4807 sz::Call(destroyFunc, bb, coro::convertMainFiberToThread, handle);
4808
4809 // coro::deleteRoutineFiber(handle);
4810 sz::Call(destroyFunc, bb, coro::deleteRoutineFiber, handle);
4811
4812 // coro::destroyCoroutineData(handle);
4813 sz::Call(destroyFunc, bb, coro::destroyCoroutineData, handle);
4814
4815 // return;
4816 Ice::InstRet *ret = Ice::InstRet::create(destroyFunc);
4817 bb->appendInst(ret);
4818
4819 return FunctionUniquePtr{ destroyFunc };
4820 }
4821
4822private:
4823 Ice::Variable *handle{};
4824 Ice::Variable *promise{};
4825};
4826
4827static Nucleus::CoroutineHandle invokeCoroutineBegin(std::function<Nucleus::CoroutineHandle()> beginFunc)
4828{
4829 // This doubles up as our coroutine handle
4830 auto coroData = coro::createCoroutineData();
4831
4832 // Convert current thread to a fiber so we can create new fibers and switch to them
4833 coro::convertThreadToMainFiber(coroData);
4834
4835 coro::FiberFunc fiberFunc = [&]() {
4836 // Store handle in TLS so that the coroutine can grab it right away, before
4837 // any fiber switch occurs.
4838 coro::setHandleParam(coroData);
4839
4840 // Invoke the begin function in the context of the routine fiber
4841 beginFunc();
4842
4843 // Either it yielded, or finished. In either case, we switch back to the main fiber.
4844 // We don't ever return from this function, or the current thread will be destroyed.
4845 coro::switchToMainFiber(coroData);
4846 };
4847
4848 coro::createRoutineFiber(coroData, &fiberFunc);
4849
4850 // Fiber will now start running, executing the saved beginFunc
4851 coro::switchToRoutineFiber(coroData);
4852
4853 return coroData;
4854}
4855
4856void Nucleus::createCoroutine(Type *yieldType, const std::vector<Type *> &params)
4857{
4858 // Start by creating a regular function
4859 createFunction(yieldType, params);
4860
4861 // Save in case yield() is called
4862 ASSERT(::coroYieldType == nullptr); // Only one coroutine can be generated at once
4863 ::coroYieldType = yieldType;
4864}
4865
4866void Nucleus::yield(Value *val)
4867{
Antonio Maioranoaae33732020-02-14 14:52:34 -05004868 RR_DEBUG_INFO_UPDATE_LOC();
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004869 Variable::materializeAll();
4870
4871 // On first yield, we start generating coroutine functions
4872 if(!::coroGen)
4873 {
4874 ::coroGen = std::make_shared<CoroutineGenerator>();
4875 ::coroGen->generateCoroutineBegin();
4876 }
4877
4878 ASSERT(::coroGen);
4879 ::coroGen->generateYield(val);
Nicolas Capens157ba262019-12-10 17:49:14 -05004880}
4881
Ben Clayton713b8d32019-12-17 20:37:56 +00004882static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
4883{
4884 return false;
4885}
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004886
4887static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle handle)
4888{
4889}
Nicolas Capens157ba262019-12-10 17:49:14 -05004890
4891std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
4892{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004893 if(::coroGen)
4894 {
4895 // Finish generating coroutine functions
4896 {
4897 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4898 ::coroGen->generateCoroutineEnd();
4899 createRetVoidIfNoRet();
4900 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004901
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004902 auto awaitFunc = ::coroGen->generateAwaitFunction();
4903 auto destroyFunc = ::coroGen->generateDestroyFunction();
Nicolas Capens157ba262019-12-10 17:49:14 -05004904
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004905 // At this point, we no longer need the CoroutineGenerator.
4906 ::coroGen.reset();
4907 ::coroYieldType = nullptr;
4908
4909 auto routine = rr::acquireRoutine({ ::function, awaitFunc.get(), destroyFunc.get() },
4910 { name, "await", "destroy" },
4911 cfgEdit);
4912
4913 return routine;
4914 }
4915 else
4916 {
4917 {
4918 Ice::CfgLocalAllocatorScope scopedAlloc{ ::function };
4919 createRetVoidIfNoRet();
4920 }
4921
4922 ::coroYieldType = nullptr;
4923
4924 // Not an actual coroutine (no yields), so return stubs for await and destroy
4925 auto routine = rr::acquireRoutine({ ::function }, { name }, cfgEdit);
4926
4927 auto routineImpl = std::static_pointer_cast<ELFMemoryStreamer>(routine);
4928 routineImpl->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
4929 routineImpl->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
4930 return routine;
4931 }
Nicolas Capens157ba262019-12-10 17:49:14 -05004932}
4933
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004934Nucleus::CoroutineHandle Nucleus::invokeCoroutineBegin(Routine &routine, std::function<Nucleus::CoroutineHandle()> func)
Ben Clayton713b8d32019-12-17 20:37:56 +00004935{
Antonio Maiorano5ba2a5b2020-01-17 15:29:37 -05004936 const bool isCoroutine = routine.getEntry(Nucleus::CoroutineEntryAwait) != reinterpret_cast<const void *>(&coroutineEntryAwaitStub);
4937
4938 if(isCoroutine)
4939 {
4940 return rr::invokeCoroutineBegin(func);
4941 }
4942 else
4943 {
4944 // For regular routines, just invoke the begin func directly
4945 return func();
4946 }
Ben Clayton713b8d32019-12-17 20:37:56 +00004947}
Nicolas Capens157ba262019-12-10 17:49:14 -05004948
4949} // namespace rr