blob: a9375c4da49d1ce6cbf59a6e1826c0c46c28f10c [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000017#include "Reactor.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040018
Nicolas Capens1a3ce872018-10-10 10:42:36 -040019#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000020#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040021
Nicolas Capens598f8d82016-09-26 15:09:10 -040022#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040023#include "src/IceCfgNode.h"
24#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000025#include "src/IceELFStreamer.h"
26#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040027#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040029
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031#include "llvm/Support/FileSystem.h"
32#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040033
34#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000035# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040036#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040037
Nicolas Capensbd65da92017-01-05 16:31:06 -050038#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# ifndef WIN32_LEAN_AND_MEAN
40# define WIN32_LEAN_AND_MEAN
41# endif // !WIN32_LEAN_AND_MEAN
42# ifndef NOMINMAX
43# define NOMINMAX
44# endif // !NOMINMAX
45# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050046#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040047
Nicolas Capens598f8d82016-09-26 15:09:10 -040048#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000049#include <limits>
50#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
// Subzero utility functions
// These functions only accept and return Subzero (Ice) types, and do not access any globals.
54namespace sz {
55static Ice::Constant *getConstantPointer(Ice::GlobalContext *context, void const *ptr)
56{
57 if(sizeof(void *) == 8)
58 {
59 return context->getConstantInt64(reinterpret_cast<intptr_t>(ptr));
60 }
61 else
62 {
63 return context->getConstantInt32(reinterpret_cast<intptr_t>(ptr));
64 }
65}
66
67// Returns a non-const variable copy of const v
68static Ice::Variable *createUnconstCast(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Constant *v)
69{
70 Ice::Variable *result = function->makeVariable(v->getType());
71 Ice::InstCast *cast = Ice::InstCast::create(function, Ice::InstCast::Bitcast, result, v);
72 basicBlock->appendInst(cast);
73 return result;
74}
75
76static Ice::Variable *createLoad(Ice::Cfg *function, Ice::CfgNode *basicBlock, Ice::Operand *ptr, Ice::Type type, unsigned int align)
77{
78 // TODO(b/148272103): InstLoad assumes that a constant ptr is an offset, rather than an
79 // absolute address. We circumvent this by casting to a non-const variable, and loading
80 // from that.
81 if(auto *cptr = llvm::dyn_cast<Ice::Constant>(ptr))
82 {
83 ptr = sz::createUnconstCast(function, basicBlock, cptr);
84 }
85
86 Ice::Variable *result = function->makeVariable(type);
87 auto load = Ice::InstLoad::create(function, result, ptr, align);
88 basicBlock->appendInst(load);
89
90 return result;
91}
92
93} // namespace sz
Ben Clayton713b8d32019-12-17 20:37:56 +000094namespace rr {
95class ELFMemoryStreamer;
96}
Nicolas Capens157ba262019-12-10 17:49:14 -050097
98namespace {
99
100// Default configuration settings. Must be accessed under mutex lock.
101std::mutex defaultConfigLock;
102rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000103{
Nicolas Capens157ba262019-12-10 17:49:14 -0500104 // This uses a static in a function to avoid the cost of a global static
105 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
106 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +0000107 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -0500108 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +0000109}
110
Nicolas Capens157ba262019-12-10 17:49:14 -0500111Ice::GlobalContext *context = nullptr;
112Ice::Cfg *function = nullptr;
113Ice::CfgNode *basicBlock = nullptr;
114Ice::CfgLocalAllocatorScope *allocator = nullptr;
115rr::ELFMemoryStreamer *routine = nullptr;
116
117std::mutex codegenMutex;
118
119Ice::ELFFileStreamer *elfFile = nullptr;
120Ice::Fdstream *out = nullptr;
121
122} // Anonymous namespace
123
124namespace {
125
// Map the MSVC architecture macros onto the GCC/Clang spellings tested below.
#if defined(_M_IX86) && !defined(__i386__)
#    define __i386__ 1
#endif

#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
#    define __x86_64__ 1
#endif
133
Antonio Maiorano370cba52019-12-31 11:36:07 -0500134Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400135{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500136 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +0100137 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500138 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +0000139 case rr::Optimization::Level::None: return Ice::Opt_m1;
140 case rr::Optimization::Level::Less: return Ice::Opt_m1;
141 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500142 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
143 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100144 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500145 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400146}
147
Antonio Maiorano370cba52019-12-31 11:36:07 -0500148Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
149{
150 switch(memoryOrder)
151 {
152 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
153 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
154 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
155 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
156 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
157 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
158 }
159 return Ice::Intrinsics::MemoryOrderInvalid;
160}
161
// Host CPU feature flags, evaluated once during static initialization.
class CPUID
{
public:
    const static bool ARM;     // True when compiled for 32- or 64-bit ARM.
    const static bool SSE4_1;  // True when an x86 host reports SSE4.1 support.

private:
    // Executes the CPUID instruction for leaf `info` and stores EAX, EBX, ECX, EDX
    // in registers[0..3]. On non-x86 targets all four entries are set to zero.
    static void cpuid(int registers[4], int info)
    {
#if defined(__i386__) || defined(__x86_64__)
#    if defined(_WIN32)
        __cpuid(registers, info);
#    else
        __asm volatile("cpuid"
                       : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
                       : "a"(info));
#    endif
#else
        for(int i = 0; i < 4; i++)
        {
            registers[i] = 0;
        }
#endif
    }

    // Compile-time architecture check; building for an unlisted architecture is an error.
    static bool detectARM()
    {
#if defined(__arm__) || defined(__aarch64__)
        return true;
#elif defined(__i386__) || defined(__x86_64__)
        return false;
#elif defined(__mips__)
        return false;
#else
#    error "Unknown architecture"
#endif
    }

    // Tests the 0x00080000 bit of ECX returned by CPUID leaf 1; always false off x86.
    static bool detectSSE4_1()
    {
#if defined(__i386__) || defined(__x86_64__)
        const int sse41Mask = 0x00080000;
        int registers[4];
        cpuid(registers, 1);
        return (registers[2] & sse41Mask) != 0;
#else
        return false;
#endif
    }
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
214const bool emulateIntrinsics = false;
215const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400216
Nicolas Capens157ba262019-12-10 17:49:14 -0500217constexpr bool subzeroDumpEnabled = false;
218constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500219
220#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500221static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
222static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500223#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500224
225} // anonymous namespace
226
227namespace rr {
228
// Returns the name of this Reactor backend.
std::string BackendName()
{
    return std::string("Subzero");
}
233
Ben Clayton713b8d32019-12-17 20:37:56 +0000234const Capabilities Caps = {
235 false, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500236};
237
238enum EmulatedType
239{
240 EmulatedShift = 16,
241 EmulatedV2 = 2 << EmulatedShift,
242 EmulatedV4 = 4 << EmulatedShift,
243 EmulatedV8 = 8 << EmulatedShift,
244 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
245
246 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
247 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
248 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000249 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
250 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500251 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
252};
253
Ben Clayton713b8d32019-12-17 20:37:56 +0000254class Value : public Ice::Operand
255{};
256class SwitchCases : public Ice::InstSwitch
257{};
258class BasicBlock : public Ice::CfgNode
259{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500260
261Ice::Type T(Type *t)
262{
263 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
264 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500265}
266
Nicolas Capens157ba262019-12-10 17:49:14 -0500267Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400268{
Ben Clayton713b8d32019-12-17 20:37:56 +0000269 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500270}
271
272Type *T(EmulatedType t)
273{
Ben Clayton713b8d32019-12-17 20:37:56 +0000274 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500275}
276
277Value *V(Ice::Operand *v)
278{
Ben Clayton713b8d32019-12-17 20:37:56 +0000279 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500280}
281
282BasicBlock *B(Ice::CfgNode *b)
283{
Ben Clayton713b8d32019-12-17 20:37:56 +0000284 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500285}
286
287static size_t typeSize(Type *type)
288{
289 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400290 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500291 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800292 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000293 case Type_v2i32: return 8;
294 case Type_v4i16: return 8;
295 case Type_v2i16: return 4;
296 case Type_v8i8: return 8;
297 case Type_v4i8: return 4;
298 case Type_v2f32: return 8;
299 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500300 }
301 }
302
303 return Ice::typeWidthInBytes(T(type));
304}
305
Ben Clayton713b8d32019-12-17 20:37:56 +0000306using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
307using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500308
309inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
310{
Ben Clayton713b8d32019-12-17 20:37:56 +0000311 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500312}
313
314inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
315{
316 return &sectionHeader(elfHeader)[index];
317}
318
319static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
320{
321 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
322
323 uint32_t index = relocation.getSymbol();
324 int table = relocationTable.sh_link;
325 void *symbolValue = nullptr;
326
327 if(index != SHN_UNDEF)
328 {
329 if(table == SHN_UNDEF) return nullptr;
330 const SectionHeader *symbolTable = elfSection(elfHeader, table);
331
332 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
333 if(index >= symtab_entries)
334 {
335 ASSERT(index < symtab_entries && "Symbol Index out of range");
336 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800337 }
338
Nicolas Capens157ba262019-12-10 17:49:14 -0500339 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000340 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500341 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800342
Nicolas Capens157ba262019-12-10 17:49:14 -0500343 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400344 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500345 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000346 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400347 }
348 else
349 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500350 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400351 }
Nicolas Capens66478362016-10-13 15:36:36 -0400352 }
353
Nicolas Capens157ba262019-12-10 17:49:14 -0500354 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000355 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500356
357 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400358 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400359 switch(relocation.getType())
360 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000361 case R_ARM_NONE:
362 // No relocation
363 break;
364 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500365 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000366 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500367 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
368 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
369 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400370 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000371 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500372 {
373 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
374 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
375 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400376 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000377 default:
378 ASSERT(false && "Unsupported relocation type");
379 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400380 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500381 }
382 else
383 {
384 switch(relocation.getType())
385 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000386 case R_386_NONE:
387 // No relocation
388 break;
389 case R_386_32:
390 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
391 break;
392 case R_386_PC32:
393 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
394 break;
395 default:
396 ASSERT(false && "Unsupported relocation type");
397 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500398 }
Nicolas Capens66478362016-10-13 15:36:36 -0400399 }
400
Nicolas Capens157ba262019-12-10 17:49:14 -0500401 return symbolValue;
402}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400403
Nicolas Capens157ba262019-12-10 17:49:14 -0500404static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
405{
406 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
407
408 uint32_t index = relocation.getSymbol();
409 int table = relocationTable.sh_link;
410 void *symbolValue = nullptr;
411
412 if(index != SHN_UNDEF)
413 {
414 if(table == SHN_UNDEF) return nullptr;
415 const SectionHeader *symbolTable = elfSection(elfHeader, table);
416
417 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
418 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400419 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500420 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400421 return nullptr;
422 }
423
Nicolas Capens157ba262019-12-10 17:49:14 -0500424 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000425 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500426 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400427
Nicolas Capens157ba262019-12-10 17:49:14 -0500428 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400429 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500430 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000431 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500432 }
433 else
434 {
435 return nullptr;
436 }
437 }
Nicolas Capens66478362016-10-13 15:36:36 -0400438
Nicolas Capens157ba262019-12-10 17:49:14 -0500439 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000440 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
441 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400442
Nicolas Capens157ba262019-12-10 17:49:14 -0500443 switch(relocation.getType())
444 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000445 case R_X86_64_NONE:
446 // No relocation
447 break;
448 case R_X86_64_64:
449 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
450 break;
451 case R_X86_64_PC32:
452 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
453 break;
454 case R_X86_64_32S:
455 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
456 break;
457 default:
458 ASSERT(false && "Unsupported relocation type");
459 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500460 }
461
462 return symbolValue;
463}
464
465void *loadImage(uint8_t *const elfImage, size_t &codeSize)
466{
Ben Clayton713b8d32019-12-17 20:37:56 +0000467 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500468
469 if(!elfHeader->checkMagic())
470 {
471 return nullptr;
472 }
473
474 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000475 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
476#if defined(__i386__)
477 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
478#elif defined(__x86_64__)
479 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
480#elif defined(__arm__)
481 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
482#elif defined(__aarch64__)
483 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
484#elif defined(__mips__)
485 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
486#else
487# error "Unsupported platform"
488#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500489
Ben Clayton713b8d32019-12-17 20:37:56 +0000490 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500491 void *entry = nullptr;
492
493 for(int i = 0; i < elfHeader->e_shnum; i++)
494 {
495 if(sectionHeader[i].sh_type == SHT_PROGBITS)
496 {
497 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
498 {
499 entry = elfImage + sectionHeader[i].sh_offset;
500 codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400501 }
502 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500503 else if(sectionHeader[i].sh_type == SHT_REL)
504 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000505 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400506
Nicolas Capens157ba262019-12-10 17:49:14 -0500507 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
508 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000509 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500510 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
511 }
512 }
513 else if(sectionHeader[i].sh_type == SHT_RELA)
514 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000515 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500516
517 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
518 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000519 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500520 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
521 }
522 }
523 }
524
525 return entry;
526}
527
528template<typename T>
529struct ExecutableAllocator
530{
531 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000532 template<class U>
533 ExecutableAllocator(const ExecutableAllocator<U> &other)
534 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500535
536 using value_type = T;
537 using size_type = std::size_t;
538
539 T *allocate(size_type n)
540 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000541 return (T *)allocateMemoryPages(
542 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500543 }
544
545 void deallocate(T *p, size_type n)
546 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800547 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500548 }
549};
550
551class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
552{
553 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
554 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
555
556public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000557 ELFMemoryStreamer()
558 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500559 {
560 position = 0;
561 buffer.reserve(0x1000);
562 }
563
564 ~ELFMemoryStreamer() override
565 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500566 }
567
568 void write8(uint8_t Value) override
569 {
570 if(position == (uint64_t)buffer.size())
571 {
572 buffer.push_back(Value);
573 position++;
574 }
575 else if(position < (uint64_t)buffer.size())
576 {
577 buffer[position] = Value;
578 position++;
579 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000580 else
581 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500582 }
583
584 void writeBytes(llvm::StringRef Bytes) override
585 {
586 std::size_t oldSize = buffer.size();
587 buffer.resize(oldSize + Bytes.size());
588 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
589 position += Bytes.size();
590 }
591
592 uint64_t tell() const override { return position; }
593
594 void seek(uint64_t Off) override { position = Off; }
595
Ben Clayton713b8d32019-12-17 20:37:56 +0000596 const void *finalizeEntryBegin()
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000598 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
Nicolas Capens157ba262019-12-10 17:49:14 -0500599
600 size_t codeSize = 0;
601 const void *entry = loadImage(&buffer[0], codeSize);
602
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800603 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
Nicolas Capens157ba262019-12-10 17:49:14 -0500604#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500605 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
606#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000607 __builtin___clear_cache((char *)entry, (char *)entry + codeSize);
Nicolas Capens157ba262019-12-10 17:49:14 -0500608#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -0400609 return entry;
610 }
611
Ben Clayton713b8d32019-12-17 20:37:56 +0000612 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400613 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500614 ASSERT(func);
615 funcs[index] = func;
616 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400617
Nicolas Capens157ba262019-12-10 17:49:14 -0500618 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400619 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500620 ASSERT(funcs[index]);
621 return funcs[index];
622 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400623
Antonio Maiorano02a39532020-01-21 15:15:34 -0500624 const void *addConstantData(const void *data, size_t size, size_t alignment = 1)
Nicolas Capens157ba262019-12-10 17:49:14 -0500625 {
Antonio Maiorano02a39532020-01-21 15:15:34 -0500626 // TODO(b/148086935): Replace with a buffer allocator.
627 size_t space = size + alignment;
628 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[space]);
629 void *ptr = buf.get();
630 void *alignedPtr = std::align(alignment, size, ptr, space);
631 ASSERT(alignedPtr);
632 memcpy(alignedPtr, data, size);
Nicolas Capens157ba262019-12-10 17:49:14 -0500633 constantData.emplace_back(std::move(buf));
Antonio Maiorano02a39532020-01-21 15:15:34 -0500634 return alignedPtr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500635 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400636
Nicolas Capens157ba262019-12-10 17:49:14 -0500637private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000638 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500639 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
640 std::size_t position;
641 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500642};
643
644Nucleus::Nucleus()
645{
Ben Clayton713b8d32019-12-17 20:37:56 +0000646 ::codegenMutex.lock(); // Reactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500647
648 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
649 Ice::ClFlags::getParsedClFlags(Flags);
650
Ben Clayton713b8d32019-12-17 20:37:56 +0000651#if defined(__arm__)
652 Flags.setTargetArch(Ice::Target_ARM32);
653 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
654#elif defined(__mips__)
655 Flags.setTargetArch(Ice::Target_MIPS32);
656 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
657#else // x86
658 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
659 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
660#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500661 Flags.setOutFileType(Ice::FT_Elf);
662 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
663 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
664 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
665 Flags.setDisableHybridAssembly(true);
666
667 static llvm::raw_os_ostream cout(std::cout);
668 static llvm::raw_os_ostream cerr(std::cerr);
669
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500670 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500671 {
672 // Decorate text asm with liveness info
673 Flags.setDecorateAsm(true);
674 }
675
Ben Clayton713b8d32019-12-17 20:37:56 +0000676 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500677 {
678 std::error_code errorCode;
679 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
680 ::elfFile = new Ice::ELFFileStreamer(*out);
681 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
682 }
683 else
684 {
685 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
686 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
687 ::routine = elfMemory;
688 }
689}
690
691Nucleus::~Nucleus()
692{
693 delete ::routine;
694
695 delete ::allocator;
696 delete ::function;
697 delete ::context;
698
699 delete ::elfFile;
700 delete ::out;
701
702 ::codegenMutex.unlock();
703}
704
705void Nucleus::setDefaultConfig(const Config &cfg)
706{
707 std::unique_lock<std::mutex> lock(::defaultConfigLock);
708 ::defaultConfig() = cfg;
709}
710
711void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
712{
713 std::unique_lock<std::mutex> lock(::defaultConfigLock);
714 auto &config = ::defaultConfig();
715 config = cfgEdit.apply(config);
716}
717
718Config Nucleus::getDefaultConfig()
719{
720 std::unique_lock<std::mutex> lock(::defaultConfigLock);
721 return ::defaultConfig();
722}
723
724std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
725{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500726 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500727 {
728 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
729 context->getStrDump().SetUnbuffered();
730 }
731
732 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
733 {
734 createRetVoid();
735 }
736
737 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, name));
738
739 rr::optimize(::function);
740
741 ::function->computeInOutEdges();
742 ASSERT(!::function->hasError());
743
744 ::function->translate();
745 ASSERT(!::function->hasError());
746
747 auto globals = ::function->getGlobalInits();
748
749 if(globals && !globals->empty())
750 {
751 ::context->getGlobals()->merge(globals.get());
752 }
753
754 ::context->emitFileHeader();
755
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500756 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500757 {
758 ::function->emit();
759 }
760
761 ::function->emitIAS();
762 auto assembler = ::function->releaseAssembler();
763 auto objectWriter = ::context->getObjectWriter();
764 assembler->alignFunction();
765 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
766 ::context->lowerGlobals("last");
767 ::context->lowerConstants();
768 ::context->lowerJumpTables();
769 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
770 objectWriter->writeNonUserSections();
771
Ben Clayton713b8d32019-12-17 20:37:56 +0000772 const void *entryBegin = ::routine->finalizeEntryBegin();
Nicolas Capens157ba262019-12-10 17:49:14 -0500773 ::routine->setEntry(Nucleus::CoroutineEntryBegin, entryBegin);
774
775 Routine *handoffRoutine = ::routine;
776 ::routine = nullptr;
777
778 return std::shared_ptr<Routine>(handoffRoutine);
779}
780
781Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
782{
783 Ice::Type type = T(t);
784 int typeSize = Ice::typeWidthInBytes(type);
785 int totalSize = typeSize * (arraySize ? arraySize : 1);
786
787 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
788 auto address = ::function->makeVariable(T(getPointerType(t)));
789 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
790 ::function->getEntryNode()->getInsts().push_front(alloca);
791
792 return V(address);
793}
794
795BasicBlock *Nucleus::createBasicBlock()
796{
797 return B(::function->makeNode());
798}
799
800BasicBlock *Nucleus::getInsertBlock()
801{
802 return B(::basicBlock);
803}
804
805void Nucleus::setInsertBlock(BasicBlock *basicBlock)
806{
Ben Clayton713b8d32019-12-17 20:37:56 +0000807 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -0500808
809 Variable::materializeAll();
810
811 ::basicBlock = basicBlock;
812}
813
Ben Clayton713b8d32019-12-17 20:37:56 +0000814void Nucleus::createFunction(Type *ReturnType, std::vector<Type *> &Params)
Nicolas Capens157ba262019-12-10 17:49:14 -0500815{
816 uint32_t sequenceNumber = 0;
817 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
818 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
819
820 for(Type *type : Params)
821 {
822 Ice::Variable *arg = ::function->makeVariable(T(type));
823 ::function->addArg(arg);
824 }
825
826 Ice::CfgNode *node = ::function->makeNode();
827 ::function->setEntryNode(node);
828 ::basicBlock = node;
829}
830
831Value *Nucleus::getArgument(unsigned int index)
832{
833 return V(::function->getArgs()[index]);
834}
835
836void Nucleus::createRetVoid()
837{
838 // Code generated after this point is unreachable, so any variables
839 // being read can safely return an undefined value. We have to avoid
840 // materializing variables after the terminator ret instruction.
841 Variable::killUnmaterialized();
842
843 Ice::InstRet *ret = Ice::InstRet::create(::function);
844 ::basicBlock->appendInst(ret);
845}
846
847void Nucleus::createRet(Value *v)
848{
849 // Code generated after this point is unreachable, so any variables
850 // being read can safely return an undefined value. We have to avoid
851 // materializing variables after the terminator ret instruction.
852 Variable::killUnmaterialized();
853
854 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
855 ::basicBlock->appendInst(ret);
856}
857
858void Nucleus::createBr(BasicBlock *dest)
859{
860 Variable::materializeAll();
861
862 auto br = Ice::InstBr::create(::function, dest);
863 ::basicBlock->appendInst(br);
864}
865
866void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
867{
868 Variable::materializeAll();
869
870 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
871 ::basicBlock->appendInst(br);
872}
873
874static bool isCommutative(Ice::InstArithmetic::OpKind op)
875{
876 switch(op)
877 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000878 case Ice::InstArithmetic::Add:
879 case Ice::InstArithmetic::Fadd:
880 case Ice::InstArithmetic::Mul:
881 case Ice::InstArithmetic::Fmul:
882 case Ice::InstArithmetic::And:
883 case Ice::InstArithmetic::Or:
884 case Ice::InstArithmetic::Xor:
885 return true;
886 default:
887 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -0500888 }
889}
890
891static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
892{
893 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
894
895 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
896
897 Ice::Variable *result = ::function->makeVariable(lhs->getType());
898 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
899 ::basicBlock->appendInst(arithmetic);
900
901 return V(result);
902}
903
904Value *Nucleus::createAdd(Value *lhs, Value *rhs)
905{
906 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
907}
908
909Value *Nucleus::createSub(Value *lhs, Value *rhs)
910{
911 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
912}
913
914Value *Nucleus::createMul(Value *lhs, Value *rhs)
915{
916 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
917}
918
919Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
920{
921 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
922}
923
924Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
925{
926 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
927}
928
929Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
930{
931 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
932}
933
934Value *Nucleus::createFSub(Value *lhs, Value *rhs)
935{
936 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
937}
938
939Value *Nucleus::createFMul(Value *lhs, Value *rhs)
940{
941 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
942}
943
944Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
945{
946 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
947}
948
949Value *Nucleus::createURem(Value *lhs, Value *rhs)
950{
951 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
952}
953
954Value *Nucleus::createSRem(Value *lhs, Value *rhs)
955{
956 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
957}
958
959Value *Nucleus::createFRem(Value *lhs, Value *rhs)
960{
Antonio Maiorano5ef91b82020-01-21 15:10:22 -0500961 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
962 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
963 UNIMPLEMENTED("Nucleus::createFRem");
964 return nullptr;
965}
966
967RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
968{
969 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -0500970}
971
972Value *Nucleus::createShl(Value *lhs, Value *rhs)
973{
974 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
975}
976
977Value *Nucleus::createLShr(Value *lhs, Value *rhs)
978{
979 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
980}
981
982Value *Nucleus::createAShr(Value *lhs, Value *rhs)
983{
984 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
985}
986
987Value *Nucleus::createAnd(Value *lhs, Value *rhs)
988{
989 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
990}
991
992Value *Nucleus::createOr(Value *lhs, Value *rhs)
993{
994 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
995}
996
997Value *Nucleus::createXor(Value *lhs, Value *rhs)
998{
999 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
1000}
1001
1002Value *Nucleus::createNeg(Value *v)
1003{
1004 return createSub(createNullValue(T(v->getType())), v);
1005}
1006
1007Value *Nucleus::createFNeg(Value *v)
1008{
Ben Clayton713b8d32019-12-17 20:37:56 +00001009 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
1010 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -05001011
1012 return createFSub(negativeZero, v);
1013}
1014
1015Value *Nucleus::createNot(Value *v)
1016{
1017 if(Ice::isScalarIntegerType(v->getType()))
1018 {
1019 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
1020 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001021 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -05001022 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001023 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001024 return createXor(v, createConstantVector(c, T(v->getType())));
1025 }
1026}
1027
1028Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1029{
Ben Clayton713b8d32019-12-17 20:37:56 +00001030 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -05001031 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
1032
1033 int valueType = (int)reinterpret_cast<intptr_t>(type);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001034 Ice::Variable *result = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001035
Ben Clayton713b8d32019-12-17 20:37:56 +00001036 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001037 {
1038 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001039 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001040 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001041 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001042 auto pointer = RValue<Pointer<Byte>>(ptr);
1043 Int x = *Pointer<Int>(pointer);
1044
1045 Int4 vector;
1046 vector = Insert(vector, x, 0);
1047
Antonio Maiorano02a39532020-01-21 15:15:34 -05001048 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001049 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1050 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001051 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001052 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001053 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001054 auto pointer = RValue<Pointer<Byte>>(ptr);
1055 Int x = *Pointer<Int>(pointer);
1056 Int y = *Pointer<Int>(pointer + 4);
1057
1058 Int4 vector;
1059 vector = Insert(vector, x, 0);
1060 vector = Insert(vector, y, 1);
1061
Antonio Maiorano02a39532020-01-21 15:15:34 -05001062 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001063 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1064 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001065 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001066 else
1067 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001068 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001069 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001070 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001071 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001072 auto target = ::context->getConstantUndef(Ice::IceType_i32);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001073 result = ::function->makeVariable(T(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001074 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1075 load->addArg(ptr);
1076 load->addArg(::context->getConstantInt32(typeSize(type)));
1077 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001078 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001079 }
1080 else
1081 {
Antonio Maiorano02a39532020-01-21 15:15:34 -05001082 result = sz::createLoad(::function, ::basicBlock, V(ptr), T(type), align);
Nicolas Capens157ba262019-12-10 17:49:14 -05001083 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001084
Antonio Maiorano02a39532020-01-21 15:15:34 -05001085 ASSERT(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001086 return V(result);
1087}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001088
Nicolas Capens157ba262019-12-10 17:49:14 -05001089Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1090{
Ben Clayton713b8d32019-12-17 20:37:56 +00001091 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -05001092 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
Nicolas Capens598f8d82016-09-26 15:09:10 -04001093
Ben Clayton713b8d32019-12-17 20:37:56 +00001094#if __has_feature(memory_sanitizer)
1095 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
1096 if(align != 0)
1097 {
1098 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1099 call->addArg(ptr);
1100 call->addArg(::context->getConstantInt64(typeSize(type)));
1101 ::basicBlock->appendInst(call);
1102 }
1103#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001104
Nicolas Capens157ba262019-12-10 17:49:14 -05001105 int valueType = (int)reinterpret_cast<intptr_t>(type);
1106
Ben Clayton713b8d32019-12-17 20:37:56 +00001107 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001108 {
1109 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001110 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001111 if(typeSize(type) == 4)
1112 {
1113 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1114 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1115 ::basicBlock->appendInst(bitcast);
1116
1117 RValue<Int4> v(V(vector));
1118
1119 auto pointer = RValue<Pointer<Byte>>(ptr);
1120 Int x = Extract(v, 0);
1121 *Pointer<Int>(pointer) = x;
1122 }
1123 else if(typeSize(type) == 8)
1124 {
1125 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1126 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1127 ::basicBlock->appendInst(bitcast);
1128
1129 RValue<Int4> v(V(vector));
1130
1131 auto pointer = RValue<Pointer<Byte>>(ptr);
1132 Int x = Extract(v, 0);
1133 *Pointer<Int>(pointer) = x;
1134 Int y = Extract(v, 1);
1135 *Pointer<Int>(pointer + 4) = y;
1136 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001137 else
1138 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001139 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001140 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001141 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001142 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001143 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1144 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1145 store->addArg(value);
1146 store->addArg(ptr);
1147 store->addArg(::context->getConstantInt32(typeSize(type)));
1148 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001149 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001150 }
1151 else
1152 {
1153 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001154
Nicolas Capens157ba262019-12-10 17:49:14 -05001155 auto store = Ice::InstStore::create(::function, value, ptr, align);
1156 ::basicBlock->appendInst(store);
1157 }
1158
1159 return value;
1160}
1161
1162Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1163{
1164 ASSERT(index->getType() == Ice::IceType_i32);
1165
1166 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1167 {
1168 int32_t offset = constant->getValue() * (int)typeSize(type);
1169
1170 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001171 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001172 return ptr;
1173 }
1174
Nicolas Capens157ba262019-12-10 17:49:14 -05001175 return createAdd(ptr, createConstantInt(offset));
1176 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001177
Nicolas Capens157ba262019-12-10 17:49:14 -05001178 if(!Ice::isByteSizedType(T(type)))
1179 {
1180 index = createMul(index, createConstantInt((int)typeSize(type)));
1181 }
1182
Ben Clayton713b8d32019-12-17 20:37:56 +00001183 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001184 {
1185 if(unsignedIndex)
1186 {
1187 index = createZExt(index, T(Ice::IceType_i64));
1188 }
1189 else
1190 {
1191 index = createSExt(index, T(Ice::IceType_i64));
1192 }
1193 }
1194
1195 return createAdd(ptr, index);
1196}
1197
Antonio Maiorano370cba52019-12-31 11:36:07 -05001198static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1199{
1200 Ice::Variable *result = ::function->makeVariable(value->getType());
1201
1202 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1203 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1204 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1205 auto op = ::context->getConstantInt32(rmwOp);
1206 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1207 inst->addArg(op);
1208 inst->addArg(ptr);
1209 inst->addArg(value);
1210 inst->addArg(order);
1211 ::basicBlock->appendInst(inst);
1212
1213 return V(result);
1214}
1215
Nicolas Capens157ba262019-12-10 17:49:14 -05001216Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1217{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001218 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001219}
1220
1221Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1222{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001223 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001224}
1225
1226Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1227{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001228 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001229}
1230
1231Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1232{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001233 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001234}
1235
1236Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1237{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001238 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001239}
1240
1241Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1242{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001243 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001244}
1245
1246Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1247{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001248 Ice::Variable *result = ::function->makeVariable(value->getType());
1249
1250 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1251 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1252 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1253 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1254 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1255 inst->addArg(ptr);
1256 inst->addArg(compare);
1257 inst->addArg(value);
1258 inst->addArg(orderEq);
1259 inst->addArg(orderNeq);
1260 ::basicBlock->appendInst(inst);
1261
1262 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001263}
1264
1265static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1266{
1267 if(v->getType() == T(destType))
1268 {
1269 return v;
1270 }
1271
1272 Ice::Variable *result = ::function->makeVariable(T(destType));
1273 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1274 ::basicBlock->appendInst(cast);
1275
1276 return V(result);
1277}
1278
1279Value *Nucleus::createTrunc(Value *v, Type *destType)
1280{
1281 return createCast(Ice::InstCast::Trunc, v, destType);
1282}
1283
1284Value *Nucleus::createZExt(Value *v, Type *destType)
1285{
1286 return createCast(Ice::InstCast::Zext, v, destType);
1287}
1288
1289Value *Nucleus::createSExt(Value *v, Type *destType)
1290{
1291 return createCast(Ice::InstCast::Sext, v, destType);
1292}
1293
1294Value *Nucleus::createFPToUI(Value *v, Type *destType)
1295{
1296 return createCast(Ice::InstCast::Fptoui, v, destType);
1297}
1298
1299Value *Nucleus::createFPToSI(Value *v, Type *destType)
1300{
1301 return createCast(Ice::InstCast::Fptosi, v, destType);
1302}
1303
1304Value *Nucleus::createSIToFP(Value *v, Type *destType)
1305{
1306 return createCast(Ice::InstCast::Sitofp, v, destType);
1307}
1308
1309Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1310{
1311 return createCast(Ice::InstCast::Fptrunc, v, destType);
1312}
1313
1314Value *Nucleus::createFPExt(Value *v, Type *destType)
1315{
1316 return createCast(Ice::InstCast::Fpext, v, destType);
1317}
1318
1319Value *Nucleus::createBitCast(Value *v, Type *destType)
1320{
1321 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1322 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1323 // emulate them by writing to the stack and reading back as the destination type.
1324 if(emulateMismatchedBitCast)
1325 {
1326 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1327 {
1328 Value *address = allocateStackVariable(destType);
1329 createStore(v, address, T(v->getType()));
1330 return createLoad(address, destType);
1331 }
1332 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1333 {
1334 Value *address = allocateStackVariable(T(v->getType()));
1335 createStore(v, address, T(v->getType()));
1336 return createLoad(address, destType);
1337 }
1338 }
1339
1340 return createCast(Ice::InstCast::Bitcast, v, destType);
1341}
1342
1343static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1344{
1345 ASSERT(lhs->getType() == rhs->getType());
1346
1347 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1348 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1349 ::basicBlock->appendInst(cmp);
1350
1351 return V(result);
1352}
1353
1354Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1355{
1356 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1357}
1358
1359Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1360{
1361 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1362}
1363
1364Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1365{
1366 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1367}
1368
1369Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1370{
1371 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1372}
1373
1374Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1375{
1376 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1377}
1378
1379Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1380{
1381 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1382}
1383
1384Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1385{
1386 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1387}
1388
1389Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1390{
1391 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1392}
1393
1394Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1395{
1396 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1397}
1398
1399Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1400{
1401 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1402}
1403
1404Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1405{
1406 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1407}
1408
1409static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1410{
1411 ASSERT(lhs->getType() == rhs->getType());
1412 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1413
1414 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1415 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1416 ::basicBlock->appendInst(cmp);
1417
1418 return V(result);
1419}
1420
1421Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1422{
1423 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1424}
1425
1426Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1427{
1428 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1429}
1430
1431Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1432{
1433 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1434}
1435
1436Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1437{
1438 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1439}
1440
1441Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1442{
1443 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1444}
1445
1446Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1447{
1448 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1449}
1450
1451Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1452{
1453 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1454}
1455
1456Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1457{
1458 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1459}
1460
1461Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1462{
1463 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1464}
1465
1466Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1467{
1468 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1469}
1470
1471Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1472{
1473 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1474}
1475
1476Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1477{
1478 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1479}
1480
1481Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1482{
1483 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1484}
1485
1486Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1487{
1488 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1489}
1490
1491Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1492{
1493 auto result = ::function->makeVariable(T(type));
1494 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1495 ::basicBlock->appendInst(extract);
1496
1497 return V(result);
1498}
1499
1500Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1501{
1502 auto result = ::function->makeVariable(vector->getType());
1503 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1504 ::basicBlock->appendInst(insert);
1505
1506 return V(result);
1507}
1508
1509Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1510{
1511 ASSERT(V1->getType() == V2->getType());
1512
1513 int size = Ice::typeNumElements(V1->getType());
1514 auto result = ::function->makeVariable(V1->getType());
1515 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1516
1517 for(int i = 0; i < size; i++)
1518 {
1519 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1520 }
1521
1522 ::basicBlock->appendInst(shuffle);
1523
1524 return V(result);
1525}
1526
1527Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1528{
1529 ASSERT(ifTrue->getType() == ifFalse->getType());
1530
1531 auto result = ::function->makeVariable(ifTrue->getType());
1532 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1533 ::basicBlock->appendInst(select);
1534
1535 return V(result);
1536}
1537
1538SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1539{
1540 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1541 ::basicBlock->appendInst(switchInst);
1542
Ben Clayton713b8d32019-12-17 20:37:56 +00001543 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001544}
1545
1546void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1547{
1548 switchCases->addBranch(label, label, branch);
1549}
1550
1551void Nucleus::createUnreachable()
1552{
1553 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1554 ::basicBlock->appendInst(unreachable);
1555}
1556
1557Type *Nucleus::getPointerType(Type *ElementType)
1558{
Ben Clayton713b8d32019-12-17 20:37:56 +00001559 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001560 {
1561 return T(Ice::IceType_i64);
1562 }
1563 else
1564 {
1565 return T(Ice::IceType_i32);
1566 }
1567}
1568
1569Value *Nucleus::createNullValue(Type *Ty)
1570{
1571 if(Ice::isVectorType(T(Ty)))
1572 {
1573 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001574 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001575 return createConstantVector(c, Ty);
1576 }
1577 else
1578 {
1579 return V(::context->getConstantZero(T(Ty)));
1580 }
1581}
1582
1583Value *Nucleus::createConstantLong(int64_t i)
1584{
1585 return V(::context->getConstantInt64(i));
1586}
1587
1588Value *Nucleus::createConstantInt(int i)
1589{
1590 return V(::context->getConstantInt32(i));
1591}
1592
1593Value *Nucleus::createConstantInt(unsigned int i)
1594{
1595 return V(::context->getConstantInt32(i));
1596}
1597
1598Value *Nucleus::createConstantBool(bool b)
1599{
1600 return V(::context->getConstantInt1(b));
1601}
1602
1603Value *Nucleus::createConstantByte(signed char i)
1604{
1605 return V(::context->getConstantInt8(i));
1606}
1607
1608Value *Nucleus::createConstantByte(unsigned char i)
1609{
1610 return V(::context->getConstantInt8(i));
1611}
1612
1613Value *Nucleus::createConstantShort(short i)
1614{
1615 return V(::context->getConstantInt16(i));
1616}
1617
1618Value *Nucleus::createConstantShort(unsigned short i)
1619{
1620 return V(::context->getConstantInt16(i));
1621}
1622
1623Value *Nucleus::createConstantFloat(float x)
1624{
1625 return V(::context->getConstantFloat(x));
1626}
1627
1628Value *Nucleus::createNullPointer(Type *Ty)
1629{
Ben Clayton713b8d32019-12-17 20:37:56 +00001630 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05001631}
1632
Antonio Maiorano02a39532020-01-21 15:15:34 -05001633static Ice::Constant *IceConstantData(void const *data, size_t size, size_t alignment = 1)
1634{
1635 return sz::getConstantPointer(::context, ::routine->addConstantData(data, size, alignment));
1636}
1637
Nicolas Capens157ba262019-12-10 17:49:14 -05001638Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1639{
1640 const int vectorSize = 16;
1641 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
1642 const int alignment = vectorSize;
Nicolas Capens157ba262019-12-10 17:49:14 -05001643
1644 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00001645 const double *f = reinterpret_cast<const double *>(constants);
Antonio Maiorano02a39532020-01-21 15:15:34 -05001646
1647 // TODO(148082873): Fix global variable constants when generating multiple functions
1648 Ice::Constant *ptr = nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -05001649
1650 switch((int)reinterpret_cast<intptr_t>(type))
1651 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001652 case Ice::IceType_v4i32:
1653 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001654 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001655 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001656 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001657 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001658 }
1659 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001660 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001661 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001662 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001663 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001664 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001665 }
1666 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001667 case Ice::IceType_v8i16:
1668 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001669 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001670 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001671 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001672 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001673 }
1674 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001675 case Ice::IceType_v16i8:
1676 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001677 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001678 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001679 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001680 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001681 }
1682 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001683 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001684 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001685 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001686 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001687 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001688 }
1689 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001690 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001691 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001692 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001693 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001694 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001695 }
1696 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001697 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05001698 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001699 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001700 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001701 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001702 }
1703 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001704 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05001705 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001706 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001707 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001708 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001709 }
1710 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001711 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05001712 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001713 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001714 static_assert(sizeof(initializer) == vectorSize, "!");
Antonio Maiorano02a39532020-01-21 15:15:34 -05001715 ptr = IceConstantData(initializer, vectorSize, alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001716 }
1717 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001718 default:
1719 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001720 }
1721
Antonio Maiorano02a39532020-01-21 15:15:34 -05001722 ASSERT(ptr);
Nicolas Capens157ba262019-12-10 17:49:14 -05001723
Antonio Maiorano02a39532020-01-21 15:15:34 -05001724 Ice::Variable *result = sz::createLoad(::function, ::basicBlock, ptr, T(type), alignment);
Nicolas Capens157ba262019-12-10 17:49:14 -05001725 return V(result);
1726}
1727
1728Value *Nucleus::createConstantVector(const double *constants, Type *type)
1729{
Ben Clayton713b8d32019-12-17 20:37:56 +00001730 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05001731}
1732
1733Type *Void::getType()
1734{
1735 return T(Ice::IceType_void);
1736}
1737
1738Type *Bool::getType()
1739{
1740 return T(Ice::IceType_i1);
1741}
1742
1743Type *Byte::getType()
1744{
1745 return T(Ice::IceType_i8);
1746}
1747
1748Type *SByte::getType()
1749{
1750 return T(Ice::IceType_i8);
1751}
1752
1753Type *Short::getType()
1754{
1755 return T(Ice::IceType_i16);
1756}
1757
1758Type *UShort::getType()
1759{
1760 return T(Ice::IceType_i16);
1761}
1762
1763Type *Byte4::getType()
1764{
1765 return T(Type_v4i8);
1766}
1767
1768Type *SByte4::getType()
1769{
1770 return T(Type_v4i8);
1771}
1772
Ben Clayton713b8d32019-12-17 20:37:56 +00001773namespace {
1774RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05001775{
Ben Clayton713b8d32019-12-17 20:37:56 +00001776 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05001777}
1778
Ben Clayton713b8d32019-12-17 20:37:56 +00001779RValue<Byte> Extract(RValue<Byte8> val, int i)
1780{
1781 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
1782}
1783
1784RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
1785{
1786 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
1787}
1788} // namespace
1789
Nicolas Capens157ba262019-12-10 17:49:14 -05001790RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
1791{
1792 if(emulateIntrinsics)
1793 {
1794 Byte8 result;
1795 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1796 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1797 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1798 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1799 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1800 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1801 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1802 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
1803
1804 return result;
1805 }
1806 else
1807 {
1808 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00001809 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001810 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1811 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1812 paddusb->addArg(x.value);
1813 paddusb->addArg(y.value);
1814 ::basicBlock->appendInst(paddusb);
1815
1816 return RValue<Byte8>(V(result));
1817 }
1818}
1819
1820RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
1821{
1822 if(emulateIntrinsics)
1823 {
1824 Byte8 result;
1825 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1826 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1827 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1828 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1829 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1830 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1831 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1832 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
1833
1834 return result;
1835 }
1836 else
1837 {
1838 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00001839 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001840 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1841 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1842 psubusw->addArg(x.value);
1843 psubusw->addArg(y.value);
1844 ::basicBlock->appendInst(psubusw);
1845
1846 return RValue<Byte8>(V(result));
1847 }
1848}
1849
1850RValue<SByte> Extract(RValue<SByte8> val, int i)
1851{
1852 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
1853}
1854
1855RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
1856{
1857 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
1858}
1859
1860RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1861{
1862 if(emulateIntrinsics)
1863 {
1864 SByte8 result;
1865 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
1866 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
1867 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
1868 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
1869 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
1870 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
1871 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
1872 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
1873
1874 return result;
1875 }
1876 else
1877 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001878#if defined(__i386__) || defined(__x86_64__)
1879 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
1880 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
1881 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001882
Ben Clayton713b8d32019-12-17 20:37:56 +00001883 return As<SByte8>(hi | lo);
1884#else
1885 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
1886#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001887 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001888}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001889
Nicolas Capens157ba262019-12-10 17:49:14 -05001890RValue<Int> SignMask(RValue<Byte8> x)
1891{
1892 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001893 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001894 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
1895 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001896 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001897 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01001898 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001899 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00001900 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001901 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1902 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
1903 movmsk->addArg(x.value);
1904 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01001905
Nicolas Capens157ba262019-12-10 17:49:14 -05001906 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01001907 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001908}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001909
1910// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
1911// {
Nicolas Capens2f970b62016-11-08 14:28:59 -05001912// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001913// }
1914
Nicolas Capens157ba262019-12-10 17:49:14 -05001915RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
1916{
1917 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
1918}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001919
Nicolas Capens157ba262019-12-10 17:49:14 -05001920Type *Byte8::getType()
1921{
1922 return T(Type_v8i8);
1923}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001924
Nicolas Capens598f8d82016-09-26 15:09:10 -04001925// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
1926// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001927// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001928// }
1929
1930// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1931// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001932// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001933// }
1934
Nicolas Capens157ba262019-12-10 17:49:14 -05001935RValue<SByte> SaturateSigned(RValue<Short> x)
1936{
1937 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
1938}
1939
1940RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
1941{
1942 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04001943 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001944 SByte8 result;
1945 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1946 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1947 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1948 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1949 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1950 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1951 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1952 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04001953
Nicolas Capens157ba262019-12-10 17:49:14 -05001954 return result;
1955 }
1956 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001957 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001958 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00001959 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001960 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1961 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1962 paddsb->addArg(x.value);
1963 paddsb->addArg(y.value);
1964 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001965
Nicolas Capens157ba262019-12-10 17:49:14 -05001966 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001967 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001968}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001969
Nicolas Capens157ba262019-12-10 17:49:14 -05001970RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
1971{
1972 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001973 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001974 SByte8 result;
1975 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1976 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1977 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1978 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1979 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1980 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1981 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1982 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001983
Nicolas Capens157ba262019-12-10 17:49:14 -05001984 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04001985 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001986 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001987 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001988 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00001989 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001990 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1991 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1992 psubsb->addArg(x.value);
1993 psubsb->addArg(y.value);
1994 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04001995
Nicolas Capens157ba262019-12-10 17:49:14 -05001996 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001997 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001998}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001999
Nicolas Capens157ba262019-12-10 17:49:14 -05002000RValue<Int> SignMask(RValue<SByte8> x)
2001{
2002 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002003 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002004 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2005 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002006 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002007 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002008 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002009 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002010 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002011 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2012 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2013 movmsk->addArg(x.value);
2014 ::basicBlock->appendInst(movmsk);
2015
2016 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002017 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002018}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002019
Nicolas Capens157ba262019-12-10 17:49:14 -05002020RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2021{
2022 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2023}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002024
Nicolas Capens157ba262019-12-10 17:49:14 -05002025RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2026{
2027 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2028}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002029
Nicolas Capens157ba262019-12-10 17:49:14 -05002030Type *SByte8::getType()
2031{
2032 return T(Type_v8i8);
2033}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002034
Nicolas Capens157ba262019-12-10 17:49:14 -05002035Type *Byte16::getType()
2036{
2037 return T(Ice::IceType_v16i8);
2038}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002039
Nicolas Capens157ba262019-12-10 17:49:14 -05002040Type *SByte16::getType()
2041{
2042 return T(Ice::IceType_v16i8);
2043}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002044
Nicolas Capens157ba262019-12-10 17:49:14 -05002045Type *Short2::getType()
2046{
2047 return T(Type_v2i16);
2048}
Nicolas Capensd4227962016-11-09 14:24:25 -05002049
Nicolas Capens157ba262019-12-10 17:49:14 -05002050Type *UShort2::getType()
2051{
2052 return T(Type_v2i16);
2053}
Nicolas Capensd4227962016-11-09 14:24:25 -05002054
Nicolas Capens157ba262019-12-10 17:49:14 -05002055Short4::Short4(RValue<Int4> cast)
2056{
Ben Clayton713b8d32019-12-17 20:37:56 +00002057 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002058 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2059 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2060
2061 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
2062 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2063
2064 storeValue(short4);
2065}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002066
2067// Short4::Short4(RValue<Float> cast)
2068// {
2069// }
2070
Nicolas Capens157ba262019-12-10 17:49:14 -05002071Short4::Short4(RValue<Float4> cast)
2072{
2073 UNIMPLEMENTED("Short4::Short4(RValue<Float4> cast)");
2074}
2075
2076RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2077{
2078 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002079 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002080 Short4 result;
2081 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2082 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2083 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2084 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002085
2086 return result;
2087 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002088 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002089 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002090 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2091 }
2092}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002093
Nicolas Capens157ba262019-12-10 17:49:14 -05002094RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2095{
2096 if(emulateIntrinsics)
2097 {
2098 Short4 result;
2099 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2100 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2101 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2102 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2103
2104 return result;
2105 }
2106 else
2107 {
2108 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2109 }
2110}
2111
2112RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2113{
2114 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2115 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2116 ::basicBlock->appendInst(cmp);
2117
2118 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2119 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2120 ::basicBlock->appendInst(select);
2121
2122 return RValue<Short4>(V(result));
2123}
2124
2125RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2126{
2127 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2128 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2129 ::basicBlock->appendInst(cmp);
2130
2131 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2132 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2133 ::basicBlock->appendInst(select);
2134
2135 return RValue<Short4>(V(result));
2136}
2137
2138RValue<Short> SaturateSigned(RValue<Int> x)
2139{
2140 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2141}
2142
2143RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2144{
2145 if(emulateIntrinsics)
2146 {
2147 Short4 result;
2148 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2149 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2150 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2151 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2152
2153 return result;
2154 }
2155 else
2156 {
2157 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002158 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002159 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2160 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2161 paddsw->addArg(x.value);
2162 paddsw->addArg(y.value);
2163 ::basicBlock->appendInst(paddsw);
2164
2165 return RValue<Short4>(V(result));
2166 }
2167}
2168
2169RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2170{
2171 if(emulateIntrinsics)
2172 {
2173 Short4 result;
2174 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2175 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2176 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2177 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2178
2179 return result;
2180 }
2181 else
2182 {
2183 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002184 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002185 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2186 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2187 psubsw->addArg(x.value);
2188 psubsw->addArg(y.value);
2189 ::basicBlock->appendInst(psubsw);
2190
2191 return RValue<Short4>(V(result));
2192 }
2193}
2194
2195RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2196{
2197 if(emulateIntrinsics)
2198 {
2199 Short4 result;
2200 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2201 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2202 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2203 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2204
2205 return result;
2206 }
2207 else
2208 {
2209 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002210 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002211 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2212 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2213 pmulhw->addArg(x.value);
2214 pmulhw->addArg(y.value);
2215 ::basicBlock->appendInst(pmulhw);
2216
2217 return RValue<Short4>(V(result));
2218 }
2219}
2220
2221RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2222{
2223 if(emulateIntrinsics)
2224 {
2225 Int2 result;
2226 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2227 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2228
2229 return result;
2230 }
2231 else
2232 {
2233 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002234 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002235 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2236 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2237 pmaddwd->addArg(x.value);
2238 pmaddwd->addArg(y.value);
2239 ::basicBlock->appendInst(pmaddwd);
2240
2241 return As<Int2>(V(result));
2242 }
2243}
2244
2245RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2246{
2247 if(emulateIntrinsics)
2248 {
2249 SByte8 result;
2250 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2251 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2252 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2253 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2254 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2255 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2256 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2257 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2258
2259 return result;
2260 }
2261 else
2262 {
2263 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002264 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002265 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2266 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2267 pack->addArg(x.value);
2268 pack->addArg(y.value);
2269 ::basicBlock->appendInst(pack);
2270
2271 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2272 }
2273}
2274
2275RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2276{
2277 if(emulateIntrinsics)
2278 {
2279 Byte8 result;
2280 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2281 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2282 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2283 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2284 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2285 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2286 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2287 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2288
2289 return result;
2290 }
2291 else
2292 {
2293 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002294 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002295 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2296 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2297 pack->addArg(x.value);
2298 pack->addArg(y.value);
2299 ::basicBlock->appendInst(pack);
2300
2301 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2302 }
2303}
2304
2305RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2306{
2307 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2308}
2309
2310RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2311{
2312 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2313}
2314
2315Type *Short4::getType()
2316{
2317 return T(Type_v4i16);
2318}
2319
2320UShort4::UShort4(RValue<Float4> cast, bool saturate)
2321{
2322 if(saturate)
2323 {
2324 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002325 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002326 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2327 // PackUnsigned takes care of 0x0000 saturation.
2328 Int4 int4(Min(cast, Float4(0xFFFF)));
2329 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002330 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002331 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002332 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002333 // ARM saturates the 32-bit integer result on overflow/undeflow.
2334 Int4 int4(cast);
2335 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002336 }
2337 else
2338 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002339 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002340 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002341 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002342 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002343 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002344 *this = Short4(Int4(cast));
2345 }
2346}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002347
Nicolas Capens157ba262019-12-10 17:49:14 -05002348RValue<UShort> Extract(RValue<UShort4> val, int i)
2349{
2350 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2351}
2352
2353RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2354{
2355 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2356}
2357
2358RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2359{
2360 if(emulateIntrinsics)
2361 {
2362 UShort4 result;
2363 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2364 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2365 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2366 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2367
2368 return result;
2369 }
2370 else
2371 {
2372 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2373 }
2374}
2375
2376RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2377{
2378 if(emulateIntrinsics)
2379 {
2380 UShort4 result;
2381 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2382 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2383 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2384 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2385
2386 return result;
2387 }
2388 else
2389 {
2390 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2391 }
2392}
2393
2394RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2395{
2396 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2397 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2398 ::basicBlock->appendInst(cmp);
2399
2400 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2401 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2402 ::basicBlock->appendInst(select);
2403
2404 return RValue<UShort4>(V(result));
2405}
2406
2407RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2408{
2409 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2410 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2411 ::basicBlock->appendInst(cmp);
2412
2413 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2414 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2415 ::basicBlock->appendInst(select);
2416
2417 return RValue<UShort4>(V(result));
2418}
2419
2420RValue<UShort> SaturateUnsigned(RValue<Int> x)
2421{
2422 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2423}
2424
2425RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2426{
2427 if(emulateIntrinsics)
2428 {
2429 UShort4 result;
2430 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2431 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2432 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2433 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2434
2435 return result;
2436 }
2437 else
2438 {
2439 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002440 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002441 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2442 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2443 paddusw->addArg(x.value);
2444 paddusw->addArg(y.value);
2445 ::basicBlock->appendInst(paddusw);
2446
2447 return RValue<UShort4>(V(result));
2448 }
2449}
2450
2451RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2452{
2453 if(emulateIntrinsics)
2454 {
2455 UShort4 result;
2456 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2457 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2458 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2459 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2460
2461 return result;
2462 }
2463 else
2464 {
2465 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002466 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002467 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2468 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2469 psubusw->addArg(x.value);
2470 psubusw->addArg(y.value);
2471 ::basicBlock->appendInst(psubusw);
2472
2473 return RValue<UShort4>(V(result));
2474 }
2475}
2476
2477RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2478{
2479 if(emulateIntrinsics)
2480 {
2481 UShort4 result;
2482 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2483 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2484 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2485 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2486
2487 return result;
2488 }
2489 else
2490 {
2491 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002492 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002493 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2494 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2495 pmulhuw->addArg(x.value);
2496 pmulhuw->addArg(y.value);
2497 ::basicBlock->appendInst(pmulhuw);
2498
2499 return RValue<UShort4>(V(result));
2500 }
2501}
2502
2503RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2504{
2505 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2506
2507 // Scalarized implementation.
2508 Int4 result;
2509 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2510 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2511 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2512 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2513
2514 return result;
2515}
2516
2517RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2518{
2519 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2520
2521 if(false) // Partial product based implementation.
2522 {
2523 auto xh = x >> 16;
2524 auto yh = y >> 16;
2525 auto xl = x & UInt4(0x0000FFFF);
2526 auto yl = y & UInt4(0x0000FFFF);
2527 auto xlyh = xl * yh;
2528 auto xhyl = xh * yl;
2529 auto xlyhh = xlyh >> 16;
2530 auto xhylh = xhyl >> 16;
2531 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2532 auto xhyll = xhyl & UInt4(0x0000FFFF);
2533 auto xlylh = (xl * yl) >> 16;
2534 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2535
2536 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002537 }
2538
Nicolas Capens157ba262019-12-10 17:49:14 -05002539 // Scalarized implementation.
2540 Int4 result;
2541 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
2542 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
2543 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
2544 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
2545
2546 return As<UInt4>(result);
2547}
2548
2549RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2550{
2551 UNIMPLEMENTED("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
2552 return UShort4(0);
2553}
2554
2555Type *UShort4::getType()
2556{
2557 return T(Type_v4i16);
2558}
2559
2560RValue<Short> Extract(RValue<Short8> val, int i)
2561{
2562 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2563}
2564
2565RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2566{
2567 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2568}
2569
2570RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2571{
2572 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002573 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002574 Short8 result;
2575 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2576 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2577 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2578 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
2579 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
2580 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
2581 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
2582 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002583
Nicolas Capens157ba262019-12-10 17:49:14 -05002584 return result;
2585 }
2586 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002587 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002588 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002589 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002590}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002591
Nicolas Capens157ba262019-12-10 17:49:14 -05002592RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
2593{
2594 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002595 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002596 Short8 result;
2597 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2598 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2599 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2600 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2601 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
2602 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
2603 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
2604 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002605
Nicolas Capens157ba262019-12-10 17:49:14 -05002606 return result;
2607 }
2608 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002609 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002610 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002611 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002612}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002613
Nicolas Capens157ba262019-12-10 17:49:14 -05002614RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
2615{
2616 UNIMPLEMENTED("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
2617 return Int4(0);
2618}
2619
2620RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
2621{
2622 UNIMPLEMENTED("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
2623 return Short8(0);
2624}
2625
2626Type *Short8::getType()
2627{
2628 return T(Ice::IceType_v8i16);
2629}
2630
2631RValue<UShort> Extract(RValue<UShort8> val, int i)
2632{
2633 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2634}
2635
2636RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
2637{
2638 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
2639}
2640
2641RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
2642{
2643 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002644 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002645 UShort8 result;
2646 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2647 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2648 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2649 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2650 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
2651 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
2652 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
2653 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002654
Nicolas Capens157ba262019-12-10 17:49:14 -05002655 return result;
2656 }
2657 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002658 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002659 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002660 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002661}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002662
Nicolas Capens157ba262019-12-10 17:49:14 -05002663RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
2664{
2665 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002666 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002667 UShort8 result;
2668 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2669 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2670 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2671 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2672 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
2673 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
2674 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
2675 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002676
Nicolas Capens157ba262019-12-10 17:49:14 -05002677 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002678 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002679 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002680 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002681 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002682 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002683}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002684
Nicolas Capens157ba262019-12-10 17:49:14 -05002685RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
2686{
2687 UNIMPLEMENTED("RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)");
2688 return UShort8(0);
2689}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002690
Nicolas Capens157ba262019-12-10 17:49:14 -05002691RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
2692{
2693 UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
2694 return UShort8(0);
2695}
2696
// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
//	RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
//	{
//		ASSERT(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
//	}
2702
Nicolas Capens157ba262019-12-10 17:49:14 -05002703Type *UShort8::getType()
2704{
2705 return T(Ice::IceType_v8i16);
2706}
2707
Ben Clayton713b8d32019-12-17 20:37:56 +00002708RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002709{
2710 RValue<Int> res = val;
2711 val += 1;
2712 return res;
2713}
2714
Ben Clayton713b8d32019-12-17 20:37:56 +00002715const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002716{
2717 val += 1;
2718 return val;
2719}
2720
Ben Clayton713b8d32019-12-17 20:37:56 +00002721RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002722{
2723 RValue<Int> res = val;
2724 val -= 1;
2725 return res;
2726}
2727
Ben Clayton713b8d32019-12-17 20:37:56 +00002728const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002729{
2730 val -= 1;
2731 return val;
2732}
2733
2734RValue<Int> RoundInt(RValue<Float> cast)
2735{
2736 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002737 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002738 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2739 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002740 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002741 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002742 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002743 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002744 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002745 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2746 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2747 nearbyint->addArg(cast.value);
2748 ::basicBlock->appendInst(nearbyint);
2749
2750 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002751 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002752}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002753
Nicolas Capens157ba262019-12-10 17:49:14 -05002754Type *Int::getType()
2755{
2756 return T(Ice::IceType_i32);
2757}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002758
Nicolas Capens157ba262019-12-10 17:49:14 -05002759Type *Long::getType()
2760{
2761 return T(Ice::IceType_i64);
2762}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002763
Nicolas Capens157ba262019-12-10 17:49:14 -05002764UInt::UInt(RValue<Float> cast)
2765{
2766 // Smallest positive value representable in UInt, but not in Int
2767 const unsigned int ustart = 0x80000000u;
2768 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002769
Nicolas Capens157ba262019-12-10 17:49:14 -05002770 // If the value is negative, store 0, otherwise store the result of the conversion
2771 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00002772 // Check if the value can be represented as an Int
2773 IfThenElse(cast >= ustartf,
2774 // If the value is too large, subtract ustart and re-add it after conversion.
2775 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2776 // Otherwise, just convert normally
2777 Int(cast)))
2778 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05002779}
Nicolas Capensa8086512016-11-07 17:32:17 -05002780
Ben Clayton713b8d32019-12-17 20:37:56 +00002781RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002782{
2783 RValue<UInt> res = val;
2784 val += 1;
2785 return res;
2786}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002787
Ben Clayton713b8d32019-12-17 20:37:56 +00002788const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002789{
2790 val += 1;
2791 return val;
2792}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002793
Ben Clayton713b8d32019-12-17 20:37:56 +00002794RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002795{
2796 RValue<UInt> res = val;
2797 val -= 1;
2798 return res;
2799}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002800
Ben Clayton713b8d32019-12-17 20:37:56 +00002801const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002802{
2803 val -= 1;
2804 return val;
2805}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002806
//	RValue<UInt> RoundUInt(RValue<Float> cast)
//	{
//		ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
//	}
2811
Nicolas Capens157ba262019-12-10 17:49:14 -05002812Type *UInt::getType()
2813{
2814 return T(Ice::IceType_i32);
2815}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002816
2817// Int2::Int2(RValue<Int> cast)
2818// {
2819// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2820// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
2821//
2822// Constant *shuffle[2];
2823// shuffle[0] = Nucleus::createConstantInt(0);
2824// shuffle[1] = Nucleus::createConstantInt(0);
2825//
2826// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
2827//
2828// storeValue(replicate);
2829// }
2830
Nicolas Capens157ba262019-12-10 17:49:14 -05002831RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
2832{
2833 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002834 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002835 Int2 result;
2836 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2837 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002838
Nicolas Capens157ba262019-12-10 17:49:14 -05002839 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002840 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002841 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002842 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002843 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002844 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002845}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002846
Nicolas Capens157ba262019-12-10 17:49:14 -05002847RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
2848{
2849 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002850 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002851 Int2 result;
2852 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2853 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2854
2855 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002856 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002857 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002858 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002859 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002860 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002861}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002862
Nicolas Capens157ba262019-12-10 17:49:14 -05002863Type *Int2::getType()
2864{
2865 return T(Type_v2i32);
2866}
2867
2868RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
2869{
2870 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002871 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002872 UInt2 result;
2873 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
2874 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002875
Nicolas Capens157ba262019-12-10 17:49:14 -05002876 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002877 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002878 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002879 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002880 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002881 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002882}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002883
Nicolas Capens157ba262019-12-10 17:49:14 -05002884RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
2885{
2886 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002887 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002888 UInt2 result;
2889 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
2890 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05002891
Nicolas Capens157ba262019-12-10 17:49:14 -05002892 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002893 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002894 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002895 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002896 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002897 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002898}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002899
Nicolas Capens157ba262019-12-10 17:49:14 -05002900Type *UInt2::getType()
2901{
2902 return T(Type_v2i32);
2903}
2904
Ben Clayton713b8d32019-12-17 20:37:56 +00002905Int4::Int4(RValue<Byte4> cast)
2906 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002907{
2908 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2909 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2910
2911 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00002912 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002913 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2914 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
2915
Ben Clayton713b8d32019-12-17 20:37:56 +00002916 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002917 Value *d = Nucleus::createBitCast(c, Short8::getType());
2918 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
2919
2920 Value *f = Nucleus::createBitCast(e, Int4::getType());
2921 storeValue(f);
2922}
2923
Ben Clayton713b8d32019-12-17 20:37:56 +00002924Int4::Int4(RValue<SByte4> cast)
2925 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002926{
2927 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2928 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2929
Ben Clayton713b8d32019-12-17 20:37:56 +00002930 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002931 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2932 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
2933
Ben Clayton713b8d32019-12-17 20:37:56 +00002934 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002935 Value *d = Nucleus::createBitCast(c, Short8::getType());
2936 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
2937
2938 *this = As<Int4>(e) >> 24;
2939}
2940
Ben Clayton713b8d32019-12-17 20:37:56 +00002941Int4::Int4(RValue<Short4> cast)
2942 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002943{
Ben Clayton713b8d32019-12-17 20:37:56 +00002944 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002945 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2946
2947 *this = As<Int4>(c) >> 16;
2948}
2949
Ben Clayton713b8d32019-12-17 20:37:56 +00002950Int4::Int4(RValue<UShort4> cast)
2951 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002952{
Ben Clayton713b8d32019-12-17 20:37:56 +00002953 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002954 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2955 Value *d = Nucleus::createBitCast(c, Int4::getType());
2956 storeValue(d);
2957}
2958
Ben Clayton713b8d32019-12-17 20:37:56 +00002959Int4::Int4(RValue<Int> rhs)
2960 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002961{
2962 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
2963
Ben Clayton713b8d32019-12-17 20:37:56 +00002964 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002965 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
2966
2967 storeValue(replicate);
2968}
2969
2970RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
2971{
2972 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002973 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002974 Int4 result;
2975 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2976 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
2977 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
2978 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002979
Nicolas Capens157ba262019-12-10 17:49:14 -05002980 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002981 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002982 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002983 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002984 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002985 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002986}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002987
Nicolas Capens157ba262019-12-10 17:49:14 -05002988RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
2989{
2990 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002991 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002992 Int4 result;
2993 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2994 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2995 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
2996 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05002997
Nicolas Capens157ba262019-12-10 17:49:14 -05002998 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002999 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003000 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003001 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003002 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003003 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003004}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003005
Nicolas Capens157ba262019-12-10 17:49:14 -05003006RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
3007{
3008 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
3009}
3010
3011RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
3012{
3013 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
3014}
3015
3016RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
3017{
3018 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
3019}
3020
3021RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
3022{
3023 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
3024}
3025
3026RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
3027{
3028 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
3029}
3030
3031RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
3032{
3033 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
3034}
3035
3036RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
3037{
3038 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3039 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3040 ::basicBlock->appendInst(cmp);
3041
3042 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3043 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3044 ::basicBlock->appendInst(select);
3045
3046 return RValue<Int4>(V(result));
3047}
3048
3049RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3050{
3051 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3052 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3053 ::basicBlock->appendInst(cmp);
3054
3055 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3056 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3057 ::basicBlock->appendInst(select);
3058
3059 return RValue<Int4>(V(result));
3060}
3061
3062RValue<Int4> RoundInt(RValue<Float4> cast)
3063{
3064 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003065 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003066 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3067 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003068 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003069 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003070 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003071 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003072 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003073 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3074 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3075 nearbyint->addArg(cast.value);
3076 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003077
3078 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003079 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003080}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003081
Nicolas Capens157ba262019-12-10 17:49:14 -05003082RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3083{
3084 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003085 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003086 Short8 result;
3087 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3088 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3089 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3090 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3091 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3092 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3093 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3094 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003095
Nicolas Capens157ba262019-12-10 17:49:14 -05003096 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003097 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003098 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003099 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003100 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003101 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003102 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3103 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3104 pack->addArg(x.value);
3105 pack->addArg(y.value);
3106 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003107
Nicolas Capens157ba262019-12-10 17:49:14 -05003108 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003109 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003110}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003111
Nicolas Capens157ba262019-12-10 17:49:14 -05003112RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3113{
3114 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003115 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003116 RValue<Int4> sx = As<Int4>(x);
3117 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003118
Nicolas Capens157ba262019-12-10 17:49:14 -05003119 RValue<Int4> sy = As<Int4>(y);
3120 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003121
Nicolas Capens157ba262019-12-10 17:49:14 -05003122 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003123 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003124 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003125 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003126 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003127 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003128 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3129 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3130 pack->addArg(x.value);
3131 pack->addArg(y.value);
3132 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003133
Nicolas Capens157ba262019-12-10 17:49:14 -05003134 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003135 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003136}
Nicolas Capens33438a62017-09-27 11:47:35 -04003137
Nicolas Capens157ba262019-12-10 17:49:14 -05003138RValue<Int> SignMask(RValue<Int4> x)
3139{
3140 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003141 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003142 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3143 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003144 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003145 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003146 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003147 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003148 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003149 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3150 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3151 movmsk->addArg(x.value);
3152 ::basicBlock->appendInst(movmsk);
3153
3154 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003155 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003156}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003157
Nicolas Capens157ba262019-12-10 17:49:14 -05003158Type *Int4::getType()
3159{
3160 return T(Ice::IceType_v4i32);
3161}
3162
Ben Clayton713b8d32019-12-17 20:37:56 +00003163UInt4::UInt4(RValue<Float4> cast)
3164 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003165{
3166 // Smallest positive value representable in UInt, but not in Int
3167 const unsigned int ustart = 0x80000000u;
3168 const float ustartf = float(ustart);
3169
3170 // Check if the value can be represented as an Int
3171 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3172 // If the value is too large, subtract ustart and re-add it after conversion.
3173 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003174 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003175 (~uiValue & Int4(cast));
3176 // If the value is negative, store 0, otherwise store the result of the conversion
3177 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3178}
3179
Ben Clayton713b8d32019-12-17 20:37:56 +00003180UInt4::UInt4(RValue<UInt> rhs)
3181 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003182{
3183 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3184
Ben Clayton713b8d32019-12-17 20:37:56 +00003185 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003186 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3187
3188 storeValue(replicate);
3189}
3190
3191RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3192{
3193 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003194 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003195 UInt4 result;
3196 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3197 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3198 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3199 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003200
Nicolas Capens157ba262019-12-10 17:49:14 -05003201 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003202 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003203 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003204 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003205 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003206 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003207}
Ben Clayton88816fa2019-05-15 17:08:14 +01003208
Nicolas Capens157ba262019-12-10 17:49:14 -05003209RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3210{
3211 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003212 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003213 UInt4 result;
3214 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3215 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3216 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3217 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003218
Nicolas Capens157ba262019-12-10 17:49:14 -05003219 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003220 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003221 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003222 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003223 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003224 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003225}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003226
Nicolas Capens157ba262019-12-10 17:49:14 -05003227RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3228{
3229 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3230}
3231
3232RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3233{
3234 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3235}
3236
3237RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3238{
3239 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3240}
3241
3242RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3243{
3244 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3245}
3246
3247RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3248{
3249 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3250}
3251
3252RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3253{
3254 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3255}
3256
3257RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3258{
3259 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3260 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3261 ::basicBlock->appendInst(cmp);
3262
3263 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3264 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3265 ::basicBlock->appendInst(select);
3266
3267 return RValue<UInt4>(V(result));
3268}
3269
3270RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3271{
3272 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3273 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3274 ::basicBlock->appendInst(cmp);
3275
3276 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3277 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3278 ::basicBlock->appendInst(select);
3279
3280 return RValue<UInt4>(V(result));
3281}
3282
3283Type *UInt4::getType()
3284{
3285 return T(Ice::IceType_v4i32);
3286}
3287
3288Type *Half::getType()
3289{
3290 return T(Ice::IceType_i16);
3291}
3292
3293RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3294{
3295 return 1.0f / x;
3296}
3297
3298RValue<Float> RcpSqrt_pp(RValue<Float> x)
3299{
3300 return Rcp_pp(Sqrt(x));
3301}
3302
3303RValue<Float> Sqrt(RValue<Float> x)
3304{
3305 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003306 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003307 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3308 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3309 sqrt->addArg(x.value);
3310 ::basicBlock->appendInst(sqrt);
3311
3312 return RValue<Float>(V(result));
3313}
3314
3315RValue<Float> Round(RValue<Float> x)
3316{
3317 return Float4(Round(Float4(x))).x;
3318}
3319
3320RValue<Float> Trunc(RValue<Float> x)
3321{
3322 return Float4(Trunc(Float4(x))).x;
3323}
3324
3325RValue<Float> Frac(RValue<Float> x)
3326{
3327 return Float4(Frac(Float4(x))).x;
3328}
3329
3330RValue<Float> Floor(RValue<Float> x)
3331{
3332 return Float4(Floor(Float4(x))).x;
3333}
3334
3335RValue<Float> Ceil(RValue<Float> x)
3336{
3337 return Float4(Ceil(Float4(x))).x;
3338}
3339
3340Type *Float::getType()
3341{
3342 return T(Ice::IceType_f32);
3343}
3344
3345Type *Float2::getType()
3346{
3347 return T(Type_v2f32);
3348}
3349
Ben Clayton713b8d32019-12-17 20:37:56 +00003350Float4::Float4(RValue<Float> rhs)
3351 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003352{
3353 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3354
Ben Clayton713b8d32019-12-17 20:37:56 +00003355 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003356 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3357
3358 storeValue(replicate);
3359}
3360
3361RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3362{
3363 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3364 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3365 ::basicBlock->appendInst(cmp);
3366
3367 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3368 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3369 ::basicBlock->appendInst(select);
3370
3371 return RValue<Float4>(V(result));
3372}
3373
3374RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3375{
3376 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3377 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3378 ::basicBlock->appendInst(cmp);
3379
3380 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3381 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3382 ::basicBlock->appendInst(select);
3383
3384 return RValue<Float4>(V(result));
3385}
3386
3387RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3388{
3389 return Float4(1.0f) / x;
3390}
3391
3392RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3393{
3394 return Rcp_pp(Sqrt(x));
3395}
3396
3397RValue<Float4> Sqrt(RValue<Float4> x)
3398{
3399 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003400 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003401 Float4 result;
3402 result.x = Sqrt(Float(Float4(x).x));
3403 result.y = Sqrt(Float(Float4(x).y));
3404 result.z = Sqrt(Float(Float4(x).z));
3405 result.w = Sqrt(Float(Float4(x).w));
3406
3407 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003408 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003409 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003410 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003411 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003412 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003413 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3414 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3415 sqrt->addArg(x.value);
3416 ::basicBlock->appendInst(sqrt);
3417
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003418 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003419 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003420}
Nicolas Capens157ba262019-12-10 17:49:14 -05003421
3422RValue<Int> SignMask(RValue<Float4> x)
3423{
3424 if(emulateIntrinsics || CPUID::ARM)
3425 {
3426 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3427 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3428 }
3429 else
3430 {
3431 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003432 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003433 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3434 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3435 movmsk->addArg(x.value);
3436 ::basicBlock->appendInst(movmsk);
3437
3438 return RValue<Int>(V(result));
3439 }
3440}
3441
3442RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3443{
3444 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3445}
3446
3447RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3448{
3449 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3450}
3451
3452RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3453{
3454 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3455}
3456
3457RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3458{
3459 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3460}
3461
3462RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3463{
3464 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3465}
3466
3467RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3468{
3469 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
3470}
3471
3472RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3473{
3474 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3475}
3476
3477RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3478{
3479 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3480}
3481
3482RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3483{
3484 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3485}
3486
3487RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3488{
3489 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3490}
3491
3492RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3493{
3494 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3495}
3496
3497RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3498{
3499 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3500}
3501
3502RValue<Float4> Round(RValue<Float4> x)
3503{
3504 if(emulateIntrinsics || CPUID::ARM)
3505 {
3506 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3507 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
3508 }
3509 else if(CPUID::SSE4_1)
3510 {
3511 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003512 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003513 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3514 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3515 round->addArg(x.value);
3516 round->addArg(::context->getConstantInt32(0));
3517 ::basicBlock->appendInst(round);
3518
3519 return RValue<Float4>(V(result));
3520 }
3521 else
3522 {
3523 return Float4(RoundInt(x));
3524 }
3525}
3526
3527RValue<Float4> Trunc(RValue<Float4> x)
3528{
3529 if(CPUID::SSE4_1)
3530 {
3531 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003532 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003533 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3534 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3535 round->addArg(x.value);
3536 round->addArg(::context->getConstantInt32(3));
3537 ::basicBlock->appendInst(round);
3538
3539 return RValue<Float4>(V(result));
3540 }
3541 else
3542 {
3543 return Float4(Int4(x));
3544 }
3545}
3546
3547RValue<Float4> Frac(RValue<Float4> x)
3548{
3549 Float4 frc;
3550
3551 if(CPUID::SSE4_1)
3552 {
3553 frc = x - Floor(x);
3554 }
3555 else
3556 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003557 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05003558
Ben Clayton713b8d32019-12-17 20:37:56 +00003559 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05003560 }
3561
3562 // x - floor(x) can be 1.0 for very small negative x.
3563 // Clamp against the value just below 1.0.
3564 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
3565}
3566
3567RValue<Float4> Floor(RValue<Float4> x)
3568{
3569 if(CPUID::SSE4_1)
3570 {
3571 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003572 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003573 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3574 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3575 round->addArg(x.value);
3576 round->addArg(::context->getConstantInt32(1));
3577 ::basicBlock->appendInst(round);
3578
3579 return RValue<Float4>(V(result));
3580 }
3581 else
3582 {
3583 return x - Frac(x);
3584 }
3585}
3586
3587RValue<Float4> Ceil(RValue<Float4> x)
3588{
3589 if(CPUID::SSE4_1)
3590 {
3591 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003592 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003593 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3594 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3595 round->addArg(x.value);
3596 round->addArg(::context->getConstantInt32(2));
3597 ::basicBlock->appendInst(round);
3598
3599 return RValue<Float4>(V(result));
3600 }
3601 else
3602 {
3603 return -Floor(-x);
3604 }
3605}
3606
3607Type *Float4::getType()
3608{
3609 return T(Ice::IceType_v4f32);
3610}
3611
3612RValue<Long> Ticks()
3613{
3614 UNIMPLEMENTED("RValue<Long> Ticks()");
3615 return Long(Int(0));
3616}
3617
Ben Clayton713b8d32019-12-17 20:37:56 +00003618RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05003619{
Antonio Maiorano02a39532020-01-21 15:15:34 -05003620 return RValue<Pointer<Byte>>{ V(sz::getConstantPointer(::context, ptr)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05003621}
3622
Ben Clayton713b8d32019-12-17 20:37:56 +00003623RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05003624{
Antonio Maiorano02a39532020-01-21 15:15:34 -05003625 return RValue<Pointer<Byte>>{ V(IceConstantData(data, size)) };
Nicolas Capens157ba262019-12-10 17:49:14 -05003626}
3627
Ben Clayton713b8d32019-12-17 20:37:56 +00003628Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05003629{
3630 Ice::Variable *ret = nullptr;
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003631 if(retTy != nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05003632 {
3633 ret = ::function->makeVariable(T(retTy));
3634 }
3635 auto call = Ice::InstCall::create(::function, args.size(), ret, V(fptr.value), false);
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003636 for(auto arg : args)
Nicolas Capens157ba262019-12-10 17:49:14 -05003637 {
3638 call->addArg(V(arg));
3639 }
3640 ::basicBlock->appendInst(call);
3641 return V(ret);
3642}
3643
3644void Breakpoint()
3645{
Ben Clayton713b8d32019-12-17 20:37:56 +00003646 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003647 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3648 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3649 ::basicBlock->appendInst(trap);
3650}
3651
Ben Clayton713b8d32019-12-17 20:37:56 +00003652void Nucleus::createFence(std::memory_order memoryOrder)
3653{
Antonio Maiorano370cba52019-12-31 11:36:07 -05003654 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3655 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3656 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3657 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
3658 inst->addArg(order);
3659 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00003660}
Antonio Maiorano370cba52019-12-31 11:36:07 -05003661
Ben Clayton713b8d32019-12-17 20:37:56 +00003662Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
3663{
3664 UNIMPLEMENTED("Subzero createMaskedLoad()");
3665 return nullptr;
3666}
3667void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
3668{
3669 UNIMPLEMENTED("Subzero createMaskedStore()");
3670}
Nicolas Capens157ba262019-12-10 17:49:14 -05003671
3672RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3673{
3674 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3675}
3676
3677RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3678{
3679 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3680}
3681
3682void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3683{
3684 return emulated::Scatter(base, val, offsets, mask, alignment);
3685}
3686
3687void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3688{
3689 return emulated::Scatter(base, val, offsets, mask, alignment);
3690}
3691
3692RValue<Float> Exp2(RValue<Float> x)
3693{
3694 return emulated::Exp2(x);
3695}
3696
3697RValue<Float> Log2(RValue<Float> x)
3698{
3699 return emulated::Log2(x);
3700}
3701
3702RValue<Float4> Sin(RValue<Float4> x)
3703{
3704 return emulated::Sin(x);
3705}
3706
3707RValue<Float4> Cos(RValue<Float4> x)
3708{
3709 return emulated::Cos(x);
3710}
3711
3712RValue<Float4> Tan(RValue<Float4> x)
3713{
3714 return emulated::Tan(x);
3715}
3716
3717RValue<Float4> Asin(RValue<Float4> x)
3718{
3719 return emulated::Asin(x);
3720}
3721
3722RValue<Float4> Acos(RValue<Float4> x)
3723{
3724 return emulated::Acos(x);
3725}
3726
3727RValue<Float4> Atan(RValue<Float4> x)
3728{
3729 return emulated::Atan(x);
3730}
3731
3732RValue<Float4> Sinh(RValue<Float4> x)
3733{
3734 return emulated::Sinh(x);
3735}
3736
3737RValue<Float4> Cosh(RValue<Float4> x)
3738{
3739 return emulated::Cosh(x);
3740}
3741
3742RValue<Float4> Tanh(RValue<Float4> x)
3743{
3744 return emulated::Tanh(x);
3745}
3746
3747RValue<Float4> Asinh(RValue<Float4> x)
3748{
3749 return emulated::Asinh(x);
3750}
3751
3752RValue<Float4> Acosh(RValue<Float4> x)
3753{
3754 return emulated::Acosh(x);
3755}
3756
3757RValue<Float4> Atanh(RValue<Float4> x)
3758{
3759 return emulated::Atanh(x);
3760}
3761
3762RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3763{
3764 return emulated::Atan2(x, y);
3765}
3766
3767RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3768{
3769 return emulated::Pow(x, y);
3770}
3771
3772RValue<Float4> Exp(RValue<Float4> x)
3773{
3774 return emulated::Exp(x);
3775}
3776
3777RValue<Float4> Log(RValue<Float4> x)
3778{
3779 return emulated::Log(x);
3780}
3781
3782RValue<Float4> Exp2(RValue<Float4> x)
3783{
3784 return emulated::Exp2(x);
3785}
3786
3787RValue<Float4> Log2(RValue<Float4> x)
3788{
3789 return emulated::Log2(x);
3790}
3791
3792RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
3793{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003794 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003795 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003796 UNIMPLEMENTED("Subzero Ctlz()");
3797 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003798 }
3799 else
3800 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003801 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05003802 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3803 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3804 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3805 ctlz->addArg(x.value);
3806 ::basicBlock->appendInst(ctlz);
3807
3808 return RValue<UInt>(V(result));
3809 }
3810}
3811
3812RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
3813{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003814 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003815 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003816 UNIMPLEMENTED("Subzero Ctlz()");
3817 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003818 }
3819 else
3820 {
3821 // TODO: implement vectorized version in Subzero
3822 UInt4 result;
3823 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
3824 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
3825 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
3826 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
3827 return result;
3828 }
3829}
3830
3831RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
3832{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003833 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003834 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003835 UNIMPLEMENTED("Subzero Cttz()");
3836 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003837 }
3838 else
3839 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003840 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05003841 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3842 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3843 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3844 ctlz->addArg(x.value);
3845 ::basicBlock->appendInst(ctlz);
3846
3847 return RValue<UInt>(V(result));
3848 }
3849}
3850
3851RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
3852{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003853 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003854 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003855 UNIMPLEMENTED("Subzero Cttz()");
3856 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003857 }
3858 else
3859 {
3860 // TODO: implement vectorized version in Subzero
3861 UInt4 result;
3862 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
3863 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
3864 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
3865 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
3866 return result;
3867 }
3868}
3869
Antonio Maiorano370cba52019-12-31 11:36:07 -05003870RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
3871{
3872 return emulated::MinAtomic(x, y, memoryOrder);
3873}
3874
3875RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
3876{
3877 return emulated::MinAtomic(x, y, memoryOrder);
3878}
3879
3880RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
3881{
3882 return emulated::MaxAtomic(x, y, memoryOrder);
3883}
3884
3885RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
3886{
3887 return emulated::MaxAtomic(x, y, memoryOrder);
3888}
3889
// Intentionally empty in this backend.
void EmitDebugLocation()
{
}
Ben Clayton713b8d32019-12-17 20:37:56 +00003891void EmitDebugVariable(Value *value) {}
// Intentionally empty in this backend.
void FlushDebug()
{
}
3893
Ben Clayton713b8d32019-12-17 20:37:56 +00003894void Nucleus::createCoroutine(Type *YieldType, std::vector<Type *> &Params)
Nicolas Capens157ba262019-12-10 17:49:14 -05003895{
3896 // Subzero currently only supports coroutines as functions (i.e. that do not yield)
3897 createFunction(YieldType, Params);
3898}
3899
Ben Clayton713b8d32019-12-17 20:37:56 +00003900static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
3901{
3902 return false;
3903}
Nicolas Capens157ba262019-12-10 17:49:14 -05003904static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle) {}
3905
3906std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
3907{
3908 // acquireRoutine sets the CoroutineEntryBegin entry
3909 auto coroutineEntry = acquireRoutine(name, cfgEdit);
3910
3911 // For now, set the await and destroy entries to stubs, until we add proper coroutine support to the Subzero backend
3912 auto routine = std::static_pointer_cast<ELFMemoryStreamer>(coroutineEntry);
Ben Clayton713b8d32019-12-17 20:37:56 +00003913 routine->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
3914 routine->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
Nicolas Capens157ba262019-12-10 17:49:14 -05003915
3916 return coroutineEntry;
3917}
3918
Ben Clayton713b8d32019-12-17 20:37:56 +00003919void Nucleus::yield(Value *val)
3920{
3921 UNIMPLEMENTED("Yield");
3922}
Nicolas Capens157ba262019-12-10 17:49:14 -05003923
3924} // namespace rr