blob: 3c2888f1c40def2ade97fccb29937aff4d8f4ad7 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Ben Claytoneb50d252019-04-15 13:50:01 -040015#include "Debug.hpp"
Antonio Maioranoe6ab4702019-11-29 11:26:30 -050016#include "EmulatedReactor.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000017#include "Reactor.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040018
Nicolas Capens1a3ce872018-10-10 10:42:36 -040019#include "ExecutableMemory.hpp"
Ben Clayton713b8d32019-12-17 20:37:56 +000020#include "Optimizer.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040021
Nicolas Capens598f8d82016-09-26 15:09:10 -040022#include "src/IceCfg.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040023#include "src/IceCfgNode.h"
24#include "src/IceELFObjectWriter.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000025#include "src/IceELFStreamer.h"
26#include "src/IceGlobalContext.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040027#include "src/IceGlobalInits.h"
Ben Clayton713b8d32019-12-17 20:37:56 +000028#include "src/IceTypes.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040029
Ben Clayton713b8d32019-12-17 20:37:56 +000030#include "llvm/Support/Compiler.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040031#include "llvm/Support/FileSystem.h"
32#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040033
34#if __has_feature(memory_sanitizer)
Ben Clayton713b8d32019-12-17 20:37:56 +000035# include <sanitizer/msan_interface.h>
Nicolas Capens6a990f82018-07-06 15:54:07 -040036#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040037
Nicolas Capensbd65da92017-01-05 16:31:06 -050038#if defined(_WIN32)
Ben Clayton713b8d32019-12-17 20:37:56 +000039# ifndef WIN32_LEAN_AND_MEAN
40# define WIN32_LEAN_AND_MEAN
41# endif // !WIN32_LEAN_AND_MEAN
42# ifndef NOMINMAX
43# define NOMINMAX
44# endif // !NOMINMAX
45# include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050046#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040047
Nicolas Capens598f8d82016-09-26 15:09:10 -040048#include <iostream>
Ben Clayton713b8d32019-12-17 20:37:56 +000049#include <limits>
50#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
Ben Clayton713b8d32019-12-17 20:37:56 +000052namespace rr {
53class ELFMemoryStreamer;
54}
Nicolas Capens157ba262019-12-10 17:49:14 -050055
56namespace {
57
58// Default configuration settings. Must be accessed under mutex lock.
59std::mutex defaultConfigLock;
60rr::Config &defaultConfig()
Ben Claytonb7eb3a82019-11-19 00:43:50 +000061{
Nicolas Capens157ba262019-12-10 17:49:14 -050062 // This uses a static in a function to avoid the cost of a global static
63 // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
64 static rr::Config config = rr::Config::Edit()
Ben Clayton713b8d32019-12-17 20:37:56 +000065 .apply({});
Nicolas Capens157ba262019-12-10 17:49:14 -050066 return config;
Ben Claytonb7eb3a82019-11-19 00:43:50 +000067}
68
Nicolas Capens157ba262019-12-10 17:49:14 -050069Ice::GlobalContext *context = nullptr;
70Ice::Cfg *function = nullptr;
71Ice::CfgNode *basicBlock = nullptr;
72Ice::CfgLocalAllocatorScope *allocator = nullptr;
73rr::ELFMemoryStreamer *routine = nullptr;
74
75std::mutex codegenMutex;
76
77Ice::ELFFileStreamer *elfFile = nullptr;
78Ice::Fdstream *out = nullptr;
79
80} // Anonymous namespace
81
82namespace {
83
84#if !defined(__i386__) && defined(_M_IX86)
Ben Clayton713b8d32019-12-17 20:37:56 +000085# define __i386__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -050086#endif
87
Ben Clayton713b8d32019-12-17 20:37:56 +000088#if !defined(__x86_64__) && (defined(_M_AMD64) || defined(_M_X64))
89# define __x86_64__ 1
Nicolas Capens157ba262019-12-10 17:49:14 -050090#endif
91
Antonio Maiorano370cba52019-12-31 11:36:07 -050092Ice::OptLevel toIce(rr::Optimization::Level level)
Nicolas Capens598f8d82016-09-26 15:09:10 -040093{
Nicolas Capens81bc9d92019-12-16 15:05:57 -050094 switch(level)
Ben Clayton55bc37a2019-07-04 12:17:12 +010095 {
Nicolas Capens157ba262019-12-10 17:49:14 -050096 // Note that Opt_0 and Opt_1 are not implemented by Subzero
Ben Clayton713b8d32019-12-17 20:37:56 +000097 case rr::Optimization::Level::None: return Ice::Opt_m1;
98 case rr::Optimization::Level::Less: return Ice::Opt_m1;
99 case rr::Optimization::Level::Default: return Ice::Opt_2;
Nicolas Capens157ba262019-12-10 17:49:14 -0500100 case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
101 default: UNREACHABLE("Unknown Optimization Level %d", int(level));
Ben Clayton55bc37a2019-07-04 12:17:12 +0100102 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500103 return Ice::Opt_2;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400104}
105
Antonio Maiorano370cba52019-12-31 11:36:07 -0500106Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)
107{
108 switch(memoryOrder)
109 {
110 case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
111 case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
112 case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
113 case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
114 case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
115 case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
116 }
117 return Ice::Intrinsics::MemoryOrderInvalid;
118}
119
// Host CPU feature detection. The results are computed once, when the static
// members below are initialized.
class CPUID
{
public:
	const static bool ARM;     // True when compiled for 32-bit or 64-bit ARM.
	const static bool SSE4_1;  // True when the x86 host reports SSE4.1 support.

private:
	// Executes the x86 CPUID instruction for leaf 'info' and stores EAX, EBX, ECX and EDX
	// in registers[0..3]. On non-x86 builds all four outputs are zeroed.
	static void cpuid(int registers[4], int info)
	{
#if defined(__i386__) || defined(__x86_64__)
#	if defined(_WIN32)
		__cpuid(registers, info);
#	else
		__asm volatile("cpuid"
		               : "=a"(registers[0]), "=b"(registers[1]), "=c"(registers[2]), "=d"(registers[3])
		               : "a"(info));
#	endif
#else
		for(int i = 0; i < 4; i++)
		{
			registers[i] = 0;
		}
#endif
	}

	// Compile-time architecture check; unknown architectures fail the build.
	static bool detectARM()
	{
#if defined(__arm__) || defined(__aarch64__)
		return true;
#elif defined(__i386__) || defined(__x86_64__) || defined(__mips__)
		return false;
#else
#	error "Unknown architecture"
#endif
	}

	// Tests bit 19 of ECX returned by CPUID leaf 1. Always false on non-x86 builds.
	static bool detectSSE4_1()
	{
#if defined(__i386__) || defined(__x86_64__)
		constexpr int sse41Mask = 0x00080000;

		int registers[4];
		cpuid(registers, 1);

		const int featuresECX = registers[2];
		return (featuresECX & sse41Mask) != 0;
#else
		return false;
#endif
	}
};

const bool CPUID::ARM = CPUID::detectARM();
const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
172const bool emulateIntrinsics = false;
173const bool emulateMismatchedBitCast = CPUID::ARM;
Nicolas Capensf7b75882017-04-26 09:30:47 -0400174
Nicolas Capens157ba262019-12-10 17:49:14 -0500175constexpr bool subzeroDumpEnabled = false;
176constexpr bool subzeroEmitTextAsm = false;
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500177
178#if !ALLOW_DUMP
Nicolas Capens157ba262019-12-10 17:49:14 -0500179static_assert(!subzeroDumpEnabled, "Compile Subzero with ALLOW_DUMP=1 for subzeroDumpEnabled");
180static_assert(!subzeroEmitTextAsm, "Compile Subzero with ALLOW_DUMP=1 for subzeroEmitTextAsm");
Antonio Maiorano05ac79a2019-11-20 15:45:13 -0500181#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500182
183} // anonymous namespace
184
185namespace rr {
186
// Returns the human-readable name of this Reactor backend.
std::string BackendName()
{
	std::string name("Subzero");
	return name;
}
191
Ben Clayton713b8d32019-12-17 20:37:56 +0000192const Capabilities Caps = {
193 false, // CoroutinesSupported
Nicolas Capens157ba262019-12-10 17:49:14 -0500194};
195
196enum EmulatedType
197{
198 EmulatedShift = 16,
199 EmulatedV2 = 2 << EmulatedShift,
200 EmulatedV4 = 4 << EmulatedShift,
201 EmulatedV8 = 8 << EmulatedShift,
202 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
203
204 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
205 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
206 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
Ben Clayton713b8d32019-12-17 20:37:56 +0000207 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
208 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens157ba262019-12-10 17:49:14 -0500209 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
210};
211
Ben Clayton713b8d32019-12-17 20:37:56 +0000212class Value : public Ice::Operand
213{};
214class SwitchCases : public Ice::InstSwitch
215{};
216class BasicBlock : public Ice::CfgNode
217{};
Nicolas Capens157ba262019-12-10 17:49:14 -0500218
219Ice::Type T(Type *t)
220{
221 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
222 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capensccd5ecb2017-01-14 12:52:55 -0500223}
224
Nicolas Capens157ba262019-12-10 17:49:14 -0500225Type *T(Ice::Type t)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400226{
Ben Clayton713b8d32019-12-17 20:37:56 +0000227 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500228}
229
230Type *T(EmulatedType t)
231{
Ben Clayton713b8d32019-12-17 20:37:56 +0000232 return reinterpret_cast<Type *>(t);
Nicolas Capens157ba262019-12-10 17:49:14 -0500233}
234
235Value *V(Ice::Operand *v)
236{
Ben Clayton713b8d32019-12-17 20:37:56 +0000237 return reinterpret_cast<Value *>(v);
Nicolas Capens157ba262019-12-10 17:49:14 -0500238}
239
240BasicBlock *B(Ice::CfgNode *b)
241{
Ben Clayton713b8d32019-12-17 20:37:56 +0000242 return reinterpret_cast<BasicBlock *>(b);
Nicolas Capens157ba262019-12-10 17:49:14 -0500243}
244
245static size_t typeSize(Type *type)
246{
247 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
Ben Claytonc7904162019-04-17 17:35:48 -0400248 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500249 switch(reinterpret_cast<std::intptr_t>(type))
Nicolas Capens584088c2017-01-26 16:05:18 -0800250 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000251 case Type_v2i32: return 8;
252 case Type_v4i16: return 8;
253 case Type_v2i16: return 4;
254 case Type_v8i8: return 8;
255 case Type_v4i8: return 4;
256 case Type_v2f32: return 8;
257 default: ASSERT(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500258 }
259 }
260
261 return Ice::typeWidthInBytes(T(type));
262}
263
Ben Clayton713b8d32019-12-17 20:37:56 +0000264using ElfHeader = std::conditional<sizeof(void *) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
265using SectionHeader = std::conditional<sizeof(void *) == 8, Elf64_Shdr, Elf32_Shdr>::type;
Nicolas Capens157ba262019-12-10 17:49:14 -0500266
267inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
268{
Ben Clayton713b8d32019-12-17 20:37:56 +0000269 return reinterpret_cast<const SectionHeader *>((intptr_t)elfHeader + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500270}
271
272inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
273{
274 return &sectionHeader(elfHeader)[index];
275}
276
277static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
278{
279 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
280
281 uint32_t index = relocation.getSymbol();
282 int table = relocationTable.sh_link;
283 void *symbolValue = nullptr;
284
285 if(index != SHN_UNDEF)
286 {
287 if(table == SHN_UNDEF) return nullptr;
288 const SectionHeader *symbolTable = elfSection(elfHeader, table);
289
290 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
291 if(index >= symtab_entries)
292 {
293 ASSERT(index < symtab_entries && "Symbol Index out of range");
294 return nullptr;
Nicolas Capens584088c2017-01-26 16:05:18 -0800295 }
296
Nicolas Capens157ba262019-12-10 17:49:14 -0500297 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000298 Elf32_Sym &symbol = ((Elf32_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500299 uint16_t section = symbol.st_shndx;
Nicolas Capens584088c2017-01-26 16:05:18 -0800300
Nicolas Capens157ba262019-12-10 17:49:14 -0500301 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens66478362016-10-13 15:36:36 -0400302 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500303 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000304 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400305 }
306 else
307 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500308 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400309 }
Nicolas Capens66478362016-10-13 15:36:36 -0400310 }
311
Nicolas Capens157ba262019-12-10 17:49:14 -0500312 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000313 unaligned_ptr<int32_t> patchSite = (int32_t *)(address + relocation.r_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500314
315 if(CPUID::ARM)
Nicolas Capens66478362016-10-13 15:36:36 -0400316 {
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400317 switch(relocation.getType())
318 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000319 case R_ARM_NONE:
320 // No relocation
321 break;
322 case R_ARM_MOVW_ABS_NC:
Nicolas Capens157ba262019-12-10 17:49:14 -0500323 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000324 uint32_t thumb = 0; // Calls to Thumb code not supported.
Nicolas Capens157ba262019-12-10 17:49:14 -0500325 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
326 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
327 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400328 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000329 case R_ARM_MOVT_ABS:
Nicolas Capens157ba262019-12-10 17:49:14 -0500330 {
331 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
332 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
333 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400334 break;
Ben Clayton713b8d32019-12-17 20:37:56 +0000335 default:
336 ASSERT(false && "Unsupported relocation type");
337 return nullptr;
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400338 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500339 }
340 else
341 {
342 switch(relocation.getType())
343 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000344 case R_386_NONE:
345 // No relocation
346 break;
347 case R_386_32:
348 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
349 break;
350 case R_386_PC32:
351 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
352 break;
353 default:
354 ASSERT(false && "Unsupported relocation type");
355 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500356 }
Nicolas Capens66478362016-10-13 15:36:36 -0400357 }
358
Nicolas Capens157ba262019-12-10 17:49:14 -0500359 return symbolValue;
360}
Nicolas Capens598f8d82016-09-26 15:09:10 -0400361
Nicolas Capens157ba262019-12-10 17:49:14 -0500362static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
363{
364 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
365
366 uint32_t index = relocation.getSymbol();
367 int table = relocationTable.sh_link;
368 void *symbolValue = nullptr;
369
370 if(index != SHN_UNDEF)
371 {
372 if(table == SHN_UNDEF) return nullptr;
373 const SectionHeader *symbolTable = elfSection(elfHeader, table);
374
375 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
376 if(index >= symtab_entries)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400377 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500378 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400379 return nullptr;
380 }
381
Nicolas Capens157ba262019-12-10 17:49:14 -0500382 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000383 Elf64_Sym &symbol = ((Elf64_Sym *)symbolAddress)[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500384 uint16_t section = symbol.st_shndx;
Nicolas Capens66478362016-10-13 15:36:36 -0400385
Nicolas Capens157ba262019-12-10 17:49:14 -0500386 if(section != SHN_UNDEF && section < SHN_LORESERVE)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400387 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500388 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
Ben Clayton713b8d32019-12-17 20:37:56 +0000389 symbolValue = reinterpret_cast<void *>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
Nicolas Capens157ba262019-12-10 17:49:14 -0500390 }
391 else
392 {
393 return nullptr;
394 }
395 }
Nicolas Capens66478362016-10-13 15:36:36 -0400396
Nicolas Capens157ba262019-12-10 17:49:14 -0500397 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
Ben Clayton713b8d32019-12-17 20:37:56 +0000398 unaligned_ptr<int32_t> patchSite32 = (int32_t *)(address + relocation.r_offset);
399 unaligned_ptr<int64_t> patchSite64 = (int64_t *)(address + relocation.r_offset);
Nicolas Capens66478362016-10-13 15:36:36 -0400400
Nicolas Capens157ba262019-12-10 17:49:14 -0500401 switch(relocation.getType())
402 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000403 case R_X86_64_NONE:
404 // No relocation
405 break;
406 case R_X86_64_64:
407 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
408 break;
409 case R_X86_64_PC32:
410 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
411 break;
412 case R_X86_64_32S:
413 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
414 break;
415 default:
416 ASSERT(false && "Unsupported relocation type");
417 return nullptr;
Nicolas Capens157ba262019-12-10 17:49:14 -0500418 }
419
420 return symbolValue;
421}
422
423void *loadImage(uint8_t *const elfImage, size_t &codeSize)
424{
Ben Clayton713b8d32019-12-17 20:37:56 +0000425 ElfHeader *elfHeader = (ElfHeader *)elfImage;
Nicolas Capens157ba262019-12-10 17:49:14 -0500426
427 if(!elfHeader->checkMagic())
428 {
429 return nullptr;
430 }
431
432 // Expect ELF bitness to match platform
Ben Clayton713b8d32019-12-17 20:37:56 +0000433 ASSERT(sizeof(void *) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
434#if defined(__i386__)
435 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_386);
436#elif defined(__x86_64__)
437 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_X86_64);
438#elif defined(__arm__)
439 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_ARM);
440#elif defined(__aarch64__)
441 ASSERT(sizeof(void *) == 8 && elfHeader->e_machine == EM_AARCH64);
442#elif defined(__mips__)
443 ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS);
444#else
445# error "Unsupported platform"
446#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500447
Ben Clayton713b8d32019-12-17 20:37:56 +0000448 SectionHeader *sectionHeader = (SectionHeader *)(elfImage + elfHeader->e_shoff);
Nicolas Capens157ba262019-12-10 17:49:14 -0500449 void *entry = nullptr;
450
451 for(int i = 0; i < elfHeader->e_shnum; i++)
452 {
453 if(sectionHeader[i].sh_type == SHT_PROGBITS)
454 {
455 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
456 {
457 entry = elfImage + sectionHeader[i].sh_offset;
458 codeSize = sectionHeader[i].sh_size;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400459 }
460 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500461 else if(sectionHeader[i].sh_type == SHT_REL)
462 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000463 ASSERT(sizeof(void *) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens598f8d82016-09-26 15:09:10 -0400464
Nicolas Capens157ba262019-12-10 17:49:14 -0500465 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
466 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000467 const Elf32_Rel &relocation = ((const Elf32_Rel *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500468 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
469 }
470 }
471 else if(sectionHeader[i].sh_type == SHT_RELA)
472 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000473 ASSERT(sizeof(void *) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens157ba262019-12-10 17:49:14 -0500474
475 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
476 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000477 const Elf64_Rela &relocation = ((const Elf64_Rela *)(elfImage + sectionHeader[i].sh_offset))[index];
Nicolas Capens157ba262019-12-10 17:49:14 -0500478 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
479 }
480 }
481 }
482
483 return entry;
484}
485
486template<typename T>
487struct ExecutableAllocator
488{
489 ExecutableAllocator() {}
Ben Clayton713b8d32019-12-17 20:37:56 +0000490 template<class U>
491 ExecutableAllocator(const ExecutableAllocator<U> &other)
492 {}
Nicolas Capens157ba262019-12-10 17:49:14 -0500493
494 using value_type = T;
495 using size_type = std::size_t;
496
497 T *allocate(size_type n)
498 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000499 return (T *)allocateMemoryPages(
500 sizeof(T) * n, PERMISSION_READ | PERMISSION_WRITE, true);
Nicolas Capens157ba262019-12-10 17:49:14 -0500501 }
502
503 void deallocate(T *p, size_type n)
504 {
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800505 deallocateMemoryPages(p, sizeof(T) * n);
Nicolas Capens157ba262019-12-10 17:49:14 -0500506 }
507};
508
509class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
510{
511 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
512 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
513
514public:
Ben Clayton713b8d32019-12-17 20:37:56 +0000515 ELFMemoryStreamer()
516 : Routine()
Nicolas Capens157ba262019-12-10 17:49:14 -0500517 {
518 position = 0;
519 buffer.reserve(0x1000);
520 }
521
522 ~ELFMemoryStreamer() override
523 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500524 }
525
526 void write8(uint8_t Value) override
527 {
528 if(position == (uint64_t)buffer.size())
529 {
530 buffer.push_back(Value);
531 position++;
532 }
533 else if(position < (uint64_t)buffer.size())
534 {
535 buffer[position] = Value;
536 position++;
537 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000538 else
539 ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens157ba262019-12-10 17:49:14 -0500540 }
541
542 void writeBytes(llvm::StringRef Bytes) override
543 {
544 std::size_t oldSize = buffer.size();
545 buffer.resize(oldSize + Bytes.size());
546 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
547 position += Bytes.size();
548 }
549
550 uint64_t tell() const override { return position; }
551
552 void seek(uint64_t Off) override { position = Off; }
553
Ben Clayton713b8d32019-12-17 20:37:56 +0000554 const void *finalizeEntryBegin()
Nicolas Capens157ba262019-12-10 17:49:14 -0500555 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000556 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
Nicolas Capens157ba262019-12-10 17:49:14 -0500557
558 size_t codeSize = 0;
559 const void *entry = loadImage(&buffer[0], codeSize);
560
Sergey Ulanovebb0bec2019-12-12 11:53:04 -0800561 protectMemoryPages(&buffer[0], buffer.size(), PERMISSION_READ | PERMISSION_EXECUTE);
Nicolas Capens157ba262019-12-10 17:49:14 -0500562#if defined(_WIN32)
Nicolas Capens157ba262019-12-10 17:49:14 -0500563 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
564#else
Ben Clayton713b8d32019-12-17 20:37:56 +0000565 __builtin___clear_cache((char *)entry, (char *)entry + codeSize);
Nicolas Capens157ba262019-12-10 17:49:14 -0500566#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -0400567 return entry;
568 }
569
Ben Clayton713b8d32019-12-17 20:37:56 +0000570 void setEntry(int index, const void *func)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400571 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500572 ASSERT(func);
573 funcs[index] = func;
574 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400575
Nicolas Capens157ba262019-12-10 17:49:14 -0500576 const void *getEntry(int index) const override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400577 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500578 ASSERT(funcs[index]);
579 return funcs[index];
580 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400581
Ben Clayton713b8d32019-12-17 20:37:56 +0000582 const void *addConstantData(const void *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -0500583 {
584 auto buf = std::unique_ptr<uint8_t[]>(new uint8_t[size]);
585 memcpy(buf.get(), data, size);
586 auto ptr = buf.get();
587 constantData.emplace_back(std::move(buf));
588 return ptr;
589 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400590
Nicolas Capens157ba262019-12-10 17:49:14 -0500591private:
Ben Clayton713b8d32019-12-17 20:37:56 +0000592 std::array<const void *, Nucleus::CoroutineEntryCount> funcs = {};
Nicolas Capens157ba262019-12-10 17:49:14 -0500593 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
594 std::size_t position;
595 std::vector<std::unique_ptr<uint8_t[]>> constantData;
Nicolas Capens157ba262019-12-10 17:49:14 -0500596};
597
598Nucleus::Nucleus()
599{
Ben Clayton713b8d32019-12-17 20:37:56 +0000600 ::codegenMutex.lock(); // Reactor is currently not thread safe
Nicolas Capens157ba262019-12-10 17:49:14 -0500601
602 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
603 Ice::ClFlags::getParsedClFlags(Flags);
604
Ben Clayton713b8d32019-12-17 20:37:56 +0000605#if defined(__arm__)
606 Flags.setTargetArch(Ice::Target_ARM32);
607 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
608#elif defined(__mips__)
609 Flags.setTargetArch(Ice::Target_MIPS32);
610 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
611#else // x86
612 Flags.setTargetArch(sizeof(void *) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
613 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
614#endif
Nicolas Capens157ba262019-12-10 17:49:14 -0500615 Flags.setOutFileType(Ice::FT_Elf);
616 Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
617 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
618 Flags.setVerbose(subzeroDumpEnabled ? Ice::IceV_Most : Ice::IceV_None);
619 Flags.setDisableHybridAssembly(true);
620
621 static llvm::raw_os_ostream cout(std::cout);
622 static llvm::raw_os_ostream cerr(std::cerr);
623
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500624 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500625 {
626 // Decorate text asm with liveness info
627 Flags.setDecorateAsm(true);
628 }
629
Ben Clayton713b8d32019-12-17 20:37:56 +0000630 if(false) // Write out to a file
Nicolas Capens157ba262019-12-10 17:49:14 -0500631 {
632 std::error_code errorCode;
633 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
634 ::elfFile = new Ice::ELFFileStreamer(*out);
635 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
636 }
637 else
638 {
639 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
640 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
641 ::routine = elfMemory;
642 }
643}
644
645Nucleus::~Nucleus()
646{
647 delete ::routine;
648
649 delete ::allocator;
650 delete ::function;
651 delete ::context;
652
653 delete ::elfFile;
654 delete ::out;
655
656 ::codegenMutex.unlock();
657}
658
659void Nucleus::setDefaultConfig(const Config &cfg)
660{
661 std::unique_lock<std::mutex> lock(::defaultConfigLock);
662 ::defaultConfig() = cfg;
663}
664
665void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
666{
667 std::unique_lock<std::mutex> lock(::defaultConfigLock);
668 auto &config = ::defaultConfig();
669 config = cfgEdit.apply(config);
670}
671
672Config Nucleus::getDefaultConfig()
673{
674 std::unique_lock<std::mutex> lock(::defaultConfigLock);
675 return ::defaultConfig();
676}
677
678std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
679{
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500680 if(subzeroDumpEnabled)
Nicolas Capens157ba262019-12-10 17:49:14 -0500681 {
682 // Output dump strings immediately, rather than once buffer is full. Useful for debugging.
683 context->getStrDump().SetUnbuffered();
684 }
685
686 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
687 {
688 createRetVoid();
689 }
690
691 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, name));
692
693 rr::optimize(::function);
694
695 ::function->computeInOutEdges();
696 ASSERT(!::function->hasError());
697
698 ::function->translate();
699 ASSERT(!::function->hasError());
700
701 auto globals = ::function->getGlobalInits();
702
703 if(globals && !globals->empty())
704 {
705 ::context->getGlobals()->merge(globals.get());
706 }
707
708 ::context->emitFileHeader();
709
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500710 if(subzeroEmitTextAsm)
Nicolas Capens157ba262019-12-10 17:49:14 -0500711 {
712 ::function->emit();
713 }
714
715 ::function->emitIAS();
716 auto assembler = ::function->releaseAssembler();
717 auto objectWriter = ::context->getObjectWriter();
718 assembler->alignFunction();
719 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
720 ::context->lowerGlobals("last");
721 ::context->lowerConstants();
722 ::context->lowerJumpTables();
723 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
724 objectWriter->writeNonUserSections();
725
Ben Clayton713b8d32019-12-17 20:37:56 +0000726 const void *entryBegin = ::routine->finalizeEntryBegin();
Nicolas Capens157ba262019-12-10 17:49:14 -0500727 ::routine->setEntry(Nucleus::CoroutineEntryBegin, entryBegin);
728
729 Routine *handoffRoutine = ::routine;
730 ::routine = nullptr;
731
732 return std::shared_ptr<Routine>(handoffRoutine);
733}
734
735Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
736{
737 Ice::Type type = T(t);
738 int typeSize = Ice::typeWidthInBytes(type);
739 int totalSize = typeSize * (arraySize ? arraySize : 1);
740
741 auto bytes = Ice::ConstantInteger32::create(::context, Ice::IceType_i32, totalSize);
742 auto address = ::function->makeVariable(T(getPointerType(t)));
743 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
744 ::function->getEntryNode()->getInsts().push_front(alloca);
745
746 return V(address);
747}
748
749BasicBlock *Nucleus::createBasicBlock()
750{
751 return B(::function->makeNode());
752}
753
754BasicBlock *Nucleus::getInsertBlock()
755{
756 return B(::basicBlock);
757}
758
759void Nucleus::setInsertBlock(BasicBlock *basicBlock)
760{
Ben Clayton713b8d32019-12-17 20:37:56 +0000761 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens157ba262019-12-10 17:49:14 -0500762
763 Variable::materializeAll();
764
765 ::basicBlock = basicBlock;
766}
767
Ben Clayton713b8d32019-12-17 20:37:56 +0000768void Nucleus::createFunction(Type *ReturnType, std::vector<Type *> &Params)
Nicolas Capens157ba262019-12-10 17:49:14 -0500769{
770 uint32_t sequenceNumber = 0;
771 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
772 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
773
774 for(Type *type : Params)
775 {
776 Ice::Variable *arg = ::function->makeVariable(T(type));
777 ::function->addArg(arg);
778 }
779
780 Ice::CfgNode *node = ::function->makeNode();
781 ::function->setEntryNode(node);
782 ::basicBlock = node;
783}
784
785Value *Nucleus::getArgument(unsigned int index)
786{
787 return V(::function->getArgs()[index]);
788}
789
790void Nucleus::createRetVoid()
791{
792 // Code generated after this point is unreachable, so any variables
793 // being read can safely return an undefined value. We have to avoid
794 // materializing variables after the terminator ret instruction.
795 Variable::killUnmaterialized();
796
797 Ice::InstRet *ret = Ice::InstRet::create(::function);
798 ::basicBlock->appendInst(ret);
799}
800
801void Nucleus::createRet(Value *v)
802{
803 // Code generated after this point is unreachable, so any variables
804 // being read can safely return an undefined value. We have to avoid
805 // materializing variables after the terminator ret instruction.
806 Variable::killUnmaterialized();
807
808 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
809 ::basicBlock->appendInst(ret);
810}
811
812void Nucleus::createBr(BasicBlock *dest)
813{
814 Variable::materializeAll();
815
816 auto br = Ice::InstBr::create(::function, dest);
817 ::basicBlock->appendInst(br);
818}
819
820void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
821{
822 Variable::materializeAll();
823
824 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
825 ::basicBlock->appendInst(br);
826}
827
828static bool isCommutative(Ice::InstArithmetic::OpKind op)
829{
830 switch(op)
831 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000832 case Ice::InstArithmetic::Add:
833 case Ice::InstArithmetic::Fadd:
834 case Ice::InstArithmetic::Mul:
835 case Ice::InstArithmetic::Fmul:
836 case Ice::InstArithmetic::And:
837 case Ice::InstArithmetic::Or:
838 case Ice::InstArithmetic::Xor:
839 return true;
840 default:
841 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -0500842 }
843}
844
845static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
846{
847 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
848
849 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
850
851 Ice::Variable *result = ::function->makeVariable(lhs->getType());
852 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
853 ::basicBlock->appendInst(arithmetic);
854
855 return V(result);
856}
857
858Value *Nucleus::createAdd(Value *lhs, Value *rhs)
859{
860 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
861}
862
863Value *Nucleus::createSub(Value *lhs, Value *rhs)
864{
865 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
866}
867
868Value *Nucleus::createMul(Value *lhs, Value *rhs)
869{
870 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
871}
872
873Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
874{
875 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
876}
877
878Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
879{
880 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
881}
882
883Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
884{
885 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
886}
887
888Value *Nucleus::createFSub(Value *lhs, Value *rhs)
889{
890 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
891}
892
893Value *Nucleus::createFMul(Value *lhs, Value *rhs)
894{
895 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
896}
897
898Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
899{
900 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
901}
902
903Value *Nucleus::createURem(Value *lhs, Value *rhs)
904{
905 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
906}
907
908Value *Nucleus::createSRem(Value *lhs, Value *rhs)
909{
910 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
911}
912
913Value *Nucleus::createFRem(Value *lhs, Value *rhs)
914{
Antonio Maiorano5ef91b82020-01-21 15:10:22 -0500915 // TODO(b/148139679) Fix Subzero generating invalid code for FRem on vector types
916 // createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
917 UNIMPLEMENTED("Nucleus::createFRem");
918 return nullptr;
919}
920
921RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
922{
923 return emulated::FRem(lhs, rhs);
Nicolas Capens157ba262019-12-10 17:49:14 -0500924}
925
926Value *Nucleus::createShl(Value *lhs, Value *rhs)
927{
928 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
929}
930
931Value *Nucleus::createLShr(Value *lhs, Value *rhs)
932{
933 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
934}
935
936Value *Nucleus::createAShr(Value *lhs, Value *rhs)
937{
938 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
939}
940
941Value *Nucleus::createAnd(Value *lhs, Value *rhs)
942{
943 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
944}
945
946Value *Nucleus::createOr(Value *lhs, Value *rhs)
947{
948 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
949}
950
951Value *Nucleus::createXor(Value *lhs, Value *rhs)
952{
953 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
954}
955
956Value *Nucleus::createNeg(Value *v)
957{
958 return createSub(createNullValue(T(v->getType())), v);
959}
960
961Value *Nucleus::createFNeg(Value *v)
962{
Ben Clayton713b8d32019-12-17 20:37:56 +0000963 double c[4] = { -0.0, -0.0, -0.0, -0.0 };
964 Value *negativeZero = Ice::isVectorType(v->getType()) ? createConstantVector(c, T(v->getType())) : V(::context->getConstantFloat(-0.0f));
Nicolas Capens157ba262019-12-10 17:49:14 -0500965
966 return createFSub(negativeZero, v);
967}
968
969Value *Nucleus::createNot(Value *v)
970{
971 if(Ice::isScalarIntegerType(v->getType()))
972 {
973 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
974 }
Ben Clayton713b8d32019-12-17 20:37:56 +0000975 else // Vector
Nicolas Capens157ba262019-12-10 17:49:14 -0500976 {
Ben Clayton713b8d32019-12-17 20:37:56 +0000977 int64_t c[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
Nicolas Capens157ba262019-12-10 17:49:14 -0500978 return createXor(v, createConstantVector(c, T(v->getType())));
979 }
980}
981
982Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
983{
Ben Clayton713b8d32019-12-17 20:37:56 +0000984 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -0500985 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
986
987 int valueType = (int)reinterpret_cast<intptr_t>(type);
988 Ice::Variable *result = ::function->makeVariable(T(type));
989
Ben Clayton713b8d32019-12-17 20:37:56 +0000990 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -0500991 {
992 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400993 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500994 if(typeSize(type) == 4)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400995 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500996 auto pointer = RValue<Pointer<Byte>>(ptr);
997 Int x = *Pointer<Int>(pointer);
998
999 Int4 vector;
1000 vector = Insert(vector, x, 0);
1001
1002 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1003 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001004 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001005 else if(typeSize(type) == 8)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001006 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001007 auto pointer = RValue<Pointer<Byte>>(ptr);
1008 Int x = *Pointer<Int>(pointer);
1009 Int y = *Pointer<Int>(pointer + 4);
1010
1011 Int4 vector;
1012 vector = Insert(vector, x, 0);
1013 vector = Insert(vector, y, 1);
1014
1015 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
1016 ::basicBlock->appendInst(bitcast);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001017 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001018 else
1019 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001020 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001021 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001022 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001023 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001024 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1025 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1026 load->addArg(ptr);
1027 load->addArg(::context->getConstantInt32(typeSize(type)));
1028 ::basicBlock->appendInst(load);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001029 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001030 }
1031 else
1032 {
1033 auto load = Ice::InstLoad::create(::function, result, ptr, align);
1034 ::basicBlock->appendInst(load);
1035 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001036
Nicolas Capens157ba262019-12-10 17:49:14 -05001037 return V(result);
1038}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001039
Nicolas Capens157ba262019-12-10 17:49:14 -05001040Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
1041{
Ben Clayton713b8d32019-12-17 20:37:56 +00001042 ASSERT(!atomic); // Unimplemented
Nicolas Capens157ba262019-12-10 17:49:14 -05001043 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
Nicolas Capens598f8d82016-09-26 15:09:10 -04001044
Ben Clayton713b8d32019-12-17 20:37:56 +00001045#if __has_feature(memory_sanitizer)
1046 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
1047 if(align != 0)
1048 {
1049 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
1050 call->addArg(ptr);
1051 call->addArg(::context->getConstantInt64(typeSize(type)));
1052 ::basicBlock->appendInst(call);
1053 }
1054#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001055
Nicolas Capens157ba262019-12-10 17:49:14 -05001056 int valueType = (int)reinterpret_cast<intptr_t>(type);
1057
Ben Clayton713b8d32019-12-17 20:37:56 +00001058 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens157ba262019-12-10 17:49:14 -05001059 {
1060 if(emulateIntrinsics)
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001061 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001062 if(typeSize(type) == 4)
1063 {
1064 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1065 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1066 ::basicBlock->appendInst(bitcast);
1067
1068 RValue<Int4> v(V(vector));
1069
1070 auto pointer = RValue<Pointer<Byte>>(ptr);
1071 Int x = Extract(v, 0);
1072 *Pointer<Int>(pointer) = x;
1073 }
1074 else if(typeSize(type) == 8)
1075 {
1076 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
1077 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
1078 ::basicBlock->appendInst(bitcast);
1079
1080 RValue<Int4> v(V(vector));
1081
1082 auto pointer = RValue<Pointer<Byte>>(ptr);
1083 Int x = Extract(v, 0);
1084 *Pointer<Int>(pointer) = x;
1085 Int y = Extract(v, 1);
1086 *Pointer<Int>(pointer + 4) = y;
1087 }
Ben Clayton713b8d32019-12-17 20:37:56 +00001088 else
1089 UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001090 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001091 else
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001092 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001093 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
Nicolas Capens157ba262019-12-10 17:49:14 -05001094 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1095 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
1096 store->addArg(value);
1097 store->addArg(ptr);
1098 store->addArg(::context->getConstantInt32(typeSize(type)));
1099 ::basicBlock->appendInst(store);
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001100 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001101 }
1102 else
1103 {
1104 ASSERT(value->getType() == T(type));
Antonio Maiorano9c0617c2019-11-29 10:43:16 -05001105
Nicolas Capens157ba262019-12-10 17:49:14 -05001106 auto store = Ice::InstStore::create(::function, value, ptr, align);
1107 ::basicBlock->appendInst(store);
1108 }
1109
1110 return value;
1111}
1112
1113Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1114{
1115 ASSERT(index->getType() == Ice::IceType_i32);
1116
1117 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1118 {
1119 int32_t offset = constant->getValue() * (int)typeSize(type);
1120
1121 if(offset == 0)
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001122 {
Ben Claytonb7eb3a82019-11-19 00:43:50 +00001123 return ptr;
1124 }
1125
Nicolas Capens157ba262019-12-10 17:49:14 -05001126 return createAdd(ptr, createConstantInt(offset));
1127 }
Nicolas Capensbd65da92017-01-05 16:31:06 -05001128
Nicolas Capens157ba262019-12-10 17:49:14 -05001129 if(!Ice::isByteSizedType(T(type)))
1130 {
1131 index = createMul(index, createConstantInt((int)typeSize(type)));
1132 }
1133
Ben Clayton713b8d32019-12-17 20:37:56 +00001134 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001135 {
1136 if(unsignedIndex)
1137 {
1138 index = createZExt(index, T(Ice::IceType_i64));
1139 }
1140 else
1141 {
1142 index = createSExt(index, T(Ice::IceType_i64));
1143 }
1144 }
1145
1146 return createAdd(ptr, index);
1147}
1148
Antonio Maiorano370cba52019-12-31 11:36:07 -05001149static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)
1150{
1151 Ice::Variable *result = ::function->makeVariable(value->getType());
1152
1153 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1154 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1155 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1156 auto op = ::context->getConstantInt32(rmwOp);
1157 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
1158 inst->addArg(op);
1159 inst->addArg(ptr);
1160 inst->addArg(value);
1161 inst->addArg(order);
1162 ::basicBlock->appendInst(inst);
1163
1164 return V(result);
1165}
1166
Nicolas Capens157ba262019-12-10 17:49:14 -05001167Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
1168{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001169 return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001170}
1171
1172Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
1173{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001174 return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001175}
1176
1177Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
1178{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001179 return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001180}
1181
1182Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
1183{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001184 return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001185}
1186
1187Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
1188{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001189 return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001190}
1191
1192Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
1193{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001194 return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);
Nicolas Capens157ba262019-12-10 17:49:14 -05001195}
1196
1197Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
1198{
Antonio Maiorano370cba52019-12-31 11:36:07 -05001199 Ice::Variable *result = ::function->makeVariable(value->getType());
1200
1201 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
1202 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1203 auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
1204 auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
1205 auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
1206 inst->addArg(ptr);
1207 inst->addArg(compare);
1208 inst->addArg(value);
1209 inst->addArg(orderEq);
1210 inst->addArg(orderNeq);
1211 ::basicBlock->appendInst(inst);
1212
1213 return V(result);
Nicolas Capens157ba262019-12-10 17:49:14 -05001214}
1215
1216static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1217{
1218 if(v->getType() == T(destType))
1219 {
1220 return v;
1221 }
1222
1223 Ice::Variable *result = ::function->makeVariable(T(destType));
1224 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1225 ::basicBlock->appendInst(cast);
1226
1227 return V(result);
1228}
1229
1230Value *Nucleus::createTrunc(Value *v, Type *destType)
1231{
1232 return createCast(Ice::InstCast::Trunc, v, destType);
1233}
1234
1235Value *Nucleus::createZExt(Value *v, Type *destType)
1236{
1237 return createCast(Ice::InstCast::Zext, v, destType);
1238}
1239
1240Value *Nucleus::createSExt(Value *v, Type *destType)
1241{
1242 return createCast(Ice::InstCast::Sext, v, destType);
1243}
1244
1245Value *Nucleus::createFPToUI(Value *v, Type *destType)
1246{
1247 return createCast(Ice::InstCast::Fptoui, v, destType);
1248}
1249
1250Value *Nucleus::createFPToSI(Value *v, Type *destType)
1251{
1252 return createCast(Ice::InstCast::Fptosi, v, destType);
1253}
1254
1255Value *Nucleus::createSIToFP(Value *v, Type *destType)
1256{
1257 return createCast(Ice::InstCast::Sitofp, v, destType);
1258}
1259
1260Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1261{
1262 return createCast(Ice::InstCast::Fptrunc, v, destType);
1263}
1264
1265Value *Nucleus::createFPExt(Value *v, Type *destType)
1266{
1267 return createCast(Ice::InstCast::Fpext, v, destType);
1268}
1269
1270Value *Nucleus::createBitCast(Value *v, Type *destType)
1271{
1272 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1273 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1274 // emulate them by writing to the stack and reading back as the destination type.
1275 if(emulateMismatchedBitCast)
1276 {
1277 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1278 {
1279 Value *address = allocateStackVariable(destType);
1280 createStore(v, address, T(v->getType()));
1281 return createLoad(address, destType);
1282 }
1283 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1284 {
1285 Value *address = allocateStackVariable(T(v->getType()));
1286 createStore(v, address, T(v->getType()));
1287 return createLoad(address, destType);
1288 }
1289 }
1290
1291 return createCast(Ice::InstCast::Bitcast, v, destType);
1292}
1293
1294static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1295{
1296 ASSERT(lhs->getType() == rhs->getType());
1297
1298 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1299 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1300 ::basicBlock->appendInst(cmp);
1301
1302 return V(result);
1303}
1304
1305Value *Nucleus::createPtrEQ(Value *lhs, Value *rhs)
1306{
1307 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1308}
1309
1310Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1311{
1312 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1313}
1314
1315Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1316{
1317 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1318}
1319
1320Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1321{
1322 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1323}
1324
1325Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1326{
1327 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1328}
1329
1330Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1331{
1332 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1333}
1334
1335Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1336{
1337 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1338}
1339
1340Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1341{
1342 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1343}
1344
1345Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1346{
1347 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1348}
1349
1350Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1351{
1352 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1353}
1354
1355Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1356{
1357 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1358}
1359
1360static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1361{
1362 ASSERT(lhs->getType() == rhs->getType());
1363 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1364
1365 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1366 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1367 ::basicBlock->appendInst(cmp);
1368
1369 return V(result);
1370}
1371
1372Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1373{
1374 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1375}
1376
1377Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1378{
1379 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1380}
1381
1382Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1383{
1384 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1385}
1386
1387Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1388{
1389 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1390}
1391
1392Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1393{
1394 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1395}
1396
1397Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1398{
1399 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1400}
1401
1402Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1403{
1404 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1405}
1406
1407Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1408{
1409 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1410}
1411
1412Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1413{
1414 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1415}
1416
1417Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1418{
1419 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1420}
1421
1422Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1423{
1424 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1425}
1426
1427Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1428{
1429 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1430}
1431
1432Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1433{
1434 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1435}
1436
1437Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1438{
1439 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1440}
1441
1442Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1443{
1444 auto result = ::function->makeVariable(T(type));
1445 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1446 ::basicBlock->appendInst(extract);
1447
1448 return V(result);
1449}
1450
1451Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1452{
1453 auto result = ::function->makeVariable(vector->getType());
1454 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1455 ::basicBlock->appendInst(insert);
1456
1457 return V(result);
1458}
1459
1460Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1461{
1462 ASSERT(V1->getType() == V2->getType());
1463
1464 int size = Ice::typeNumElements(V1->getType());
1465 auto result = ::function->makeVariable(V1->getType());
1466 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1467
1468 for(int i = 0; i < size; i++)
1469 {
1470 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1471 }
1472
1473 ::basicBlock->appendInst(shuffle);
1474
1475 return V(result);
1476}
1477
1478Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1479{
1480 ASSERT(ifTrue->getType() == ifFalse->getType());
1481
1482 auto result = ::function->makeVariable(ifTrue->getType());
1483 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1484 ::basicBlock->appendInst(select);
1485
1486 return V(result);
1487}
1488
1489SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1490{
1491 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1492 ::basicBlock->appendInst(switchInst);
1493
Ben Clayton713b8d32019-12-17 20:37:56 +00001494 return reinterpret_cast<SwitchCases *>(switchInst);
Nicolas Capens157ba262019-12-10 17:49:14 -05001495}
1496
1497void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1498{
1499 switchCases->addBranch(label, label, branch);
1500}
1501
1502void Nucleus::createUnreachable()
1503{
1504 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1505 ::basicBlock->appendInst(unreachable);
1506}
1507
1508Type *Nucleus::getPointerType(Type *ElementType)
1509{
Ben Clayton713b8d32019-12-17 20:37:56 +00001510 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05001511 {
1512 return T(Ice::IceType_i64);
1513 }
1514 else
1515 {
1516 return T(Ice::IceType_i32);
1517 }
1518}
1519
1520Value *Nucleus::createNullValue(Type *Ty)
1521{
1522 if(Ice::isVectorType(T(Ty)))
1523 {
1524 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Ben Clayton713b8d32019-12-17 20:37:56 +00001525 int64_t c[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05001526 return createConstantVector(c, Ty);
1527 }
1528 else
1529 {
1530 return V(::context->getConstantZero(T(Ty)));
1531 }
1532}
1533
1534Value *Nucleus::createConstantLong(int64_t i)
1535{
1536 return V(::context->getConstantInt64(i));
1537}
1538
1539Value *Nucleus::createConstantInt(int i)
1540{
1541 return V(::context->getConstantInt32(i));
1542}
1543
1544Value *Nucleus::createConstantInt(unsigned int i)
1545{
1546 return V(::context->getConstantInt32(i));
1547}
1548
1549Value *Nucleus::createConstantBool(bool b)
1550{
1551 return V(::context->getConstantInt1(b));
1552}
1553
1554Value *Nucleus::createConstantByte(signed char i)
1555{
1556 return V(::context->getConstantInt8(i));
1557}
1558
1559Value *Nucleus::createConstantByte(unsigned char i)
1560{
1561 return V(::context->getConstantInt8(i));
1562}
1563
1564Value *Nucleus::createConstantShort(short i)
1565{
1566 return V(::context->getConstantInt16(i));
1567}
1568
1569Value *Nucleus::createConstantShort(unsigned short i)
1570{
1571 return V(::context->getConstantInt16(i));
1572}
1573
1574Value *Nucleus::createConstantFloat(float x)
1575{
1576 return V(::context->getConstantFloat(x));
1577}
1578
1579Value *Nucleus::createNullPointer(Type *Ty)
1580{
Ben Clayton713b8d32019-12-17 20:37:56 +00001581 return createNullValue(T(sizeof(void *) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens157ba262019-12-10 17:49:14 -05001582}
1583
1584Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1585{
1586 const int vectorSize = 16;
1587 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
1588 const int alignment = vectorSize;
1589 auto globalPool = ::function->getGlobalPool();
1590
1591 const int64_t *i = constants;
Ben Clayton713b8d32019-12-17 20:37:56 +00001592 const double *f = reinterpret_cast<const double *>(constants);
Nicolas Capens157ba262019-12-10 17:49:14 -05001593 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1594
1595 switch((int)reinterpret_cast<intptr_t>(type))
1596 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001597 case Ice::IceType_v4i32:
1598 case Ice::IceType_v4i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001599 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001600 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[2], (int)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001601 static_assert(sizeof(initializer) == vectorSize, "!");
Ben Clayton713b8d32019-12-17 20:37:56 +00001602 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char *)initializer, vectorSize);
Nicolas Capens157ba262019-12-10 17:49:14 -05001603 }
1604 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001605 case Ice::IceType_v4f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001606 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001607 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[2], (float)f[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001608 static_assert(sizeof(initializer) == vectorSize, "!");
Ben Clayton713b8d32019-12-17 20:37:56 +00001609 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char *)initializer, vectorSize);
Nicolas Capens157ba262019-12-10 17:49:14 -05001610 }
1611 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001612 case Ice::IceType_v8i16:
1613 case Ice::IceType_v8i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001614 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001615 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001616 static_assert(sizeof(initializer) == vectorSize, "!");
Ben Clayton713b8d32019-12-17 20:37:56 +00001617 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char *)initializer, vectorSize);
Nicolas Capens157ba262019-12-10 17:49:14 -05001618 }
1619 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001620 case Ice::IceType_v16i8:
1621 case Ice::IceType_v16i1:
Nicolas Capens157ba262019-12-10 17:49:14 -05001622 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001623 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001624 static_assert(sizeof(initializer) == vectorSize, "!");
Ben Clayton713b8d32019-12-17 20:37:56 +00001625 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char *)initializer, vectorSize);
Nicolas Capens157ba262019-12-10 17:49:14 -05001626 }
1627 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001628 case Type_v2i32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001629 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001630 const int initializer[4] = { (int)i[0], (int)i[1], (int)i[0], (int)i[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001631 static_assert(sizeof(initializer) == vectorSize, "!");
Ben Clayton713b8d32019-12-17 20:37:56 +00001632 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char *)initializer, vectorSize);
Nicolas Capens157ba262019-12-10 17:49:14 -05001633 }
1634 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001635 case Type_v2f32:
Nicolas Capens157ba262019-12-10 17:49:14 -05001636 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001637 const float initializer[4] = { (float)f[0], (float)f[1], (float)f[0], (float)f[1] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001638 static_assert(sizeof(initializer) == vectorSize, "!");
Ben Clayton713b8d32019-12-17 20:37:56 +00001639 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char *)initializer, vectorSize);
Nicolas Capens157ba262019-12-10 17:49:14 -05001640 }
1641 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001642 case Type_v4i16:
Nicolas Capens157ba262019-12-10 17:49:14 -05001643 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001644 const short initializer[8] = { (short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001645 static_assert(sizeof(initializer) == vectorSize, "!");
Ben Clayton713b8d32019-12-17 20:37:56 +00001646 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char *)initializer, vectorSize);
Nicolas Capens157ba262019-12-10 17:49:14 -05001647 }
1648 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001649 case Type_v8i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05001650 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001651 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001652 static_assert(sizeof(initializer) == vectorSize, "!");
Ben Clayton713b8d32019-12-17 20:37:56 +00001653 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char *)initializer, vectorSize);
Nicolas Capens157ba262019-12-10 17:49:14 -05001654 }
1655 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001656 case Type_v4i8:
Nicolas Capens157ba262019-12-10 17:49:14 -05001657 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001658 const char initializer[16] = { (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3] };
Nicolas Capens157ba262019-12-10 17:49:14 -05001659 static_assert(sizeof(initializer) == vectorSize, "!");
Ben Clayton713b8d32019-12-17 20:37:56 +00001660 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char *)initializer, vectorSize);
Nicolas Capens157ba262019-12-10 17:49:14 -05001661 }
1662 break;
Ben Clayton713b8d32019-12-17 20:37:56 +00001663 default:
1664 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens157ba262019-12-10 17:49:14 -05001665 }
1666
1667 auto name = Ice::GlobalString::createWithoutString(::context);
1668 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1669 variableDeclaration->setName(name);
1670 variableDeclaration->setAlignment(alignment);
1671 variableDeclaration->setIsConstant(true);
1672 variableDeclaration->addInitializer(dataInitializer);
1673
1674 ::function->addGlobal(variableDeclaration);
1675
1676 constexpr int32_t offset = 0;
1677 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1678
1679 Ice::Variable *result = ::function->makeVariable(T(type));
1680 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1681 ::basicBlock->appendInst(load);
1682
1683 return V(result);
1684}
1685
1686Value *Nucleus::createConstantVector(const double *constants, Type *type)
1687{
Ben Clayton713b8d32019-12-17 20:37:56 +00001688 return createConstantVector((const int64_t *)constants, type);
Nicolas Capens157ba262019-12-10 17:49:14 -05001689}
1690
1691Type *Void::getType()
1692{
1693 return T(Ice::IceType_void);
1694}
1695
1696Type *Bool::getType()
1697{
1698 return T(Ice::IceType_i1);
1699}
1700
1701Type *Byte::getType()
1702{
1703 return T(Ice::IceType_i8);
1704}
1705
1706Type *SByte::getType()
1707{
1708 return T(Ice::IceType_i8);
1709}
1710
1711Type *Short::getType()
1712{
1713 return T(Ice::IceType_i16);
1714}
1715
1716Type *UShort::getType()
1717{
1718 return T(Ice::IceType_i16);
1719}
1720
1721Type *Byte4::getType()
1722{
1723 return T(Type_v4i8);
1724}
1725
1726Type *SByte4::getType()
1727{
1728 return T(Type_v4i8);
1729}
1730
Ben Clayton713b8d32019-12-17 20:37:56 +00001731namespace {
1732RValue<Byte> SaturateUnsigned(RValue<Short> x)
Nicolas Capens157ba262019-12-10 17:49:14 -05001733{
Ben Clayton713b8d32019-12-17 20:37:56 +00001734 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
Nicolas Capens157ba262019-12-10 17:49:14 -05001735}
1736
Ben Clayton713b8d32019-12-17 20:37:56 +00001737RValue<Byte> Extract(RValue<Byte8> val, int i)
1738{
1739 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
1740}
1741
1742RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
1743{
1744 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
1745}
1746} // namespace
1747
Nicolas Capens157ba262019-12-10 17:49:14 -05001748RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
1749{
1750 if(emulateIntrinsics)
1751 {
1752 Byte8 result;
1753 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1754 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1755 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1756 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1757 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1758 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1759 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1760 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
1761
1762 return result;
1763 }
1764 else
1765 {
1766 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00001767 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001768 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1769 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1770 paddusb->addArg(x.value);
1771 paddusb->addArg(y.value);
1772 ::basicBlock->appendInst(paddusb);
1773
1774 return RValue<Byte8>(V(result));
1775 }
1776}
1777
1778RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
1779{
1780 if(emulateIntrinsics)
1781 {
1782 Byte8 result;
1783 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1784 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1785 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1786 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1787 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1788 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1789 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1790 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
1791
1792 return result;
1793 }
1794 else
1795 {
1796 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00001797 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001798 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1799 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1800 psubusw->addArg(x.value);
1801 psubusw->addArg(y.value);
1802 ::basicBlock->appendInst(psubusw);
1803
1804 return RValue<Byte8>(V(result));
1805 }
1806}
1807
1808RValue<SByte> Extract(RValue<SByte8> val, int i)
1809{
1810 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
1811}
1812
1813RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
1814{
1815 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
1816}
1817
1818RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1819{
1820 if(emulateIntrinsics)
1821 {
1822 SByte8 result;
1823 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
1824 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
1825 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
1826 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
1827 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
1828 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
1829 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
1830 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
1831
1832 return result;
1833 }
1834 else
1835 {
Ben Clayton713b8d32019-12-17 20:37:56 +00001836#if defined(__i386__) || defined(__x86_64__)
1837 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
1838 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
1839 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
Nicolas Capens157ba262019-12-10 17:49:14 -05001840
Ben Clayton713b8d32019-12-17 20:37:56 +00001841 return As<SByte8>(hi | lo);
1842#else
1843 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
1844#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -04001845 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001846}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001847
Nicolas Capens157ba262019-12-10 17:49:14 -05001848RValue<Int> SignMask(RValue<Byte8> x)
1849{
1850 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001851 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001852 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
1853 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001854 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001855 else
Ben Clayton55bc37a2019-07-04 12:17:12 +01001856 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001857 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00001858 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001859 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1860 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
1861 movmsk->addArg(x.value);
1862 ::basicBlock->appendInst(movmsk);
Ben Clayton55bc37a2019-07-04 12:17:12 +01001863
Nicolas Capens157ba262019-12-10 17:49:14 -05001864 return RValue<Int>(V(result)) & 0xFF;
Ben Clayton55bc37a2019-07-04 12:17:12 +01001865 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001866}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001867
1868// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
1869// {
Nicolas Capens2f970b62016-11-08 14:28:59 -05001870// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001871// }
1872
Nicolas Capens157ba262019-12-10 17:49:14 -05001873RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
1874{
1875 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
1876}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001877
Nicolas Capens157ba262019-12-10 17:49:14 -05001878Type *Byte8::getType()
1879{
1880 return T(Type_v8i8);
1881}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001882
Nicolas Capens598f8d82016-09-26 15:09:10 -04001883// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
1884// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001885// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001886// }
1887
1888// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1889// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001890// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001891// }
1892
Nicolas Capens157ba262019-12-10 17:49:14 -05001893RValue<SByte> SaturateSigned(RValue<Short> x)
1894{
1895 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
1896}
1897
1898RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
1899{
1900 if(emulateIntrinsics)
Nicolas Capens98436732017-07-25 15:32:12 -04001901 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001902 SByte8 result;
1903 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1904 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1905 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1906 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1907 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1908 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1909 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1910 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capens98436732017-07-25 15:32:12 -04001911
Nicolas Capens157ba262019-12-10 17:49:14 -05001912 return result;
1913 }
1914 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001915 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001916 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00001917 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001918 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1919 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1920 paddsb->addArg(x.value);
1921 paddsb->addArg(y.value);
1922 ::basicBlock->appendInst(paddsb);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001923
Nicolas Capens157ba262019-12-10 17:49:14 -05001924 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001925 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001926}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001927
Nicolas Capens157ba262019-12-10 17:49:14 -05001928RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
1929{
1930 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001931 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001932 SByte8 result;
1933 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1934 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1935 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1936 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1937 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1938 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1939 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1940 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001941
Nicolas Capens157ba262019-12-10 17:49:14 -05001942 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04001943 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001944 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001945 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001946 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00001947 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001948 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1949 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1950 psubsb->addArg(x.value);
1951 psubsb->addArg(y.value);
1952 ::basicBlock->appendInst(psubsb);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04001953
Nicolas Capens157ba262019-12-10 17:49:14 -05001954 return RValue<SByte8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001955 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001956}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001957
Nicolas Capens157ba262019-12-10 17:49:14 -05001958RValue<Int> SignMask(RValue<SByte8> x)
1959{
1960 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001961 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001962 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
1963 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001964 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001965 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04001966 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001967 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00001968 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05001969 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1970 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
1971 movmsk->addArg(x.value);
1972 ::basicBlock->appendInst(movmsk);
1973
1974 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capens598f8d82016-09-26 15:09:10 -04001975 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001976}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001977
Nicolas Capens157ba262019-12-10 17:49:14 -05001978RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
1979{
1980 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
1981}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001982
Nicolas Capens157ba262019-12-10 17:49:14 -05001983RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
1984{
1985 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
1986}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001987
Nicolas Capens157ba262019-12-10 17:49:14 -05001988Type *SByte8::getType()
1989{
1990 return T(Type_v8i8);
1991}
Nicolas Capens598f8d82016-09-26 15:09:10 -04001992
Nicolas Capens157ba262019-12-10 17:49:14 -05001993Type *Byte16::getType()
1994{
1995 return T(Ice::IceType_v16i8);
1996}
Nicolas Capens16b5f152016-10-13 13:39:01 -04001997
Nicolas Capens157ba262019-12-10 17:49:14 -05001998Type *SByte16::getType()
1999{
2000 return T(Ice::IceType_v16i8);
2001}
Nicolas Capens16b5f152016-10-13 13:39:01 -04002002
Nicolas Capens157ba262019-12-10 17:49:14 -05002003Type *Short2::getType()
2004{
2005 return T(Type_v2i16);
2006}
Nicolas Capensd4227962016-11-09 14:24:25 -05002007
Nicolas Capens157ba262019-12-10 17:49:14 -05002008Type *UShort2::getType()
2009{
2010 return T(Type_v2i16);
2011}
Nicolas Capensd4227962016-11-09 14:24:25 -05002012
Nicolas Capens157ba262019-12-10 17:49:14 -05002013Short4::Short4(RValue<Int4> cast)
2014{
Ben Clayton713b8d32019-12-17 20:37:56 +00002015 int select[8] = { 0, 2, 4, 6, 0, 2, 4, 6 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002016 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2017 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2018
2019 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
2020 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2021
2022 storeValue(short4);
2023}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002024
2025// Short4::Short4(RValue<Float> cast)
2026// {
2027// }
2028
Nicolas Capens157ba262019-12-10 17:49:14 -05002029Short4::Short4(RValue<Float4> cast)
2030{
2031 UNIMPLEMENTED("Short4::Short4(RValue<Float4> cast)");
2032}
2033
2034RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2035{
2036 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002037 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002038 Short4 result;
2039 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2040 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2041 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2042 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
Chris Forbesaa8f6992019-03-01 14:18:30 -08002043
2044 return result;
2045 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002046 else
Chris Forbesaa8f6992019-03-01 14:18:30 -08002047 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002048 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2049 }
2050}
Chris Forbesaa8f6992019-03-01 14:18:30 -08002051
Nicolas Capens157ba262019-12-10 17:49:14 -05002052RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2053{
2054 if(emulateIntrinsics)
2055 {
2056 Short4 result;
2057 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2058 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2059 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2060 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2061
2062 return result;
2063 }
2064 else
2065 {
2066 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2067 }
2068}
2069
2070RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2071{
2072 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2073 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2074 ::basicBlock->appendInst(cmp);
2075
2076 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2077 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2078 ::basicBlock->appendInst(select);
2079
2080 return RValue<Short4>(V(result));
2081}
2082
2083RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2084{
2085 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2086 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2087 ::basicBlock->appendInst(cmp);
2088
2089 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2090 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2091 ::basicBlock->appendInst(select);
2092
2093 return RValue<Short4>(V(result));
2094}
2095
2096RValue<Short> SaturateSigned(RValue<Int> x)
2097{
2098 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
2099}
2100
2101RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2102{
2103 if(emulateIntrinsics)
2104 {
2105 Short4 result;
2106 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2107 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2108 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2109 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2110
2111 return result;
2112 }
2113 else
2114 {
2115 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002116 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002117 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2118 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2119 paddsw->addArg(x.value);
2120 paddsw->addArg(y.value);
2121 ::basicBlock->appendInst(paddsw);
2122
2123 return RValue<Short4>(V(result));
2124 }
2125}
2126
2127RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2128{
2129 if(emulateIntrinsics)
2130 {
2131 Short4 result;
2132 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2133 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2134 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2135 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2136
2137 return result;
2138 }
2139 else
2140 {
2141 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002142 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002143 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2144 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2145 psubsw->addArg(x.value);
2146 psubsw->addArg(y.value);
2147 ::basicBlock->appendInst(psubsw);
2148
2149 return RValue<Short4>(V(result));
2150 }
2151}
2152
2153RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2154{
2155 if(emulateIntrinsics)
2156 {
2157 Short4 result;
2158 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
2159 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
2160 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
2161 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
2162
2163 return result;
2164 }
2165 else
2166 {
2167 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002168 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002169 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2170 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2171 pmulhw->addArg(x.value);
2172 pmulhw->addArg(y.value);
2173 ::basicBlock->appendInst(pmulhw);
2174
2175 return RValue<Short4>(V(result));
2176 }
2177}
2178
2179RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2180{
2181 if(emulateIntrinsics)
2182 {
2183 Int2 result;
2184 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2185 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
2186
2187 return result;
2188 }
2189 else
2190 {
2191 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002192 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002193 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2194 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2195 pmaddwd->addArg(x.value);
2196 pmaddwd->addArg(y.value);
2197 ::basicBlock->appendInst(pmaddwd);
2198
2199 return As<Int2>(V(result));
2200 }
2201}
2202
2203RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2204{
2205 if(emulateIntrinsics)
2206 {
2207 SByte8 result;
2208 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2209 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2210 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2211 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2212 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2213 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2214 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2215 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
2216
2217 return result;
2218 }
2219 else
2220 {
2221 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002222 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002223 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2224 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2225 pack->addArg(x.value);
2226 pack->addArg(y.value);
2227 ::basicBlock->appendInst(pack);
2228
2229 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2230 }
2231}
2232
2233RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2234{
2235 if(emulateIntrinsics)
2236 {
2237 Byte8 result;
2238 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2239 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2240 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2241 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2242 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2243 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2244 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2245 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2246
2247 return result;
2248 }
2249 else
2250 {
2251 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
Ben Clayton713b8d32019-12-17 20:37:56 +00002252 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002253 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2254 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2255 pack->addArg(x.value);
2256 pack->addArg(y.value);
2257 ::basicBlock->appendInst(pack);
2258
2259 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x0202));
2260 }
2261}
2262
2263RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2264{
2265 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2266}
2267
2268RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2269{
2270 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
2271}
2272
2273Type *Short4::getType()
2274{
2275 return T(Type_v4i16);
2276}
2277
2278UShort4::UShort4(RValue<Float4> cast, bool saturate)
2279{
2280 if(saturate)
2281 {
2282 if(CPUID::SSE4_1)
Chris Forbesaa8f6992019-03-01 14:18:30 -08002283 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002284 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2285 // PackUnsigned takes care of 0x0000 saturation.
2286 Int4 int4(Min(cast, Float4(0xFFFF)));
2287 *this = As<UShort4>(PackUnsigned(int4, int4));
Chris Forbesaa8f6992019-03-01 14:18:30 -08002288 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002289 else if(CPUID::ARM)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002290 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002291 // ARM saturates the 32-bit integer result on overflow/undeflow.
2292 Int4 int4(cast);
2293 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002294 }
2295 else
2296 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002297 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002298 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002299 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002300 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002301 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002302 *this = Short4(Int4(cast));
2303 }
2304}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002305
Nicolas Capens157ba262019-12-10 17:49:14 -05002306RValue<UShort> Extract(RValue<UShort4> val, int i)
2307{
2308 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2309}
2310
2311RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2312{
2313 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2314}
2315
2316RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2317{
2318 if(emulateIntrinsics)
2319 {
2320 UShort4 result;
2321 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2322 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2323 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2324 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2325
2326 return result;
2327 }
2328 else
2329 {
2330 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2331 }
2332}
2333
2334RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2335{
2336 if(emulateIntrinsics)
2337 {
2338 UShort4 result;
2339 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2340 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2341 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2342 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2343
2344 return result;
2345 }
2346 else
2347 {
2348 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2349 }
2350}
2351
2352RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2353{
2354 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2355 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2356 ::basicBlock->appendInst(cmp);
2357
2358 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2359 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2360 ::basicBlock->appendInst(select);
2361
2362 return RValue<UShort4>(V(result));
2363}
2364
2365RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2366{
2367 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2368 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2369 ::basicBlock->appendInst(cmp);
2370
2371 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2372 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2373 ::basicBlock->appendInst(select);
2374
2375 return RValue<UShort4>(V(result));
2376}
2377
2378RValue<UShort> SaturateUnsigned(RValue<Int> x)
2379{
2380 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2381}
2382
2383RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2384{
2385 if(emulateIntrinsics)
2386 {
2387 UShort4 result;
2388 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2389 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2390 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2391 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
2392
2393 return result;
2394 }
2395 else
2396 {
2397 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002398 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002399 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2400 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2401 paddusw->addArg(x.value);
2402 paddusw->addArg(y.value);
2403 ::basicBlock->appendInst(paddusw);
2404
2405 return RValue<UShort4>(V(result));
2406 }
2407}
2408
2409RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2410{
2411 if(emulateIntrinsics)
2412 {
2413 UShort4 result;
2414 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2415 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2416 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2417 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
2418
2419 return result;
2420 }
2421 else
2422 {
2423 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002424 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002425 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2426 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2427 psubusw->addArg(x.value);
2428 psubusw->addArg(y.value);
2429 ::basicBlock->appendInst(psubusw);
2430
2431 return RValue<UShort4>(V(result));
2432 }
2433}
2434
2435RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2436{
2437 if(emulateIntrinsics)
2438 {
2439 UShort4 result;
2440 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2441 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2442 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2443 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
2444
2445 return result;
2446 }
2447 else
2448 {
2449 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00002450 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002451 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2452 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2453 pmulhuw->addArg(x.value);
2454 pmulhuw->addArg(y.value);
2455 ::basicBlock->appendInst(pmulhuw);
2456
2457 return RValue<UShort4>(V(result));
2458 }
2459}
2460
2461RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2462{
2463 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2464
2465 // Scalarized implementation.
2466 Int4 result;
2467 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2468 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2469 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2470 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2471
2472 return result;
2473}
2474
2475RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2476{
2477 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2478
2479 if(false) // Partial product based implementation.
2480 {
2481 auto xh = x >> 16;
2482 auto yh = y >> 16;
2483 auto xl = x & UInt4(0x0000FFFF);
2484 auto yl = y & UInt4(0x0000FFFF);
2485 auto xlyh = xl * yh;
2486 auto xhyl = xh * yl;
2487 auto xlyhh = xlyh >> 16;
2488 auto xhylh = xhyl >> 16;
2489 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2490 auto xhyll = xhyl & UInt4(0x0000FFFF);
2491 auto xlylh = (xl * yl) >> 16;
2492 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2493
2494 return (xh * yh) + (xlyhh + xhylh) + oflow;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002495 }
2496
Nicolas Capens157ba262019-12-10 17:49:14 -05002497 // Scalarized implementation.
2498 Int4 result;
2499 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
2500 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
2501 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
2502 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
2503
2504 return As<UInt4>(result);
2505}
2506
2507RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2508{
2509 UNIMPLEMENTED("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
2510 return UShort4(0);
2511}
2512
2513Type *UShort4::getType()
2514{
2515 return T(Type_v4i16);
2516}
2517
2518RValue<Short> Extract(RValue<Short8> val, int i)
2519{
2520 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2521}
2522
2523RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2524{
2525 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2526}
2527
2528RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2529{
2530 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002531 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002532 Short8 result;
2533 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2534 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2535 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2536 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
2537 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
2538 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
2539 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
2540 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002541
Nicolas Capens157ba262019-12-10 17:49:14 -05002542 return result;
2543 }
2544 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002545 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002546 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002547 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002548}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002549
Nicolas Capens157ba262019-12-10 17:49:14 -05002550RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
2551{
2552 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002553 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002554 Short8 result;
2555 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2556 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2557 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2558 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2559 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
2560 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
2561 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
2562 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002563
Nicolas Capens157ba262019-12-10 17:49:14 -05002564 return result;
2565 }
2566 else
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002567 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002568 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002569 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002570}
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002571
Nicolas Capens157ba262019-12-10 17:49:14 -05002572RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
2573{
2574 UNIMPLEMENTED("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
2575 return Int4(0);
2576}
2577
2578RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
2579{
2580 UNIMPLEMENTED("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
2581 return Short8(0);
2582}
2583
2584Type *Short8::getType()
2585{
2586 return T(Ice::IceType_v8i16);
2587}
2588
2589RValue<UShort> Extract(RValue<UShort8> val, int i)
2590{
2591 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2592}
2593
2594RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
2595{
2596 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
2597}
2598
2599RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
2600{
2601 if(emulateIntrinsics)
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002602 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002603 UShort8 result;
2604 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2605 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2606 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2607 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2608 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
2609 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
2610 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
2611 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002612
Nicolas Capens157ba262019-12-10 17:49:14 -05002613 return result;
2614 }
2615 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002616 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002617 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002618 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002619}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002620
Nicolas Capens157ba262019-12-10 17:49:14 -05002621RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
2622{
2623 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002624 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002625 UShort8 result;
2626 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2627 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2628 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2629 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2630 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
2631 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
2632 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
2633 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002634
Nicolas Capens157ba262019-12-10 17:49:14 -05002635 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002636 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002637 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002638 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002639 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002640 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002641}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002642
Nicolas Capens157ba262019-12-10 17:49:14 -05002643RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
2644{
2645 UNIMPLEMENTED("RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)");
2646 return UShort8(0);
2647}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002648
Nicolas Capens157ba262019-12-10 17:49:14 -05002649RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
2650{
2651 UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
2652 return UShort8(0);
2653}
2654
2655// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
Nicolas Capens598f8d82016-09-26 15:09:10 -04002656// RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
2657// {
Ben Claytoneb50d252019-04-15 13:50:01 -04002658// ASSERT(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002659// }
2660
Nicolas Capens157ba262019-12-10 17:49:14 -05002661Type *UShort8::getType()
2662{
2663 return T(Ice::IceType_v8i16);
2664}
2665
Ben Clayton713b8d32019-12-17 20:37:56 +00002666RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002667{
2668 RValue<Int> res = val;
2669 val += 1;
2670 return res;
2671}
2672
Ben Clayton713b8d32019-12-17 20:37:56 +00002673const Int &operator++(Int &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002674{
2675 val += 1;
2676 return val;
2677}
2678
Ben Clayton713b8d32019-12-17 20:37:56 +00002679RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002680{
2681 RValue<Int> res = val;
2682 val -= 1;
2683 return res;
2684}
2685
Ben Clayton713b8d32019-12-17 20:37:56 +00002686const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002687{
2688 val -= 1;
2689 return val;
2690}
2691
2692RValue<Int> RoundInt(RValue<Float> cast)
2693{
2694 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002695 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002696 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2697 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002698 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002699 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002700 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002701 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00002702 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05002703 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2704 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2705 nearbyint->addArg(cast.value);
2706 ::basicBlock->appendInst(nearbyint);
2707
2708 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002709 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002710}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002711
Nicolas Capens157ba262019-12-10 17:49:14 -05002712Type *Int::getType()
2713{
2714 return T(Ice::IceType_i32);
2715}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002716
Nicolas Capens157ba262019-12-10 17:49:14 -05002717Type *Long::getType()
2718{
2719 return T(Ice::IceType_i64);
2720}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002721
Nicolas Capens157ba262019-12-10 17:49:14 -05002722UInt::UInt(RValue<Float> cast)
2723{
2724 // Smallest positive value representable in UInt, but not in Int
2725 const unsigned int ustart = 0x80000000u;
2726 const float ustartf = float(ustart);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002727
Nicolas Capens157ba262019-12-10 17:49:14 -05002728 // If the value is negative, store 0, otherwise store the result of the conversion
2729 storeValue((~(As<Int>(cast) >> 31) &
Ben Clayton713b8d32019-12-17 20:37:56 +00002730 // Check if the value can be represented as an Int
2731 IfThenElse(cast >= ustartf,
2732 // If the value is too large, subtract ustart and re-add it after conversion.
2733 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2734 // Otherwise, just convert normally
2735 Int(cast)))
2736 .value);
Nicolas Capens157ba262019-12-10 17:49:14 -05002737}
Nicolas Capensa8086512016-11-07 17:32:17 -05002738
Ben Clayton713b8d32019-12-17 20:37:56 +00002739RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002740{
2741 RValue<UInt> res = val;
2742 val += 1;
2743 return res;
2744}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002745
Ben Clayton713b8d32019-12-17 20:37:56 +00002746const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens157ba262019-12-10 17:49:14 -05002747{
2748 val += 1;
2749 return val;
2750}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002751
Ben Clayton713b8d32019-12-17 20:37:56 +00002752RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002753{
2754 RValue<UInt> res = val;
2755 val -= 1;
2756 return res;
2757}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002758
Ben Clayton713b8d32019-12-17 20:37:56 +00002759const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens157ba262019-12-10 17:49:14 -05002760{
2761 val -= 1;
2762 return val;
2763}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002764
Nicolas Capens598f8d82016-09-26 15:09:10 -04002765// RValue<UInt> RoundUInt(RValue<Float> cast)
2766// {
Ben Claytoneb50d252019-04-15 13:50:01 -04002767// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002768// }
2769
Nicolas Capens157ba262019-12-10 17:49:14 -05002770Type *UInt::getType()
2771{
2772 return T(Ice::IceType_i32);
2773}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002774
2775// Int2::Int2(RValue<Int> cast)
2776// {
2777// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2778// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
2779//
2780// Constant *shuffle[2];
2781// shuffle[0] = Nucleus::createConstantInt(0);
2782// shuffle[1] = Nucleus::createConstantInt(0);
2783//
2784// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
2785//
2786// storeValue(replicate);
2787// }
2788
Nicolas Capens157ba262019-12-10 17:49:14 -05002789RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
2790{
2791 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002792 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002793 Int2 result;
2794 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2795 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002796
Nicolas Capens157ba262019-12-10 17:49:14 -05002797 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002798 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002799 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002800 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002801 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002802 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002803}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002804
Nicolas Capens157ba262019-12-10 17:49:14 -05002805RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
2806{
2807 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002808 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002809 Int2 result;
2810 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2811 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2812
2813 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002814 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002815 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002816 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002817 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002818 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002819}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002820
Nicolas Capens157ba262019-12-10 17:49:14 -05002821Type *Int2::getType()
2822{
2823 return T(Type_v2i32);
2824}
2825
2826RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
2827{
2828 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002829 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002830 UInt2 result;
2831 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
2832 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002833
Nicolas Capens157ba262019-12-10 17:49:14 -05002834 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002835 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002836 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002837 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002838 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002839 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002840}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002841
Nicolas Capens157ba262019-12-10 17:49:14 -05002842RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
2843{
2844 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002845 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002846 UInt2 result;
2847 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
2848 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
Nicolas Capensd4227962016-11-09 14:24:25 -05002849
Nicolas Capens157ba262019-12-10 17:49:14 -05002850 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002851 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002852 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002853 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002854 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002855 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002856}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002857
Nicolas Capens157ba262019-12-10 17:49:14 -05002858Type *UInt2::getType()
2859{
2860 return T(Type_v2i32);
2861}
2862
Ben Clayton713b8d32019-12-17 20:37:56 +00002863Int4::Int4(RValue<Byte4> cast)
2864 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002865{
2866 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2867 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2868
2869 Value *e;
Ben Clayton713b8d32019-12-17 20:37:56 +00002870 int swizzle[16] = { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002871 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2872 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
2873
Ben Clayton713b8d32019-12-17 20:37:56 +00002874 int swizzle2[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002875 Value *d = Nucleus::createBitCast(c, Short8::getType());
2876 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
2877
2878 Value *f = Nucleus::createBitCast(e, Int4::getType());
2879 storeValue(f);
2880}
2881
Ben Clayton713b8d32019-12-17 20:37:56 +00002882Int4::Int4(RValue<SByte4> cast)
2883 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002884{
2885 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2886 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2887
Ben Clayton713b8d32019-12-17 20:37:56 +00002888 int swizzle[16] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002889 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2890 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
2891
Ben Clayton713b8d32019-12-17 20:37:56 +00002892 int swizzle2[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002893 Value *d = Nucleus::createBitCast(c, Short8::getType());
2894 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
2895
2896 *this = As<Int4>(e) >> 24;
2897}
2898
Ben Clayton713b8d32019-12-17 20:37:56 +00002899Int4::Int4(RValue<Short4> cast)
2900 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002901{
Ben Clayton713b8d32019-12-17 20:37:56 +00002902 int swizzle[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002903 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
2904
2905 *this = As<Int4>(c) >> 16;
2906}
2907
Ben Clayton713b8d32019-12-17 20:37:56 +00002908Int4::Int4(RValue<UShort4> cast)
2909 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002910{
Ben Clayton713b8d32019-12-17 20:37:56 +00002911 int swizzle[8] = { 0, 8, 1, 9, 2, 10, 3, 11 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002912 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2913 Value *d = Nucleus::createBitCast(c, Int4::getType());
2914 storeValue(d);
2915}
2916
Ben Clayton713b8d32019-12-17 20:37:56 +00002917Int4::Int4(RValue<Int> rhs)
2918 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05002919{
2920 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
2921
Ben Clayton713b8d32019-12-17 20:37:56 +00002922 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05002923 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
2924
2925 storeValue(replicate);
2926}
2927
2928RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
2929{
2930 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002931 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002932 Int4 result;
2933 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2934 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
2935 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
2936 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002937
Nicolas Capens157ba262019-12-10 17:49:14 -05002938 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002939 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002940 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002941 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002942 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002943 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002944}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002945
Nicolas Capens157ba262019-12-10 17:49:14 -05002946RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
2947{
2948 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002949 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002950 Int4 result;
2951 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2952 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2953 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
2954 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
Nicolas Capensd4227962016-11-09 14:24:25 -05002955
Nicolas Capens157ba262019-12-10 17:49:14 -05002956 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002957 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002958 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04002959 {
Nicolas Capens157ba262019-12-10 17:49:14 -05002960 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002961 }
Nicolas Capens157ba262019-12-10 17:49:14 -05002962}
Nicolas Capens598f8d82016-09-26 15:09:10 -04002963
Nicolas Capens157ba262019-12-10 17:49:14 -05002964RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2965{
2966 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
2967}
2968
2969RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2970{
2971 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
2972}
2973
2974RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2975{
2976 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
2977}
2978
2979RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2980{
2981 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
2982}
2983
2984RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2985{
2986 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
2987}
2988
2989RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2990{
2991 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
2992}
2993
2994RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2995{
2996 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
2997 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2998 ::basicBlock->appendInst(cmp);
2999
3000 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3001 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3002 ::basicBlock->appendInst(select);
3003
3004 return RValue<Int4>(V(result));
3005}
3006
3007RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
3008{
3009 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3010 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3011 ::basicBlock->appendInst(cmp);
3012
3013 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3014 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3015 ::basicBlock->appendInst(select);
3016
3017 return RValue<Int4>(V(result));
3018}
3019
3020RValue<Int4> RoundInt(RValue<Float4> cast)
3021{
3022 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003023 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003024 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3025 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003026 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003027 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003028 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003029 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003030 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003031 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3032 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3033 nearbyint->addArg(cast.value);
3034 ::basicBlock->appendInst(nearbyint);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003035
3036 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003037 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003038}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003039
Nicolas Capens157ba262019-12-10 17:49:14 -05003040RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
3041{
3042 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003043 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003044 Short8 result;
3045 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
3046 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
3047 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
3048 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
3049 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
3050 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
3051 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
3052 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003053
Nicolas Capens157ba262019-12-10 17:49:14 -05003054 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003055 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003056 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003057 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003058 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003059 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003060 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3061 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3062 pack->addArg(x.value);
3063 pack->addArg(y.value);
3064 ::basicBlock->appendInst(pack);
Nicolas Capensa8086512016-11-07 17:32:17 -05003065
Nicolas Capens157ba262019-12-10 17:49:14 -05003066 return RValue<Short8>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003067 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003068}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003069
Nicolas Capens157ba262019-12-10 17:49:14 -05003070RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
3071{
3072 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
Nicolas Capens598f8d82016-09-26 15:09:10 -04003073 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003074 RValue<Int4> sx = As<Int4>(x);
3075 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
Nicolas Capensec54a172016-10-25 17:32:37 -04003076
Nicolas Capens157ba262019-12-10 17:49:14 -05003077 RValue<Int4> sy = As<Int4>(y);
3078 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
Nicolas Capens8960fbf2017-07-25 15:32:12 -04003079
Nicolas Capens157ba262019-12-10 17:49:14 -05003080 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003081 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003082 else
Nicolas Capens33438a62017-09-27 11:47:35 -04003083 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003084 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
Ben Clayton713b8d32019-12-17 20:37:56 +00003085 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003086 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3087 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3088 pack->addArg(x.value);
3089 pack->addArg(y.value);
3090 ::basicBlock->appendInst(pack);
Nicolas Capens091f3502017-10-03 14:56:49 -04003091
Nicolas Capens157ba262019-12-10 17:49:14 -05003092 return RValue<UShort8>(V(result));
Nicolas Capens33438a62017-09-27 11:47:35 -04003093 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003094}
Nicolas Capens33438a62017-09-27 11:47:35 -04003095
Nicolas Capens157ba262019-12-10 17:49:14 -05003096RValue<Int> SignMask(RValue<Int4> x)
3097{
3098 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003099 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003100 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3101 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003102 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003103 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003104 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003105 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003106 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003107 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3108 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3109 movmsk->addArg(x.value);
3110 ::basicBlock->appendInst(movmsk);
3111
3112 return RValue<Int>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003113 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003114}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003115
Nicolas Capens157ba262019-12-10 17:49:14 -05003116Type *Int4::getType()
3117{
3118 return T(Ice::IceType_v4i32);
3119}
3120
Ben Clayton713b8d32019-12-17 20:37:56 +00003121UInt4::UInt4(RValue<Float4> cast)
3122 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003123{
3124 // Smallest positive value representable in UInt, but not in Int
3125 const unsigned int ustart = 0x80000000u;
3126 const float ustartf = float(ustart);
3127
3128 // Check if the value can be represented as an Int
3129 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
3130 // If the value is too large, subtract ustart and re-add it after conversion.
3131 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
Ben Clayton713b8d32019-12-17 20:37:56 +00003132 // Otherwise, just convert normally
Nicolas Capens157ba262019-12-10 17:49:14 -05003133 (~uiValue & Int4(cast));
3134 // If the value is negative, store 0, otherwise store the result of the conversion
3135 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
3136}
3137
Ben Clayton713b8d32019-12-17 20:37:56 +00003138UInt4::UInt4(RValue<UInt> rhs)
3139 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003140{
3141 Value *vector = Nucleus::createBitCast(rhs.value, UInt4::getType());
3142
Ben Clayton713b8d32019-12-17 20:37:56 +00003143 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003144 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3145
3146 storeValue(replicate);
3147}
3148
3149RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
3150{
3151 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003152 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003153 UInt4 result;
3154 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
3155 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
3156 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
3157 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
Nicolas Capensc70a1162016-12-03 00:16:14 -05003158
Nicolas Capens157ba262019-12-10 17:49:14 -05003159 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003160 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003161 else
Ben Clayton88816fa2019-05-15 17:08:14 +01003162 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003163 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Ben Clayton88816fa2019-05-15 17:08:14 +01003164 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003165}
Ben Clayton88816fa2019-05-15 17:08:14 +01003166
Nicolas Capens157ba262019-12-10 17:49:14 -05003167RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
3168{
3169 if(emulateIntrinsics)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003170 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003171 UInt4 result;
3172 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
3173 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
3174 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
3175 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04003176
Nicolas Capens157ba262019-12-10 17:49:14 -05003177 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003178 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003179 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003180 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003181 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003182 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003183}
Nicolas Capens598f8d82016-09-26 15:09:10 -04003184
Nicolas Capens157ba262019-12-10 17:49:14 -05003185RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
3186{
3187 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
3188}
3189
3190RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3191{
3192 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
3193}
3194
3195RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3196{
3197 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
3198}
3199
3200RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3201{
3202 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
3203}
3204
3205RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3206{
3207 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
3208}
3209
3210RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3211{
3212 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
3213}
3214
3215RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3216{
3217 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3218 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3219 ::basicBlock->appendInst(cmp);
3220
3221 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3222 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3223 ::basicBlock->appendInst(select);
3224
3225 return RValue<UInt4>(V(result));
3226}
3227
3228RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3229{
3230 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3231 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3232 ::basicBlock->appendInst(cmp);
3233
3234 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3235 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3236 ::basicBlock->appendInst(select);
3237
3238 return RValue<UInt4>(V(result));
3239}
3240
3241Type *UInt4::getType()
3242{
3243 return T(Ice::IceType_v4i32);
3244}
3245
3246Type *Half::getType()
3247{
3248 return T(Ice::IceType_i16);
3249}
3250
3251RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3252{
3253 return 1.0f / x;
3254}
3255
3256RValue<Float> RcpSqrt_pp(RValue<Float> x)
3257{
3258 return Rcp_pp(Sqrt(x));
3259}
3260
3261RValue<Float> Sqrt(RValue<Float> x)
3262{
3263 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003264 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003265 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3266 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3267 sqrt->addArg(x.value);
3268 ::basicBlock->appendInst(sqrt);
3269
3270 return RValue<Float>(V(result));
3271}
3272
3273RValue<Float> Round(RValue<Float> x)
3274{
3275 return Float4(Round(Float4(x))).x;
3276}
3277
3278RValue<Float> Trunc(RValue<Float> x)
3279{
3280 return Float4(Trunc(Float4(x))).x;
3281}
3282
3283RValue<Float> Frac(RValue<Float> x)
3284{
3285 return Float4(Frac(Float4(x))).x;
3286}
3287
3288RValue<Float> Floor(RValue<Float> x)
3289{
3290 return Float4(Floor(Float4(x))).x;
3291}
3292
3293RValue<Float> Ceil(RValue<Float> x)
3294{
3295 return Float4(Ceil(Float4(x))).x;
3296}
3297
3298Type *Float::getType()
3299{
3300 return T(Ice::IceType_f32);
3301}
3302
3303Type *Float2::getType()
3304{
3305 return T(Type_v2f32);
3306}
3307
Ben Clayton713b8d32019-12-17 20:37:56 +00003308Float4::Float4(RValue<Float> rhs)
3309 : XYZW(this)
Nicolas Capens157ba262019-12-10 17:49:14 -05003310{
3311 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
3312
Ben Clayton713b8d32019-12-17 20:37:56 +00003313 int swizzle[4] = { 0, 0, 0, 0 };
Nicolas Capens157ba262019-12-10 17:49:14 -05003314 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
3315
3316 storeValue(replicate);
3317}
3318
3319RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3320{
3321 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3322 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
3323 ::basicBlock->appendInst(cmp);
3324
3325 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3326 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3327 ::basicBlock->appendInst(select);
3328
3329 return RValue<Float4>(V(result));
3330}
3331
3332RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3333{
3334 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3335 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
3336 ::basicBlock->appendInst(cmp);
3337
3338 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3339 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
3340 ::basicBlock->appendInst(select);
3341
3342 return RValue<Float4>(V(result));
3343}
3344
3345RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3346{
3347 return Float4(1.0f) / x;
3348}
3349
3350RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3351{
3352 return Rcp_pp(Sqrt(x));
3353}
3354
3355RValue<Float4> Sqrt(RValue<Float4> x)
3356{
3357 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003358 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003359 Float4 result;
3360 result.x = Sqrt(Float(Float4(x).x));
3361 result.y = Sqrt(Float(Float4(x).y));
3362 result.z = Sqrt(Float(Float4(x).z));
3363 result.w = Sqrt(Float(Float4(x).w));
3364
3365 return result;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003366 }
Nicolas Capens157ba262019-12-10 17:49:14 -05003367 else
Nicolas Capens598f8d82016-09-26 15:09:10 -04003368 {
Nicolas Capens157ba262019-12-10 17:49:14 -05003369 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003370 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capensd52e9362016-10-31 23:23:15 -04003371 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3372 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3373 sqrt->addArg(x.value);
3374 ::basicBlock->appendInst(sqrt);
3375
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003376 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003377 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003378}
Nicolas Capens157ba262019-12-10 17:49:14 -05003379
3380RValue<Int> SignMask(RValue<Float4> x)
3381{
3382 if(emulateIntrinsics || CPUID::ARM)
3383 {
3384 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3385 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3386 }
3387 else
3388 {
3389 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003390 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003391 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3392 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3393 movmsk->addArg(x.value);
3394 ::basicBlock->appendInst(movmsk);
3395
3396 return RValue<Int>(V(result));
3397 }
3398}
3399
3400RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3401{
3402 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
3403}
3404
3405RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3406{
3407 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
3408}
3409
3410RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3411{
3412 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
3413}
3414
3415RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3416{
3417 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
3418}
3419
3420RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3421{
3422 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
3423}
3424
3425RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3426{
3427 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
3428}
3429
3430RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3431{
3432 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3433}
3434
3435RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3436{
3437 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3438}
3439
3440RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3441{
3442 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3443}
3444
3445RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3446{
3447 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3448}
3449
3450RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3451{
3452 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3453}
3454
3455RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3456{
3457 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3458}
3459
3460RValue<Float4> Round(RValue<Float4> x)
3461{
3462 if(emulateIntrinsics || CPUID::ARM)
3463 {
3464 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3465 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
3466 }
3467 else if(CPUID::SSE4_1)
3468 {
3469 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003470 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003471 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3472 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3473 round->addArg(x.value);
3474 round->addArg(::context->getConstantInt32(0));
3475 ::basicBlock->appendInst(round);
3476
3477 return RValue<Float4>(V(result));
3478 }
3479 else
3480 {
3481 return Float4(RoundInt(x));
3482 }
3483}
3484
3485RValue<Float4> Trunc(RValue<Float4> x)
3486{
3487 if(CPUID::SSE4_1)
3488 {
3489 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003490 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003491 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3492 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3493 round->addArg(x.value);
3494 round->addArg(::context->getConstantInt32(3));
3495 ::basicBlock->appendInst(round);
3496
3497 return RValue<Float4>(V(result));
3498 }
3499 else
3500 {
3501 return Float4(Int4(x));
3502 }
3503}
3504
3505RValue<Float4> Frac(RValue<Float4> x)
3506{
3507 Float4 frc;
3508
3509 if(CPUID::SSE4_1)
3510 {
3511 frc = x - Floor(x);
3512 }
3513 else
3514 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003515 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens157ba262019-12-10 17:49:14 -05003516
Ben Clayton713b8d32019-12-17 20:37:56 +00003517 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens157ba262019-12-10 17:49:14 -05003518 }
3519
3520 // x - floor(x) can be 1.0 for very small negative x.
3521 // Clamp against the value just below 1.0.
3522 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
3523}
3524
3525RValue<Float4> Floor(RValue<Float4> x)
3526{
3527 if(CPUID::SSE4_1)
3528 {
3529 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003530 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003531 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3532 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3533 round->addArg(x.value);
3534 round->addArg(::context->getConstantInt32(1));
3535 ::basicBlock->appendInst(round);
3536
3537 return RValue<Float4>(V(result));
3538 }
3539 else
3540 {
3541 return x - Frac(x);
3542 }
3543}
3544
3545RValue<Float4> Ceil(RValue<Float4> x)
3546{
3547 if(CPUID::SSE4_1)
3548 {
3549 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Ben Clayton713b8d32019-12-17 20:37:56 +00003550 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003551 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3552 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3553 round->addArg(x.value);
3554 round->addArg(::context->getConstantInt32(2));
3555 ::basicBlock->appendInst(round);
3556
3557 return RValue<Float4>(V(result));
3558 }
3559 else
3560 {
3561 return -Floor(-x);
3562 }
3563}
3564
3565Type *Float4::getType()
3566{
3567 return T(Ice::IceType_v4f32);
3568}
3569
3570RValue<Long> Ticks()
3571{
3572 UNIMPLEMENTED("RValue<Long> Ticks()");
3573 return Long(Int(0));
3574}
3575
Ben Clayton713b8d32019-12-17 20:37:56 +00003576RValue<Pointer<Byte>> ConstantPointer(void const *ptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05003577{
Ben Clayton713b8d32019-12-17 20:37:56 +00003578 if(sizeof(void *) == 8)
Nicolas Capens157ba262019-12-10 17:49:14 -05003579 {
3580 return RValue<Pointer<Byte>>(V(::context->getConstantInt64(reinterpret_cast<intptr_t>(ptr))));
3581 }
3582 else
3583 {
3584 return RValue<Pointer<Byte>>(V(::context->getConstantInt32(reinterpret_cast<intptr_t>(ptr))));
3585 }
3586}
3587
Ben Clayton713b8d32019-12-17 20:37:56 +00003588RValue<Pointer<Byte>> ConstantData(void const *data, size_t size)
Nicolas Capens157ba262019-12-10 17:49:14 -05003589{
3590 // TODO: Try to use Ice::VariableDeclaration::DataInitializer and
3591 // getConstantSym instead of tagging data on the routine.
3592 return ConstantPointer(::routine->addConstantData(data, size));
3593}
3594
Ben Clayton713b8d32019-12-17 20:37:56 +00003595Value *Call(RValue<Pointer<Byte>> fptr, Type *retTy, std::initializer_list<Value *> args, std::initializer_list<Type *> argTys)
Nicolas Capens157ba262019-12-10 17:49:14 -05003596{
3597 Ice::Variable *ret = nullptr;
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003598 if(retTy != nullptr)
Nicolas Capens157ba262019-12-10 17:49:14 -05003599 {
3600 ret = ::function->makeVariable(T(retTy));
3601 }
3602 auto call = Ice::InstCall::create(::function, args.size(), ret, V(fptr.value), false);
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003603 for(auto arg : args)
Nicolas Capens157ba262019-12-10 17:49:14 -05003604 {
3605 call->addArg(V(arg));
3606 }
3607 ::basicBlock->appendInst(call);
3608 return V(ret);
3609}
3610
3611void Breakpoint()
3612{
Ben Clayton713b8d32019-12-17 20:37:56 +00003613 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Trap, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
Nicolas Capens157ba262019-12-10 17:49:14 -05003614 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3615 auto trap = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3616 ::basicBlock->appendInst(trap);
3617}
3618
Ben Clayton713b8d32019-12-17 20:37:56 +00003619void Nucleus::createFence(std::memory_order memoryOrder)
3620{
Antonio Maiorano370cba52019-12-31 11:36:07 -05003621 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3622 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3623 auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);
3624 auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
3625 inst->addArg(order);
3626 ::basicBlock->appendInst(inst);
Ben Clayton713b8d32019-12-17 20:37:56 +00003627}
Antonio Maiorano370cba52019-12-31 11:36:07 -05003628
Ben Clayton713b8d32019-12-17 20:37:56 +00003629Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
3630{
3631 UNIMPLEMENTED("Subzero createMaskedLoad()");
3632 return nullptr;
3633}
3634void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
3635{
3636 UNIMPLEMENTED("Subzero createMaskedStore()");
3637}
Nicolas Capens157ba262019-12-10 17:49:14 -05003638
3639RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3640{
3641 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3642}
3643
3644RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
3645{
3646 return emulated::Gather(base, offsets, mask, alignment, zeroMaskedLanes);
3647}
3648
3649void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3650{
3651 return emulated::Scatter(base, val, offsets, mask, alignment);
3652}
3653
3654void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
3655{
3656 return emulated::Scatter(base, val, offsets, mask, alignment);
3657}
3658
3659RValue<Float> Exp2(RValue<Float> x)
3660{
3661 return emulated::Exp2(x);
3662}
3663
3664RValue<Float> Log2(RValue<Float> x)
3665{
3666 return emulated::Log2(x);
3667}
3668
3669RValue<Float4> Sin(RValue<Float4> x)
3670{
3671 return emulated::Sin(x);
3672}
3673
3674RValue<Float4> Cos(RValue<Float4> x)
3675{
3676 return emulated::Cos(x);
3677}
3678
3679RValue<Float4> Tan(RValue<Float4> x)
3680{
3681 return emulated::Tan(x);
3682}
3683
3684RValue<Float4> Asin(RValue<Float4> x)
3685{
3686 return emulated::Asin(x);
3687}
3688
3689RValue<Float4> Acos(RValue<Float4> x)
3690{
3691 return emulated::Acos(x);
3692}
3693
3694RValue<Float4> Atan(RValue<Float4> x)
3695{
3696 return emulated::Atan(x);
3697}
3698
3699RValue<Float4> Sinh(RValue<Float4> x)
3700{
3701 return emulated::Sinh(x);
3702}
3703
3704RValue<Float4> Cosh(RValue<Float4> x)
3705{
3706 return emulated::Cosh(x);
3707}
3708
3709RValue<Float4> Tanh(RValue<Float4> x)
3710{
3711 return emulated::Tanh(x);
3712}
3713
3714RValue<Float4> Asinh(RValue<Float4> x)
3715{
3716 return emulated::Asinh(x);
3717}
3718
3719RValue<Float4> Acosh(RValue<Float4> x)
3720{
3721 return emulated::Acosh(x);
3722}
3723
3724RValue<Float4> Atanh(RValue<Float4> x)
3725{
3726 return emulated::Atanh(x);
3727}
3728
3729RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
3730{
3731 return emulated::Atan2(x, y);
3732}
3733
3734RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
3735{
3736 return emulated::Pow(x, y);
3737}
3738
3739RValue<Float4> Exp(RValue<Float4> x)
3740{
3741 return emulated::Exp(x);
3742}
3743
3744RValue<Float4> Log(RValue<Float4> x)
3745{
3746 return emulated::Log(x);
3747}
3748
3749RValue<Float4> Exp2(RValue<Float4> x)
3750{
3751 return emulated::Exp2(x);
3752}
3753
3754RValue<Float4> Log2(RValue<Float4> x)
3755{
3756 return emulated::Log2(x);
3757}
3758
3759RValue<UInt> Ctlz(RValue<UInt> x, bool isZeroUndef)
3760{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003761 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003762 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003763 UNIMPLEMENTED("Subzero Ctlz()");
3764 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003765 }
3766 else
3767 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003768 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05003769 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Ctlz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3770 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3771 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3772 ctlz->addArg(x.value);
3773 ::basicBlock->appendInst(ctlz);
3774
3775 return RValue<UInt>(V(result));
3776 }
3777}
3778
3779RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef)
3780{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003781 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003782 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003783 UNIMPLEMENTED("Subzero Ctlz()");
3784 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003785 }
3786 else
3787 {
3788 // TODO: implement vectorized version in Subzero
3789 UInt4 result;
3790 result = Insert(result, Ctlz(Extract(x, 0), isZeroUndef), 0);
3791 result = Insert(result, Ctlz(Extract(x, 1), isZeroUndef), 1);
3792 result = Insert(result, Ctlz(Extract(x, 2), isZeroUndef), 2);
3793 result = Insert(result, Ctlz(Extract(x, 3), isZeroUndef), 3);
3794 return result;
3795 }
3796}
3797
3798RValue<UInt> Cttz(RValue<UInt> x, bool isZeroUndef)
3799{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003800 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003801 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003802 UNIMPLEMENTED("Subzero Cttz()");
3803 return UInt(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003804 }
3805 else
3806 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003807 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
Nicolas Capens157ba262019-12-10 17:49:14 -05003808 const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::Cttz, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
3809 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3810 auto ctlz = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3811 ctlz->addArg(x.value);
3812 ::basicBlock->appendInst(ctlz);
3813
3814 return RValue<UInt>(V(result));
3815 }
3816}
3817
3818RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef)
3819{
Nicolas Capens81bc9d92019-12-16 15:05:57 -05003820 if(emulateIntrinsics)
Nicolas Capens157ba262019-12-10 17:49:14 -05003821 {
Ben Clayton713b8d32019-12-17 20:37:56 +00003822 UNIMPLEMENTED("Subzero Cttz()");
3823 return UInt4(0);
Nicolas Capens157ba262019-12-10 17:49:14 -05003824 }
3825 else
3826 {
3827 // TODO: implement vectorized version in Subzero
3828 UInt4 result;
3829 result = Insert(result, Cttz(Extract(x, 0), isZeroUndef), 0);
3830 result = Insert(result, Cttz(Extract(x, 1), isZeroUndef), 1);
3831 result = Insert(result, Cttz(Extract(x, 2), isZeroUndef), 2);
3832 result = Insert(result, Cttz(Extract(x, 3), isZeroUndef), 3);
3833 return result;
3834 }
3835}
3836
Antonio Maiorano370cba52019-12-31 11:36:07 -05003837RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
3838{
3839 return emulated::MinAtomic(x, y, memoryOrder);
3840}
3841
3842RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
3843{
3844 return emulated::MinAtomic(x, y, memoryOrder);
3845}
3846
3847RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
3848{
3849 return emulated::MaxAtomic(x, y, memoryOrder);
3850}
3851
3852RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
3853{
3854 return emulated::MaxAtomic(x, y, memoryOrder);
3855}
3856
// No-op in this backend: the body is intentionally empty.
void EmitDebugLocation()
{
}
Ben Clayton713b8d32019-12-17 20:37:56 +00003858void EmitDebugVariable(Value *value) {}
// No-op in this backend: the body is intentionally empty.
void FlushDebug()
{
}
3860
Ben Clayton713b8d32019-12-17 20:37:56 +00003861void Nucleus::createCoroutine(Type *YieldType, std::vector<Type *> &Params)
Nicolas Capens157ba262019-12-10 17:49:14 -05003862{
3863 // Subzero currently only supports coroutines as functions (i.e. that do not yield)
3864 createFunction(YieldType, Params);
3865}
3866
Ben Clayton713b8d32019-12-17 20:37:56 +00003867static bool coroutineEntryAwaitStub(Nucleus::CoroutineHandle, void *yieldValue)
3868{
3869 return false;
3870}
Nicolas Capens157ba262019-12-10 17:49:14 -05003871static void coroutineEntryDestroyStub(Nucleus::CoroutineHandle) {}
3872
3873std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
3874{
3875 // acquireRoutine sets the CoroutineEntryBegin entry
3876 auto coroutineEntry = acquireRoutine(name, cfgEdit);
3877
3878 // For now, set the await and destroy entries to stubs, until we add proper coroutine support to the Subzero backend
3879 auto routine = std::static_pointer_cast<ELFMemoryStreamer>(coroutineEntry);
Ben Clayton713b8d32019-12-17 20:37:56 +00003880 routine->setEntry(Nucleus::CoroutineEntryAwait, reinterpret_cast<const void *>(&coroutineEntryAwaitStub));
3881 routine->setEntry(Nucleus::CoroutineEntryDestroy, reinterpret_cast<const void *>(&coroutineEntryDestroyStub));
Nicolas Capens157ba262019-12-10 17:49:14 -05003882
3883 return coroutineEntry;
3884}
3885
Ben Clayton713b8d32019-12-17 20:37:56 +00003886void Nucleus::yield(Value *val)
3887{
3888 UNIMPLEMENTED("Yield");
3889}
Nicolas Capens157ba262019-12-10 17:49:14 -05003890
3891} // namespace rr