// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
Nicolas Capens598f8d82016-09-26 15:09:10 -040015#include "Reactor.hpp"
Ben Claytoneb50d252019-04-15 13:50:01 -040016#include "Debug.hpp"
Nicolas Capens598f8d82016-09-26 15:09:10 -040017
Nicolas Capens2ae9d742016-11-24 14:43:05 -050018#include "Optimizer.hpp"
Nicolas Capens1a3ce872018-10-10 10:42:36 -040019#include "ExecutableMemory.hpp"
Nicolas Capensa062f322018-09-06 15:34:46 -040020
Nicolas Capens598f8d82016-09-26 15:09:10 -040021#include "src/IceTypes.h"
22#include "src/IceCfg.h"
23#include "src/IceELFStreamer.h"
24#include "src/IceGlobalContext.h"
25#include "src/IceCfgNode.h"
26#include "src/IceELFObjectWriter.h"
Nicolas Capens8dfd9a72016-10-13 17:44:51 -040027#include "src/IceGlobalInits.h"
Nicolas Capens598f8d82016-09-26 15:09:10 -040028
29#include "llvm/Support/FileSystem.h"
30#include "llvm/Support/raw_os_ostream.h"
Nicolas Capens6a990f82018-07-06 15:54:07 -040031#include "llvm/Support/Compiler.h"
32
33#if __has_feature(memory_sanitizer)
34#include <sanitizer/msan_interface.h>
35#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040036
Nicolas Capensbd65da92017-01-05 16:31:06 -050037#if defined(_WIN32)
Alexis Hetu113e33a2017-01-19 10:49:19 -050038#ifndef WIN32_LEAN_AND_MEAN
Nicolas Capens598f8d82016-09-26 15:09:10 -040039#define WIN32_LEAN_AND_MEAN
Alexis Hetu113e33a2017-01-19 10:49:19 -050040#endif // !WIN32_LEAN_AND_MEAN
41#ifndef NOMINMAX
Nicolas Capens598f8d82016-09-26 15:09:10 -040042#define NOMINMAX
Alexis Hetu113e33a2017-01-19 10:49:19 -050043#endif // !NOMINMAX
Nicolas Capens598f8d82016-09-26 15:09:10 -040044#include <Windows.h>
Nicolas Capensbd65da92017-01-05 16:31:06 -050045#else
46#include <sys/mman.h>
Nicolas Capens411273e2017-01-26 15:13:36 -080047#if !defined(MAP_ANONYMOUS)
48#define MAP_ANONYMOUS MAP_ANON
Nicolas Capens8b275742017-01-20 17:11:41 -050049#endif
Nicolas Capensbd65da92017-01-05 16:31:06 -050050#endif
Nicolas Capens598f8d82016-09-26 15:09:10 -040051
Nicolas Capensc07dc4b2018-08-06 14:20:45 -040052#include <mutex>
Nicolas Capens598f8d82016-09-26 15:09:10 -040053#include <limits>
54#include <iostream>
Nicolas Capens598f8d82016-09-26 15:09:10 -040055
56namespace
57{
58 Ice::GlobalContext *context = nullptr;
59 Ice::Cfg *function = nullptr;
60 Ice::CfgNode *basicBlock = nullptr;
61 Ice::CfgLocalAllocatorScope *allocator = nullptr;
Nicolas Capens48461502018-08-06 14:20:45 -040062 rr::Routine *routine = nullptr;
Nicolas Capens598f8d82016-09-26 15:09:10 -040063
64 std::mutex codegenMutex;
65
66 Ice::ELFFileStreamer *elfFile = nullptr;
67 Ice::Fdstream *out = nullptr;
68}
69
namespace
{
	// MSVC spells the x86 architecture macros differently; map them onto the
	// GCC/Clang names so the checks below need only one spelling.
	#if defined(_M_IX86) && !defined(__i386__)
		#define __i386__ 1
	#endif

	#if (defined(_M_AMD64) || defined(_M_X64)) && !defined(__x86_64__)
		#define __x86_64__ 1
	#endif

	// Host CPU properties, evaluated once during static initialization.
	class CPUID
	{
	public:
		const static bool ARM;      // True when compiled for 32-bit or 64-bit ARM
		const static bool SSE4_1;   // True when the x86 host reports SSE4.1

	private:
		// Executes the CPUID instruction for leaf 'info' and stores
		// EAX, EBX, ECX, EDX in registers[0..3]. Yields zeros on non-x86 targets.
		static void cpuid(int registers[4], int info)
		{
			#if defined(__i386__) || defined(__x86_64__)
				#if defined(_WIN32)
					__cpuid(registers, info);
				#else
					__asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
				#endif
			#else
				for(int i = 0; i < 4; i++)
				{
					registers[i] = 0;
				}
			#endif
		}

		// Compile-time architecture check; unknown targets fail the build.
		static bool detectARM()
		{
			#if defined(__arm__) || defined(__aarch64__)
				return true;
			#elif defined(__i386__) || defined(__x86_64__)
				return false;
			#elif defined(__mips__)
				return false;
			#else
				#error "Unknown architecture"
			#endif
		}

		// Tests bit 19 of ECX returned by CPUID leaf 1.
		static bool detectSSE4_1()
		{
			#if defined(__i386__) || defined(__x86_64__)
				const int sse41Mask = 0x00080000;

				int registers[4];
				cpuid(registers, 1);

				return (registers[2] & sse41Mask) != 0;
			#else
				return false;
			#endif
		}
	};

	const bool CPUID::ARM = CPUID::detectARM();
	const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();

	// Code generation workarounds selected for the host architecture.
	const bool emulateIntrinsics = false;
	const bool emulateMismatchedBitCast = CPUID::ARM;
}
133
Nicolas Capens48461502018-08-06 14:20:45 -0400134namespace rr
Nicolas Capens598f8d82016-09-26 15:09:10 -0400135{
Ben Claytonc7904162019-04-17 17:35:48 -0400136 const Capabilities Caps =
137 {
138 false, // CallSupported
139 };
140
Nicolas Capens23d99a42016-09-30 14:57:16 -0400141 enum EmulatedType
142 {
143 EmulatedShift = 16,
144 EmulatedV2 = 2 << EmulatedShift,
145 EmulatedV4 = 4 << EmulatedShift,
146 EmulatedV8 = 8 << EmulatedShift,
147 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
148
149 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
150 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
151 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
152 Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8,
153 Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4,
Nicolas Capens4cfd4572016-10-20 01:00:19 -0400154 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
Nicolas Capens23d99a42016-09-30 14:57:16 -0400155 };
156
Nicolas Capens15060bb2016-12-05 22:17:19 -0500157 class Value : public Ice::Operand {};
Nicolas Capensb98fe5c2016-11-09 12:24:06 -0500158 class SwitchCases : public Ice::InstSwitch {};
Nicolas Capens598f8d82016-09-26 15:09:10 -0400159 class BasicBlock : public Ice::CfgNode {};
160
161 Ice::Type T(Type *t)
162 {
Alexis Hetu113e33a2017-01-19 10:49:19 -0500163 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
Nicolas Capens23d99a42016-09-30 14:57:16 -0400164 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400165 }
166
167 Type *T(Ice::Type t)
168 {
169 return reinterpret_cast<Type*>(t);
170 }
171
Nicolas Capens23d99a42016-09-30 14:57:16 -0400172 Type *T(EmulatedType t)
173 {
174 return reinterpret_cast<Type*>(t);
175 }
176
Nicolas Capens15060bb2016-12-05 22:17:19 -0500177 Value *V(Ice::Operand *v)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400178 {
179 return reinterpret_cast<Value*>(v);
180 }
181
Nicolas Capens611642a2016-09-28 16:45:04 -0400182 BasicBlock *B(Ice::CfgNode *b)
183 {
184 return reinterpret_cast<BasicBlock*>(b);
185 }
186
Nicolas Capens584088c2017-01-26 16:05:18 -0800187 static size_t typeSize(Type *type)
188 {
189 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
190 {
191 switch(reinterpret_cast<std::intptr_t>(type))
192 {
193 case Type_v2i32: return 8;
194 case Type_v4i16: return 8;
195 case Type_v2i16: return 4;
196 case Type_v8i8: return 8;
197 case Type_v4i8: return 4;
198 case Type_v2f32: return 8;
Ben Claytoneb50d252019-04-15 13:50:01 -0400199 default: ASSERT(false);
Nicolas Capens584088c2017-01-26 16:05:18 -0800200 }
201 }
202
203 return Ice::typeWidthInBytes(T(type));
204 }
205
Nicolas Capens598f8d82016-09-26 15:09:10 -0400206 Optimization optimization[10] = {InstructionCombining, Disabled};
207
Nicolas Capens66478362016-10-13 15:36:36 -0400208 using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
209 using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
210
211 inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
212 {
213 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
214 }
Nicolas Capens87852e12016-11-24 14:45:06 -0500215
Nicolas Capens66478362016-10-13 15:36:36 -0400216 inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
217 {
218 return &sectionHeader(elfHeader)[index];
219 }
220
221 static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
222 {
223 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
Nicolas Capens87852e12016-11-24 14:45:06 -0500224
Nicolas Capens66478362016-10-13 15:36:36 -0400225 uint32_t index = relocation.getSymbol();
226 int table = relocationTable.sh_link;
227 void *symbolValue = nullptr;
Nicolas Capens87852e12016-11-24 14:45:06 -0500228
Nicolas Capens66478362016-10-13 15:36:36 -0400229 if(index != SHN_UNDEF)
230 {
231 if(table == SHN_UNDEF) return nullptr;
232 const SectionHeader *symbolTable = elfSection(elfHeader, table);
Nicolas Capens87852e12016-11-24 14:45:06 -0500233
Nicolas Capens66478362016-10-13 15:36:36 -0400234 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
235 if(index >= symtab_entries)
236 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400237 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens66478362016-10-13 15:36:36 -0400238 return nullptr;
239 }
Nicolas Capens87852e12016-11-24 14:45:06 -0500240
Nicolas Capens66478362016-10-13 15:36:36 -0400241 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
242 Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
243 uint16_t section = symbol.st_shndx;
244
245 if(section != SHN_UNDEF && section < SHN_LORESERVE)
246 {
247 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
248 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
249 }
250 else
251 {
252 return nullptr;
253 }
254 }
255
Nicolas Capens8d2cf752018-11-22 11:13:45 -0500256 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
257 unaligned_ptr<int32_t> patchSite = (int32_t*)(address + relocation.r_offset);
258
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400259 if(CPUID::ARM)
260 {
261 switch(relocation.getType())
262 {
263 case R_ARM_NONE:
264 // No relocation
265 break;
266 case R_ARM_MOVW_ABS_NC:
267 {
268 uint32_t thumb = 0; // Calls to Thumb code not supported.
269 uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
270 *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
271 }
272 break;
273 case R_ARM_MOVT_ABS:
274 {
275 uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
276 *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
277 }
278 break;
279 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400280 ASSERT(false && "Unsupported relocation type");
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400281 return nullptr;
282 }
283 }
284 else
285 {
Nicolas Capens30cd7d42017-04-25 15:17:25 -0400286 switch(relocation.getType())
287 {
288 case R_386_NONE:
289 // No relocation
290 break;
291 case R_386_32:
292 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
293 break;
294 // case R_386_PC32:
295 // *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
296 // break;
297 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400298 ASSERT(false && "Unsupported relocation type");
Nicolas Capens30cd7d42017-04-25 15:17:25 -0400299 return nullptr;
300 }
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400301 }
302
Nicolas Capens66478362016-10-13 15:36:36 -0400303 return symbolValue;
304 }
305
306 static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
307 {
308 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
Nicolas Capens87852e12016-11-24 14:45:06 -0500309
Nicolas Capens66478362016-10-13 15:36:36 -0400310 uint32_t index = relocation.getSymbol();
311 int table = relocationTable.sh_link;
312 void *symbolValue = nullptr;
313
314 if(index != SHN_UNDEF)
315 {
316 if(table == SHN_UNDEF) return nullptr;
317 const SectionHeader *symbolTable = elfSection(elfHeader, table);
Nicolas Capens87852e12016-11-24 14:45:06 -0500318
Nicolas Capens66478362016-10-13 15:36:36 -0400319 uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
320 if(index >= symtab_entries)
321 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400322 ASSERT(index < symtab_entries && "Symbol Index out of range");
Nicolas Capens66478362016-10-13 15:36:36 -0400323 return nullptr;
324 }
Nicolas Capens87852e12016-11-24 14:45:06 -0500325
Nicolas Capens66478362016-10-13 15:36:36 -0400326 intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
327 Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
328 uint16_t section = symbol.st_shndx;
329
330 if(section != SHN_UNDEF && section < SHN_LORESERVE)
331 {
332 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
333 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
334 }
335 else
336 {
337 return nullptr;
338 }
339 }
340
Nicolas Capens8d2cf752018-11-22 11:13:45 -0500341 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
342 unaligned_ptr<int32_t> patchSite32 = (int32_t*)(address + relocation.r_offset);
343 unaligned_ptr<int64_t> patchSite64 = (int64_t*)(address + relocation.r_offset);
344
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400345 switch(relocation.getType())
346 {
347 case R_X86_64_NONE:
348 // No relocation
349 break;
350 case R_X86_64_64:
Nicolas Capens8d2cf752018-11-22 11:13:45 -0500351 *patchSite64 = (int64_t)((intptr_t)symbolValue + *patchSite64 + relocation.r_addend);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400352 break;
353 case R_X86_64_PC32:
Nicolas Capens8d2cf752018-11-22 11:13:45 -0500354 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 - (intptr_t)patchSite32 + relocation.r_addend);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400355 break;
356 case R_X86_64_32S:
Nicolas Capens8d2cf752018-11-22 11:13:45 -0500357 *patchSite32 = (int32_t)((intptr_t)symbolValue + *patchSite32 + relocation.r_addend);
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400358 break;
359 default:
Ben Claytoneb50d252019-04-15 13:50:01 -0400360 ASSERT(false && "Unsupported relocation type");
Nicolas Capensf110e4d2017-05-03 15:33:49 -0400361 return nullptr;
362 }
Nicolas Capens66478362016-10-13 15:36:36 -0400363
364 return symbolValue;
365 }
366
Nicolas Capens1cc44382017-04-25 10:52:16 -0400367 void *loadImage(uint8_t *const elfImage, size_t &codeSize)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400368 {
Nicolas Capens598f8d82016-09-26 15:09:10 -0400369 ElfHeader *elfHeader = (ElfHeader*)elfImage;
370
371 if(!elfHeader->checkMagic())
372 {
373 return nullptr;
374 }
375
Nicolas Capens66478362016-10-13 15:36:36 -0400376 // Expect ELF bitness to match platform
Ben Claytoneb50d252019-04-15 13:50:01 -0400377 ASSERT(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
Nicolas Capens30cd7d42017-04-25 15:17:25 -0400378 #if defined(__i386__)
Ben Claytoneb50d252019-04-15 13:50:01 -0400379 ASSERT(sizeof(void*) == 4 && elfHeader->e_machine == EM_386);
Nicolas Capens30cd7d42017-04-25 15:17:25 -0400380 #elif defined(__x86_64__)
Ben Claytoneb50d252019-04-15 13:50:01 -0400381 ASSERT(sizeof(void*) == 8 && elfHeader->e_machine == EM_X86_64);
Nicolas Capens30cd7d42017-04-25 15:17:25 -0400382 #elif defined(__arm__)
Ben Claytoneb50d252019-04-15 13:50:01 -0400383 ASSERT(sizeof(void*) == 4 && elfHeader->e_machine == EM_ARM);
Stephen Lanhamfe796492018-09-07 11:59:54 -0700384 #elif defined(__aarch64__)
Ben Claytoneb50d252019-04-15 13:50:01 -0400385 ASSERT(sizeof(void*) == 8 && elfHeader->e_machine == EM_AARCH64);
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200386 #elif defined(__mips__)
Ben Claytoneb50d252019-04-15 13:50:01 -0400387 ASSERT(sizeof(void*) == 4 && elfHeader->e_machine == EM_MIPS);
Nicolas Capens30cd7d42017-04-25 15:17:25 -0400388 #else
389 #error "Unsupported platform"
390 #endif
Nicolas Capens66478362016-10-13 15:36:36 -0400391
Nicolas Capens598f8d82016-09-26 15:09:10 -0400392 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
393 void *entry = nullptr;
394
395 for(int i = 0; i < elfHeader->e_shnum; i++)
396 {
Nicolas Capens66478362016-10-13 15:36:36 -0400397 if(sectionHeader[i].sh_type == SHT_PROGBITS)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400398 {
Nicolas Capens66478362016-10-13 15:36:36 -0400399 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
400 {
401 entry = elfImage + sectionHeader[i].sh_offset;
Nicolas Capens1cc44382017-04-25 10:52:16 -0400402 codeSize = sectionHeader[i].sh_size;
Nicolas Capens66478362016-10-13 15:36:36 -0400403 }
404 }
405 else if(sectionHeader[i].sh_type == SHT_REL)
406 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400407 ASSERT(sizeof(void*) == 4 && "UNIMPLEMENTED"); // Only expected/implemented for 32-bit code
Nicolas Capens66478362016-10-13 15:36:36 -0400408
Alexis Hetu113e33a2017-01-19 10:49:19 -0500409 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
Nicolas Capens66478362016-10-13 15:36:36 -0400410 {
411 const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
Alexis Hetu113e33a2017-01-19 10:49:19 -0500412 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
Nicolas Capens66478362016-10-13 15:36:36 -0400413 }
414 }
415 else if(sectionHeader[i].sh_type == SHT_RELA)
416 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400417 ASSERT(sizeof(void*) == 8 && "UNIMPLEMENTED"); // Only expected/implemented for 64-bit code
Nicolas Capens66478362016-10-13 15:36:36 -0400418
Alexis Hetu113e33a2017-01-19 10:49:19 -0500419 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
Nicolas Capens66478362016-10-13 15:36:36 -0400420 {
421 const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
Alexis Hetu113e33a2017-01-19 10:49:19 -0500422 relocateSymbol(elfHeader, relocation, sectionHeader[i]);
Nicolas Capens66478362016-10-13 15:36:36 -0400423 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400424 }
425 }
426
427 return entry;
428 }
429
430 template<typename T>
431 struct ExecutableAllocator
432 {
433 ExecutableAllocator() {};
434 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {};
435
436 using value_type = T;
437 using size_type = std::size_t;
438
439 T *allocate(size_type n)
440 {
Nicolas Capensc07dc4b2018-08-06 14:20:45 -0400441 return (T*)allocateExecutable(sizeof(T) * n);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400442 }
443
444 void deallocate(T *p, size_type n)
445 {
Nicolas Capensc07dc4b2018-08-06 14:20:45 -0400446 deallocateExecutable(p, sizeof(T) * n);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400447 }
448 };
449
450 class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
451 {
452 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
453 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
454
455 public:
Nicolas Capens58274b52016-10-19 23:45:19 -0400456 ELFMemoryStreamer() : Routine(), entry(nullptr)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400457 {
458 position = 0;
459 buffer.reserve(0x1000);
460 }
461
Nicolas Capens81aa97b2017-06-27 17:08:08 -0400462 ~ELFMemoryStreamer() override
Nicolas Capens598f8d82016-09-26 15:09:10 -0400463 {
Nicolas Capensbd65da92017-01-05 16:31:06 -0500464 #if defined(_WIN32)
465 if(buffer.size() != 0)
466 {
467 DWORD exeProtection;
468 VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
469 }
470 #endif
Nicolas Capens598f8d82016-09-26 15:09:10 -0400471 }
472
473 void write8(uint8_t Value) override
474 {
475 if(position == (uint64_t)buffer.size())
476 {
477 buffer.push_back(Value);
478 position++;
479 }
480 else if(position < (uint64_t)buffer.size())
481 {
482 buffer[position] = Value;
483 position++;
484 }
Ben Claytoneb50d252019-04-15 13:50:01 -0400485 else ASSERT(false && "UNIMPLEMENTED");
Nicolas Capens598f8d82016-09-26 15:09:10 -0400486 }
487
488 void writeBytes(llvm::StringRef Bytes) override
489 {
490 std::size_t oldSize = buffer.size();
491 buffer.resize(oldSize + Bytes.size());
492 memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
493 position += Bytes.size();
494 }
495
496 uint64_t tell() const override { return position; }
497
498 void seek(uint64_t Off) override { position = Off; }
499
500 const void *getEntry() override
501 {
Nicolas Capens58274b52016-10-19 23:45:19 -0400502 if(!entry)
503 {
Nicolas Capensbd65da92017-01-05 16:31:06 -0500504 position = std::numeric_limits<std::size_t>::max(); // Can't stream more data after this
Nicolas Capens598f8d82016-09-26 15:09:10 -0400505
Nicolas Capens1cc44382017-04-25 10:52:16 -0400506 size_t codeSize = 0;
507 entry = loadImage(&buffer[0], codeSize);
508
509 #if defined(_WIN32)
Nicolas Capense745f5a2017-05-29 10:00:32 -0400510 VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READ, &oldProtection);
Nicolas Capens1cc44382017-04-25 10:52:16 -0400511 FlushInstructionCache(GetCurrentProcess(), NULL, 0);
512 #else
Nicolas Capense745f5a2017-05-29 10:00:32 -0400513 mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_EXEC);
Nicolas Capens1cc44382017-04-25 10:52:16 -0400514 __builtin___clear_cache((char*)entry, (char*)entry + codeSize);
515 #endif
Nicolas Capens58274b52016-10-19 23:45:19 -0400516 }
517
518 return entry;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400519 }
520
521 private:
Nicolas Capens58274b52016-10-19 23:45:19 -0400522 void *entry;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400523 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
524 std::size_t position;
Nicolas Capensbd65da92017-01-05 16:31:06 -0500525
526 #if defined(_WIN32)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400527 DWORD oldProtection;
Nicolas Capensbd65da92017-01-05 16:31:06 -0500528 #endif
Nicolas Capens598f8d82016-09-26 15:09:10 -0400529 };
530
531 Nucleus::Nucleus()
532 {
533 ::codegenMutex.lock(); // Reactor is currently not thread safe
534
Nicolas Capens66478362016-10-13 15:36:36 -0400535 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
536 Ice::ClFlags::getParsedClFlags(Flags);
537
Nicolas Capens30cd7d42017-04-25 15:17:25 -0400538 #if defined(__arm__)
539 Flags.setTargetArch(Ice::Target_ARM32);
540 Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
Gordana Cmiljanovic082dfec2018-10-19 11:36:15 +0200541 #elif defined(__mips__)
542 Flags.setTargetArch(Ice::Target_MIPS32);
543 Flags.setTargetInstructionSet(Ice::BaseInstructionSet);
Nicolas Capens30cd7d42017-04-25 15:17:25 -0400544 #else // x86
545 Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
546 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
547 #endif
Nicolas Capens66478362016-10-13 15:36:36 -0400548 Flags.setOutFileType(Ice::FT_Elf);
549 Flags.setOptLevel(Ice::Opt_2);
550 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
Nicolas Capens30cd7d42017-04-25 15:17:25 -0400551 Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None);
552 Flags.setDisableHybridAssembly(true);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400553
Nicolas Capens65047112016-11-07 13:01:07 -0500554 static llvm::raw_os_ostream cout(std::cout);
555 static llvm::raw_os_ostream cerr(std::cerr);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400556
557 if(false) // Write out to a file
558 {
559 std::error_code errorCode;
560 ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
561 ::elfFile = new Ice::ELFFileStreamer(*out);
Nicolas Capens65047112016-11-07 13:01:07 -0500562 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400563 }
564 else
565 {
566 ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
Nicolas Capens65047112016-11-07 13:01:07 -0500567 ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400568 ::routine = elfMemory;
569 }
570 }
571
572 Nucleus::~Nucleus()
573 {
Nicolas Capens619a8c52017-07-05 14:10:46 -0400574 delete ::routine;
575
Nicolas Capens598f8d82016-09-26 15:09:10 -0400576 delete ::allocator;
577 delete ::function;
578 delete ::context;
579
580 delete ::elfFile;
581 delete ::out;
582
583 ::codegenMutex.unlock();
584 }
585
Chris Forbes878d4b02019-01-21 10:48:35 -0800586 Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400587 {
588 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
589 {
590 createRetVoid();
591 }
592
Chris Forbes878d4b02019-01-21 10:48:35 -0800593 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, name));
Nicolas Capens598f8d82016-09-26 15:09:10 -0400594
Nicolas Capens2ae9d742016-11-24 14:43:05 -0500595 optimize();
596
Nicolas Capens598f8d82016-09-26 15:09:10 -0400597 ::function->translate();
Ben Claytoneb50d252019-04-15 13:50:01 -0400598 ASSERT(!::function->hasError());
Nicolas Capensde19f392016-10-19 10:29:49 -0400599
Nicolas Capens83a6bb92017-07-05 15:04:00 -0400600 auto globals = ::function->getGlobalInits();
Nicolas Capens66478362016-10-13 15:36:36 -0400601
602 if(globals && !globals->empty())
603 {
Nicolas Capens83a6bb92017-07-05 15:04:00 -0400604 ::context->getGlobals()->merge(globals.get());
Nicolas Capens66478362016-10-13 15:36:36 -0400605 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400606
607 ::context->emitFileHeader();
608 ::function->emitIAS();
609 auto assembler = ::function->releaseAssembler();
Nicolas Capens66478362016-10-13 15:36:36 -0400610 auto objectWriter = ::context->getObjectWriter();
611 assembler->alignFunction();
612 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
613 ::context->lowerGlobals("last");
Nicolas Capens73dd7a22016-10-20 13:20:34 -0400614 ::context->lowerConstants();
Nicolas Capensb98fe5c2016-11-09 12:24:06 -0500615 ::context->lowerJumpTables();
Nicolas Capens66478362016-10-13 15:36:36 -0400616 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
617 objectWriter->writeNonUserSections();
Nicolas Capens598f8d82016-09-26 15:09:10 -0400618
Nicolas Capens619a8c52017-07-05 14:10:46 -0400619 Routine *handoffRoutine = ::routine;
620 ::routine = nullptr;
621
622 return handoffRoutine;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400623 }
624
625 void Nucleus::optimize()
626 {
Nicolas Capens48461502018-08-06 14:20:45 -0400627 rr::optimize(::function);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400628 }
629
630 Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
631 {
632 Ice::Type type = T(t);
Nicolas Capensa8f98632016-10-20 11:25:55 -0400633 int typeSize = Ice::typeWidthInBytes(type);
634 int totalSize = typeSize * (arraySize ? arraySize : 1);
Nicolas Capense12780d2016-09-27 14:18:07 -0400635
Nicolas Capensa8f98632016-10-20 11:25:55 -0400636 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
Nicolas Capense12780d2016-09-27 14:18:07 -0400637 auto address = ::function->makeVariable(T(getPointerType(t)));
Nicolas Capensa8f98632016-10-20 11:25:55 -0400638 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
Nicolas Capense12780d2016-09-27 14:18:07 -0400639 ::function->getEntryNode()->getInsts().push_front(alloca);
640
641 return V(address);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400642 }
643
644 BasicBlock *Nucleus::createBasicBlock()
645 {
Nicolas Capens611642a2016-09-28 16:45:04 -0400646 return B(::function->makeNode());
Nicolas Capens598f8d82016-09-26 15:09:10 -0400647 }
648
649 BasicBlock *Nucleus::getInsertBlock()
650 {
Nicolas Capens611642a2016-09-28 16:45:04 -0400651 return B(::basicBlock);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400652 }
653
654 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
655 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400656 // ASSERT(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
Nicolas Capens0192d152019-03-27 14:46:07 -0400657
658 Variable::materializeAll();
659
Nicolas Capens611642a2016-09-28 16:45:04 -0400660 ::basicBlock = basicBlock;
Nicolas Capens598f8d82016-09-26 15:09:10 -0400661 }
662
Nicolas Capens598f8d82016-09-26 15:09:10 -0400663 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
664 {
665 uint32_t sequenceNumber = 0;
666 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
667 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
668
669 for(Type *type : Params)
670 {
671 Ice::Variable *arg = ::function->makeVariable(T(type));
672 ::function->addArg(arg);
673 }
674
675 Ice::CfgNode *node = ::function->makeNode();
676 ::function->setEntryNode(node);
677 ::basicBlock = node;
678 }
679
680 Value *Nucleus::getArgument(unsigned int index)
681 {
682 return V(::function->getArgs()[index]);
683 }
684
685 void Nucleus::createRetVoid()
686 {
Nicolas Capens0192d152019-03-27 14:46:07 -0400687 // Code generated after this point is unreachable, so any variables
688 // being read can safely return an undefined value. We have to avoid
689 // materializing variables after the terminator ret instruction.
690 Variable::killUnmaterialized();
691
Nicolas Capensfdcca2d2016-10-20 11:31:36 -0400692 Ice::InstRet *ret = Ice::InstRet::create(::function);
693 ::basicBlock->appendInst(ret);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400694 }
695
696 void Nucleus::createRet(Value *v)
697 {
Nicolas Capens0192d152019-03-27 14:46:07 -0400698 // Code generated after this point is unreachable, so any variables
699 // being read can safely return an undefined value. We have to avoid
700 // materializing variables after the terminator ret instruction.
701 Variable::killUnmaterialized();
702
Nicolas Capensfdcca2d2016-10-20 11:31:36 -0400703 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
704 ::basicBlock->appendInst(ret);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400705 }
706
707 void Nucleus::createBr(BasicBlock *dest)
708 {
Nicolas Capens0192d152019-03-27 14:46:07 -0400709 Variable::materializeAll();
710
Nicolas Capens611642a2016-09-28 16:45:04 -0400711 auto br = Ice::InstBr::create(::function, dest);
712 ::basicBlock->appendInst(br);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400713 }
714
715 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
716 {
Nicolas Capens0192d152019-03-27 14:46:07 -0400717 Variable::materializeAll();
718
Nicolas Capens611642a2016-09-28 16:45:04 -0400719 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
720 ::basicBlock->appendInst(br);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400721 }
722
Nicolas Capensf8360ba2017-01-25 11:35:00 -0800723 static bool isCommutative(Ice::InstArithmetic::OpKind op)
724 {
725 switch(op)
726 {
727 case Ice::InstArithmetic::Add:
728 case Ice::InstArithmetic::Fadd:
729 case Ice::InstArithmetic::Mul:
730 case Ice::InstArithmetic::Fmul:
731 case Ice::InstArithmetic::And:
732 case Ice::InstArithmetic::Or:
733 case Ice::InstArithmetic::Xor:
734 return true;
735 default:
736 return false;
737 }
738 }
739
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400740 static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
741 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400742 ASSERT(lhs->getType() == rhs->getType() || llvm::isa<Ice::Constant>(rhs));
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400743
Nicolas Capensf8360ba2017-01-25 11:35:00 -0800744 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
745
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400746 Ice::Variable *result = ::function->makeVariable(lhs->getType());
Nicolas Capensf8360ba2017-01-25 11:35:00 -0800747 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400748 ::basicBlock->appendInst(arithmetic);
749
750 return V(result);
751 }
752
Nicolas Capens598f8d82016-09-26 15:09:10 -0400753 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
754 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400755 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400756 }
757
758 Value *Nucleus::createSub(Value *lhs, Value *rhs)
759 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400760 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400761 }
762
763 Value *Nucleus::createMul(Value *lhs, Value *rhs)
764 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400765 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400766 }
767
768 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
769 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400770 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400771 }
772
773 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
774 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400775 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400776 }
777
778 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
779 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400780 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400781 }
782
783 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
784 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400785 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400786 }
787
788 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
789 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400790 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400791 }
792
793 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
794 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400795 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400796 }
797
798 Value *Nucleus::createURem(Value *lhs, Value *rhs)
799 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400800 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400801 }
802
803 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
804 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400805 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400806 }
807
808 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
809 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400810 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400811 }
812
813 Value *Nucleus::createShl(Value *lhs, Value *rhs)
814 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400815 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400816 }
817
818 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
819 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400820 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400821 }
822
823 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
824 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400825 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400826 }
827
828 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
829 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400830 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400831 }
832
833 Value *Nucleus::createOr(Value *lhs, Value *rhs)
834 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400835 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400836 }
837
838 Value *Nucleus::createXor(Value *lhs, Value *rhs)
839 {
Nicolas Capens7d9f76d2016-09-29 13:39:44 -0400840 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400841 }
842
843 Value *Nucleus::createNeg(Value *v)
844 {
Nicolas Capensc5c0c332016-11-08 11:37:01 -0500845 return createSub(createNullValue(T(v->getType())), v);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400846 }
847
848 Value *Nucleus::createFNeg(Value *v)
849 {
Nicolas Capensc5c0c332016-11-08 11:37:01 -0500850 double c[4] = {-0.0, -0.0, -0.0, -0.0};
851 Value *negativeZero = Ice::isVectorType(v->getType()) ?
852 createConstantVector(c, T(v->getType())) :
Nicolas Capens15060bb2016-12-05 22:17:19 -0500853 V(::context->getConstantFloat(-0.0f));
Nicolas Capensc5c0c332016-11-08 11:37:01 -0500854
855 return createFSub(negativeZero, v);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400856 }
857
858 Value *Nucleus::createNot(Value *v)
859 {
Nicolas Capensc5c0c332016-11-08 11:37:01 -0500860 if(Ice::isScalarIntegerType(v->getType()))
861 {
Nicolas Capens15060bb2016-12-05 22:17:19 -0500862 return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
Nicolas Capensc5c0c332016-11-08 11:37:01 -0500863 }
864 else // Vector
865 {
Nicolas Capensf34d1ac2017-05-08 17:06:11 -0400866 int64_t c[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
Nicolas Capensc5c0c332016-11-08 11:37:01 -0500867 return createXor(v, createConstantVector(c, T(v->getType())));
868 }
Nicolas Capens598f8d82016-09-26 15:09:10 -0400869 }
870
Nicolas Capens86509d92019-03-21 13:23:50 -0400871 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400872 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400873 ASSERT(!atomic); // Unimplemented
874 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
Nicolas Capens86509d92019-03-21 13:23:50 -0400875
Nicolas Capens23d99a42016-09-30 14:57:16 -0400876 int valueType = (int)reinterpret_cast<intptr_t>(type);
877 Ice::Variable *result = ::function->makeVariable(T(type));
878
Nicolas Capensf4c4eca2017-10-03 14:26:07 -0400879 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens23d99a42016-09-30 14:57:16 -0400880 {
Nicolas Capens070d9f42017-04-26 13:36:33 -0400881 if(emulateIntrinsics)
882 {
883 if(typeSize(type) == 4)
884 {
885 auto pointer = RValue<Pointer<Byte>>(ptr);
Nicolas Capens1894cfa2017-07-27 14:21:46 -0400886 Int x = *Pointer<Int>(pointer);
Nicolas Capens070d9f42017-04-26 13:36:33 -0400887
888 Int4 vector;
889 vector = Insert(vector, x, 0);
890
891 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
892 ::basicBlock->appendInst(bitcast);
893 }
894 else if(typeSize(type) == 8)
895 {
896 auto pointer = RValue<Pointer<Byte>>(ptr);
Nicolas Capens1894cfa2017-07-27 14:21:46 -0400897 Int x = *Pointer<Int>(pointer);
Nicolas Capens070d9f42017-04-26 13:36:33 -0400898 Int y = *Pointer<Int>(pointer + 4);
899
900 Int4 vector;
901 vector = Insert(vector, x, 0);
902 vector = Insert(vector, y, 1);
903
904 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
905 ::basicBlock->appendInst(bitcast);
906 }
Ben Claytoneb50d252019-04-15 13:50:01 -0400907 else UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens070d9f42017-04-26 13:36:33 -0400908 }
909 else
910 {
911 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
912 auto target = ::context->getConstantUndef(Ice::IceType_i32);
913 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
914 load->addArg(ptr);
915 load->addArg(::context->getConstantInt32(typeSize(type)));
916 ::basicBlock->appendInst(load);
917 }
Nicolas Capens23d99a42016-09-30 14:57:16 -0400918 }
919 else
920 {
921 auto load = Ice::InstLoad::create(::function, result, ptr, align);
922 ::basicBlock->appendInst(load);
923 }
924
925 return V(result);
Nicolas Capens598f8d82016-09-26 15:09:10 -0400926 }
927
Nicolas Capens86509d92019-03-21 13:23:50 -0400928 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
Nicolas Capens598f8d82016-09-26 15:09:10 -0400929 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400930 ASSERT(!atomic); // Unimplemented
931 ASSERT(memoryOrder == std::memory_order_relaxed); // Unimplemented
Nicolas Capens86509d92019-03-21 13:23:50 -0400932
Nicolas Capens6a990f82018-07-06 15:54:07 -0400933 #if __has_feature(memory_sanitizer)
934 // Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
935 if(align != 0)
936 {
937 auto call = Ice::InstCall::create(::function, 2, nullptr, ::context->getConstantInt64(reinterpret_cast<intptr_t>(__msan_unpoison)), false);
938 call->addArg(ptr);
939 call->addArg(::context->getConstantInt64(typeSize(type)));
940 ::basicBlock->appendInst(call);
941 }
942 #endif
943
Nicolas Capens23d99a42016-09-30 14:57:16 -0400944 int valueType = (int)reinterpret_cast<intptr_t>(type);
945
Nicolas Capensf4c4eca2017-10-03 14:26:07 -0400946 if((valueType & EmulatedBits) && (align != 0)) // Narrow vector not stored on stack.
Nicolas Capens23d99a42016-09-30 14:57:16 -0400947 {
Nicolas Capens070d9f42017-04-26 13:36:33 -0400948 if(emulateIntrinsics)
949 {
950 if(typeSize(type) == 4)
951 {
952 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
953 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
954 ::basicBlock->appendInst(bitcast);
955
956 RValue<Int4> v(V(vector));
957
958 auto pointer = RValue<Pointer<Byte>>(ptr);
959 Int x = Extract(v, 0);
960 *Pointer<Int>(pointer) = x;
961 }
962 else if(typeSize(type) == 8)
963 {
964 Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
965 auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
966 ::basicBlock->appendInst(bitcast);
967
968 RValue<Int4> v(V(vector));
969
970 auto pointer = RValue<Pointer<Byte>>(ptr);
971 Int x = Extract(v, 0);
972 *Pointer<Int>(pointer) = x;
973 Int y = Extract(v, 1);
974 *Pointer<Int>(pointer + 4) = y;
975 }
Ben Claytoneb50d252019-04-15 13:50:01 -0400976 else UNREACHABLE("typeSize(type): %d", int(typeSize(type)));
Nicolas Capens070d9f42017-04-26 13:36:33 -0400977 }
978 else
979 {
980 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
981 auto target = ::context->getConstantUndef(Ice::IceType_i32);
982 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
983 store->addArg(value);
984 store->addArg(ptr);
985 store->addArg(::context->getConstantInt32(typeSize(type)));
986 ::basicBlock->appendInst(store);
987 }
Nicolas Capens23d99a42016-09-30 14:57:16 -0400988 }
989 else
990 {
Ben Claytoneb50d252019-04-15 13:50:01 -0400991 ASSERT(value->getType() == T(type));
Nicolas Capens23d99a42016-09-30 14:57:16 -0400992
993 auto store = Ice::InstStore::create(::function, value, ptr, align);
994 ::basicBlock->appendInst(store);
995 }
996
Nicolas Capens598f8d82016-09-26 15:09:10 -0400997 return value;
998 }
999
Nicolas Capensd294def2017-01-26 17:44:37 -08001000 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001001 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001002 ASSERT(index->getType() == Ice::IceType_i32);
Nicolas Capens8820f642016-09-30 04:42:43 -04001003
Nicolas Capens15060bb2016-12-05 22:17:19 -05001004 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
1005 {
Nicolas Capens584088c2017-01-26 16:05:18 -08001006 int32_t offset = constant->getValue() * (int)typeSize(type);
Nicolas Capens15060bb2016-12-05 22:17:19 -05001007
1008 if(offset == 0)
1009 {
1010 return ptr;
1011 }
1012
1013 return createAdd(ptr, createConstantInt(offset));
1014 }
1015
Nicolas Capens8820f642016-09-30 04:42:43 -04001016 if(!Ice::isByteSizedType(T(type)))
1017 {
Nicolas Capens584088c2017-01-26 16:05:18 -08001018 index = createMul(index, createConstantInt((int)typeSize(type)));
Nicolas Capens8820f642016-09-30 04:42:43 -04001019 }
1020
1021 if(sizeof(void*) == 8)
1022 {
Nicolas Capensd294def2017-01-26 17:44:37 -08001023 if(unsignedIndex)
1024 {
1025 index = createZExt(index, T(Ice::IceType_i64));
1026 }
1027 else
1028 {
1029 index = createSExt(index, T(Ice::IceType_i64));
1030 }
Nicolas Capens8820f642016-09-30 04:42:43 -04001031 }
1032
1033 return createAdd(ptr, index);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001034 }
1035
1036 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
1037 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001038 UNIMPLEMENTED("createAtomicAdd");
1039 return nullptr;
Nicolas Capens598f8d82016-09-26 15:09:10 -04001040 }
1041
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001042 static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
1043 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04001044 if(v->getType() == T(destType))
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001045 {
1046 return v;
1047 }
1048
1049 Ice::Variable *result = ::function->makeVariable(T(destType));
1050 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1051 ::basicBlock->appendInst(cast);
1052
1053 return V(result);
1054 }
1055
Nicolas Capens598f8d82016-09-26 15:09:10 -04001056 Value *Nucleus::createTrunc(Value *v, Type *destType)
1057 {
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001058 return createCast(Ice::InstCast::Trunc, v, destType);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001059 }
1060
1061 Value *Nucleus::createZExt(Value *v, Type *destType)
1062 {
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001063 return createCast(Ice::InstCast::Zext, v, destType);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001064 }
1065
1066 Value *Nucleus::createSExt(Value *v, Type *destType)
1067 {
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001068 return createCast(Ice::InstCast::Sext, v, destType);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001069 }
1070
1071 Value *Nucleus::createFPToSI(Value *v, Type *destType)
1072 {
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001073 return createCast(Ice::InstCast::Fptosi, v, destType);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001074 }
1075
Nicolas Capens598f8d82016-09-26 15:09:10 -04001076 Value *Nucleus::createSIToFP(Value *v, Type *destType)
1077 {
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001078 return createCast(Ice::InstCast::Sitofp, v, destType);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001079 }
1080
1081 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1082 {
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001083 return createCast(Ice::InstCast::Fptrunc, v, destType);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001084 }
1085
1086 Value *Nucleus::createFPExt(Value *v, Type *destType)
1087 {
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001088 return createCast(Ice::InstCast::Fpext, v, destType);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001089 }
1090
1091 Value *Nucleus::createBitCast(Value *v, Type *destType)
1092 {
Nicolas Capens2d8c3702017-07-25 13:56:46 -04001093 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1094 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1095 // emulate them by writing to the stack and reading back as the destination type.
1096 if(emulateMismatchedBitCast)
1097 {
1098 if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1099 {
1100 Value *address = allocateStackVariable(destType);
1101 createStore(v, address, T(v->getType()));
1102 return createLoad(address, destType);
1103 }
1104 else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1105 {
1106 Value *address = allocateStackVariable(T(v->getType()));
1107 createStore(v, address, T(v->getType()));
1108 return createLoad(address, destType);
1109 }
1110 }
1111
Nicolas Capensa0c2fc52016-09-30 05:04:21 -04001112 return createCast(Ice::InstCast::Bitcast, v, destType);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001113 }
1114
Nicolas Capens43dc6292016-10-20 00:01:38 -04001115 static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001116 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001117 ASSERT(lhs->getType() == rhs->getType());
Nicolas Capens611642a2016-09-28 16:45:04 -04001118
Nicolas Capens43dc6292016-10-20 00:01:38 -04001119 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1120 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
Nicolas Capens611642a2016-09-28 16:45:04 -04001121 ::basicBlock->appendInst(cmp);
1122
1123 return V(result);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001124 }
1125
Nicolas Capens43dc6292016-10-20 00:01:38 -04001126 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1127 {
1128 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1129 }
1130
1131 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1132 {
1133 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1134 }
1135
1136 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1137 {
1138 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1139 }
1140
1141 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1142 {
1143 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1144 }
1145
1146 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1147 {
1148 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1149 }
1150
1151 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1152 {
1153 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1154 }
1155
1156 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1157 {
1158 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1159 }
1160
1161 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1162 {
1163 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1164 }
1165
1166 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1167 {
1168 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1169 }
1170
Nicolas Capens598f8d82016-09-26 15:09:10 -04001171 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1172 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001173 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1174 }
1175
1176 static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1177 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001178 ASSERT(lhs->getType() == rhs->getType());
1179 ASSERT(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
Nicolas Capens43dc6292016-10-20 00:01:38 -04001180
1181 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1182 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1183 ::basicBlock->appendInst(cmp);
1184
1185 return V(result);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001186 }
1187
1188 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1189 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001190 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001191 }
1192
1193 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1194 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001195 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001196 }
1197
1198 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1199 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001200 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001201 }
1202
1203 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1204 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001205 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001206 }
1207
1208 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1209 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001210 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001211 }
1212
1213 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1214 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001215 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001216 }
1217
1218 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1219 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001220 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001221 }
1222
1223 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1224 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001225 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001226 }
1227
1228 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1229 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001230 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001231 }
1232
1233 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1234 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001235 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001236 }
1237
1238 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1239 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001240 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001241 }
1242
1243 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1244 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001245 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001246 }
1247
1248 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1249 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001250 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001251 }
1252
1253 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1254 {
Nicolas Capens43dc6292016-10-20 00:01:38 -04001255 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001256 }
1257
Nicolas Capense95d5342016-09-30 11:37:28 -04001258 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001259 {
Nicolas Capens9709d4f2016-09-30 11:44:14 -04001260 auto result = ::function->makeVariable(T(type));
1261 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1262 ::basicBlock->appendInst(extract);
1263
1264 return V(result);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001265 }
1266
1267 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1268 {
Nicolas Capens9709d4f2016-09-30 11:44:14 -04001269 auto result = ::function->makeVariable(vector->getType());
1270 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1271 ::basicBlock->appendInst(insert);
1272
1273 return V(result);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001274 }
1275
Nicolas Capense89cd582016-09-30 14:23:47 -04001276 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001277 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001278 ASSERT(V1->getType() == V2->getType());
Nicolas Capens619c0ab2016-09-30 14:46:24 -04001279
1280 int size = Ice::typeNumElements(V1->getType());
1281 auto result = ::function->makeVariable(V1->getType());
1282 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1283
1284 for(int i = 0; i < size; i++)
1285 {
1286 shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1287 }
1288
1289 ::basicBlock->appendInst(shuffle);
1290
1291 return V(result);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001292 }
1293
1294 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1295 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001296 ASSERT(ifTrue->getType() == ifFalse->getType());
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04001297
1298 auto result = ::function->makeVariable(ifTrue->getType());
1299 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1300 ::basicBlock->appendInst(select);
1301
1302 return V(result);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001303 }
1304
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001305 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001306 {
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001307 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1308 ::basicBlock->appendInst(switchInst);
1309
1310 return reinterpret_cast<SwitchCases*>(switchInst);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001311 }
1312
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001313 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001314 {
Nicolas Capensb98fe5c2016-11-09 12:24:06 -05001315 switchCases->addBranch(label, label, branch);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001316 }
1317
1318 void Nucleus::createUnreachable()
1319 {
Nicolas Capensfdcca2d2016-10-20 11:31:36 -04001320 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1321 ::basicBlock->appendInst(unreachable);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001322 }
1323
Nicolas Capens598f8d82016-09-26 15:09:10 -04001324 Type *Nucleus::getPointerType(Type *ElementType)
1325 {
Nicolas Capense12780d2016-09-27 14:18:07 -04001326 if(sizeof(void*) == 8)
1327 {
1328 return T(Ice::IceType_i64);
1329 }
1330 else
1331 {
1332 return T(Ice::IceType_i32);
1333 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001334 }
1335
Nicolas Capens13ac2322016-10-13 14:52:12 -04001336 Value *Nucleus::createNullValue(Type *Ty)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001337 {
Nicolas Capens73dd7a22016-10-20 13:20:34 -04001338 if(Ice::isVectorType(T(Ty)))
1339 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001340 ASSERT(Ice::typeNumElements(T(Ty)) <= 16);
Nicolas Capens30385f02017-04-18 13:03:47 -04001341 int64_t c[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
Nicolas Capens73dd7a22016-10-20 13:20:34 -04001342 return createConstantVector(c, Ty);
1343 }
1344 else
1345 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001346 return V(::context->getConstantZero(T(Ty)));
Nicolas Capens73dd7a22016-10-20 13:20:34 -04001347 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001348 }
1349
Nicolas Capens13ac2322016-10-13 14:52:12 -04001350 Value *Nucleus::createConstantLong(int64_t i)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001351 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001352 return V(::context->getConstantInt64(i));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001353 }
1354
Nicolas Capens13ac2322016-10-13 14:52:12 -04001355 Value *Nucleus::createConstantInt(int i)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001356 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001357 return V(::context->getConstantInt32(i));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001358 }
1359
Nicolas Capens13ac2322016-10-13 14:52:12 -04001360 Value *Nucleus::createConstantInt(unsigned int i)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001361 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001362 return V(::context->getConstantInt32(i));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001363 }
1364
Nicolas Capens13ac2322016-10-13 14:52:12 -04001365 Value *Nucleus::createConstantBool(bool b)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001366 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001367 return V(::context->getConstantInt1(b));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001368 }
1369
Nicolas Capens13ac2322016-10-13 14:52:12 -04001370 Value *Nucleus::createConstantByte(signed char i)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001371 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001372 return V(::context->getConstantInt8(i));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001373 }
1374
Nicolas Capens13ac2322016-10-13 14:52:12 -04001375 Value *Nucleus::createConstantByte(unsigned char i)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001376 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001377 return V(::context->getConstantInt8(i));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001378 }
1379
Nicolas Capens13ac2322016-10-13 14:52:12 -04001380 Value *Nucleus::createConstantShort(short i)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001381 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001382 return V(::context->getConstantInt16(i));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001383 }
1384
Nicolas Capens13ac2322016-10-13 14:52:12 -04001385 Value *Nucleus::createConstantShort(unsigned short i)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001386 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001387 return V(::context->getConstantInt16(i));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001388 }
1389
Nicolas Capens13ac2322016-10-13 14:52:12 -04001390 Value *Nucleus::createConstantFloat(float x)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001391 {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001392 return V(::context->getConstantFloat(x));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001393 }
1394
Nicolas Capens13ac2322016-10-13 14:52:12 -04001395 Value *Nucleus::createNullPointer(Type *Ty)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001396 {
Nicolas Capensa29d6532016-12-05 21:38:09 -05001397 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001398 }
1399
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001400 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
Nicolas Capens13ac2322016-10-13 14:52:12 -04001401 {
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001402 const int vectorSize = 16;
Ben Claytoneb50d252019-04-15 13:50:01 -04001403 ASSERT(Ice::typeWidthInBytes(T(type)) == vectorSize);
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001404 const int alignment = vectorSize;
1405 auto globalPool = ::function->getGlobalPool();
1406
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001407 const int64_t *i = constants;
1408 const double *f = reinterpret_cast<const double*>(constants);
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001409 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001410
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001411 switch((int)reinterpret_cast<intptr_t>(type))
1412 {
1413 case Ice::IceType_v4i32:
Nicolas Capensa4c30b02016-11-08 15:43:17 -05001414 case Ice::IceType_v4i1:
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001415 {
1416 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1417 static_assert(sizeof(initializer) == vectorSize, "!");
1418 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1419 }
1420 break;
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001421 case Ice::IceType_v4f32:
1422 {
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001423 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001424 static_assert(sizeof(initializer) == vectorSize, "!");
1425 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1426 }
1427 break;
1428 case Ice::IceType_v8i16:
Nicolas Capensa4c30b02016-11-08 15:43:17 -05001429 case Ice::IceType_v8i1:
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001430 {
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001431 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001432 static_assert(sizeof(initializer) == vectorSize, "!");
1433 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1434 }
1435 break;
1436 case Ice::IceType_v16i8:
Nicolas Capensa4c30b02016-11-08 15:43:17 -05001437 case Ice::IceType_v16i1:
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001438 {
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001439 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001440 static_assert(sizeof(initializer) == vectorSize, "!");
1441 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1442 }
1443 break;
1444 case Type_v2i32:
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001445 {
1446 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1447 static_assert(sizeof(initializer) == vectorSize, "!");
1448 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1449 }
1450 break;
Nicolas Capens4cfd4572016-10-20 01:00:19 -04001451 case Type_v2f32:
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001452 {
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001453 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001454 static_assert(sizeof(initializer) == vectorSize, "!");
1455 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1456 }
1457 break;
1458 case Type_v4i16:
1459 {
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001460 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001461 static_assert(sizeof(initializer) == vectorSize, "!");
1462 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1463 }
1464 break;
1465 case Type_v8i8:
1466 {
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001467 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001468 static_assert(sizeof(initializer) == vectorSize, "!");
1469 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1470 }
1471 break;
1472 case Type_v4i8:
1473 {
Nicolas Capens7f3f69c2016-10-20 01:29:33 -04001474 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001475 static_assert(sizeof(initializer) == vectorSize, "!");
1476 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1477 }
1478 break;
1479 default:
Ben Claytoneb50d252019-04-15 13:50:01 -04001480 UNREACHABLE("Unknown constant vector type: %d", (int)reinterpret_cast<intptr_t>(type));
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001481 }
1482
1483 auto name = Ice::GlobalString::createWithoutString(::context);
1484 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1485 variableDeclaration->setName(name);
1486 variableDeclaration->setAlignment(alignment);
1487 variableDeclaration->setIsConstant(true);
1488 variableDeclaration->addInitializer(dataInitializer);
Nicolas Capens87852e12016-11-24 14:45:06 -05001489
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001490 ::function->addGlobal(variableDeclaration);
1491
1492 constexpr int32_t offset = 0;
1493 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1494
1495 Ice::Variable *result = ::function->makeVariable(T(type));
1496 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1497 ::basicBlock->appendInst(load);
1498
1499 return V(result);
Nicolas Capens13ac2322016-10-13 14:52:12 -04001500 }
1501
1502 Value *Nucleus::createConstantVector(const double *constants, Type *type)
Nicolas Capens598f8d82016-09-26 15:09:10 -04001503 {
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04001504 return createConstantVector((const int64_t*)constants, type);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001505 }
1506
1507 Type *Void::getType()
1508 {
1509 return T(Ice::IceType_void);
1510 }
1511
Nicolas Capens598f8d82016-09-26 15:09:10 -04001512 Type *Bool::getType()
1513 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04001514 return T(Ice::IceType_i1);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001515 }
1516
Nicolas Capens598f8d82016-09-26 15:09:10 -04001517 Type *Byte::getType()
1518 {
Nicolas Capens6d738712016-09-30 04:15:22 -04001519 return T(Ice::IceType_i8);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001520 }
1521
Nicolas Capens598f8d82016-09-26 15:09:10 -04001522 Type *SByte::getType()
1523 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04001524 return T(Ice::IceType_i8);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001525 }
1526
Nicolas Capens598f8d82016-09-26 15:09:10 -04001527 Type *Short::getType()
1528 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04001529 return T(Ice::IceType_i16);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001530 }
1531
Nicolas Capens598f8d82016-09-26 15:09:10 -04001532 Type *UShort::getType()
1533 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04001534 return T(Ice::IceType_i16);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001535 }
1536
1537 Type *Byte4::getType()
1538 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04001539 return T(Type_v4i8);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001540 }
1541
1542 Type *SByte4::getType()
1543 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04001544 return T(Type_v4i8);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001545 }
1546
Nicolas Capensb6d4ce32019-03-12 23:00:24 -04001547 namespace
Nicolas Capens598f8d82016-09-26 15:09:10 -04001548 {
Nicolas Capensb6d4ce32019-03-12 23:00:24 -04001549 RValue<Byte> SaturateUnsigned(RValue<Short> x)
1550 {
1551 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), IfThenElse(Int(x) < 0, Int(0), Int(x))));
1552 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001553
Nicolas Capensb6d4ce32019-03-12 23:00:24 -04001554 RValue<Byte> Extract(RValue<Byte8> val, int i)
1555 {
1556 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
1557 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001558
Nicolas Capensb6d4ce32019-03-12 23:00:24 -04001559 RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
1560 {
1561 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
1562 }
Nicolas Capens98436732017-07-25 15:32:12 -04001563 }
1564
Nicolas Capens598f8d82016-09-26 15:09:10 -04001565 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
1566 {
Nicolas Capens98436732017-07-25 15:32:12 -04001567 if(emulateIntrinsics)
1568 {
1569 Byte8 result;
Nicolas Capens33438a62017-09-27 11:47:35 -04001570 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1571 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1572 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1573 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1574 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1575 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1576 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1577 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001578
Nicolas Capens98436732017-07-25 15:32:12 -04001579 return result;
1580 }
1581 else
1582 {
1583 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1584 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1585 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1586 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1587 paddusb->addArg(x.value);
1588 paddusb->addArg(y.value);
1589 ::basicBlock->appendInst(paddusb);
1590
1591 return RValue<Byte8>(V(result));
1592 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001593 }
1594
1595 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
1596 {
Nicolas Capens98436732017-07-25 15:32:12 -04001597 if(emulateIntrinsics)
1598 {
1599 Byte8 result;
Nicolas Capens33438a62017-09-27 11:47:35 -04001600 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1601 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1602 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1603 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1604 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1605 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1606 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1607 result = Insert(result, SaturateUnsigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001608
Nicolas Capens98436732017-07-25 15:32:12 -04001609 return result;
1610 }
1611 else
1612 {
1613 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1614 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1615 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1616 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1617 psubusw->addArg(x.value);
1618 psubusw->addArg(y.value);
1619 ::basicBlock->appendInst(psubusw);
1620
1621 return RValue<Byte8>(V(result));
1622 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001623 }
1624
Nicolas Capensd6cacad2017-07-25 15:32:12 -04001625 RValue<SByte> Extract(RValue<SByte8> val, int i)
1626 {
1627 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
1628 }
1629
1630 RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
1631 {
1632 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
1633 }
1634
1635 RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1636 {
1637 if(emulateIntrinsics)
1638 {
1639 SByte8 result;
1640 result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
1641 result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
1642 result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
1643 result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
1644 result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
1645 result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
1646 result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
1647 result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
1648
1649 return result;
1650 }
1651 else
1652 {
1653 #if defined(__i386__) || defined(__x86_64__)
1654 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
Alexis Hetue18c5302017-08-04 11:48:17 -04001655 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00u);
Nicolas Capensd6cacad2017-07-25 15:32:12 -04001656 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
1657
1658 return As<SByte8>(hi | lo);
1659 #else
1660 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
1661 #endif
1662 }
1663 }
1664
Nicolas Capens598f8d82016-09-26 15:09:10 -04001665 RValue<Int> SignMask(RValue<Byte8> x)
1666 {
Nicolas Capens091f3502017-10-03 14:56:49 -04001667 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capensd6cacad2017-07-25 15:32:12 -04001668 {
1669 Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
1670 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
1671 }
1672 else
1673 {
1674 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
1675 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1676 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1677 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
1678 movmsk->addArg(x.value);
1679 ::basicBlock->appendInst(movmsk);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001680
Nicolas Capens0f70a7f2017-07-26 13:50:04 -04001681 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capensd6cacad2017-07-25 15:32:12 -04001682 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001683 }
1684
1685// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
1686// {
Nicolas Capens2f970b62016-11-08 14:28:59 -05001687// return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001688// }
1689
1690 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
1691 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05001692 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001693 }
1694
1695 Type *Byte8::getType()
1696 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04001697 return T(Type_v8i8);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001698 }
1699
Nicolas Capens598f8d82016-09-26 15:09:10 -04001700// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
1701// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001702// return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001703// }
1704
1705// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
1706// {
Nicolas Capens15060bb2016-12-05 22:17:19 -05001707// return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001708// }
1709
Nicolas Capens33438a62017-09-27 11:47:35 -04001710 RValue<SByte> SaturateSigned(RValue<Short> x)
Nicolas Capens98436732017-07-25 15:32:12 -04001711 {
1712 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
1713 }
1714
Nicolas Capens598f8d82016-09-26 15:09:10 -04001715 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
1716 {
Nicolas Capens98436732017-07-25 15:32:12 -04001717 if(emulateIntrinsics)
1718 {
1719 SByte8 result;
Nicolas Capens33438a62017-09-27 11:47:35 -04001720 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) + Int(Extract(y, 0)))), 0);
1721 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) + Int(Extract(y, 1)))), 1);
1722 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) + Int(Extract(y, 2)))), 2);
1723 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) + Int(Extract(y, 3)))), 3);
1724 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) + Int(Extract(y, 4)))), 4);
1725 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) + Int(Extract(y, 5)))), 5);
1726 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) + Int(Extract(y, 6)))), 6);
1727 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) + Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001728
Nicolas Capens98436732017-07-25 15:32:12 -04001729 return result;
1730 }
1731 else
1732 {
1733 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1734 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1735 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1736 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1737 paddsb->addArg(x.value);
1738 paddsb->addArg(y.value);
1739 ::basicBlock->appendInst(paddsb);
1740
1741 return RValue<SByte8>(V(result));
1742 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001743 }
1744
1745 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
1746 {
Nicolas Capens98436732017-07-25 15:32:12 -04001747 if(emulateIntrinsics)
1748 {
1749 SByte8 result;
Nicolas Capens33438a62017-09-27 11:47:35 -04001750 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 0)) - Int(Extract(y, 0)))), 0);
1751 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 1)) - Int(Extract(y, 1)))), 1);
1752 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 2)) - Int(Extract(y, 2)))), 2);
1753 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 3)) - Int(Extract(y, 3)))), 3);
1754 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 4)) - Int(Extract(y, 4)))), 4);
1755 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 5)) - Int(Extract(y, 5)))), 5);
1756 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 6)) - Int(Extract(y, 6)))), 6);
1757 result = Insert(result, SaturateSigned(Short(Int(Extract(x, 7)) - Int(Extract(y, 7)))), 7);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001758
Nicolas Capens98436732017-07-25 15:32:12 -04001759 return result;
1760 }
1761 else
1762 {
1763 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
1764 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1765 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1766 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1767 psubsb->addArg(x.value);
1768 psubsb->addArg(y.value);
1769 ::basicBlock->appendInst(psubsb);
1770
1771 return RValue<SByte8>(V(result));
1772 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001773 }
1774
Nicolas Capens598f8d82016-09-26 15:09:10 -04001775 RValue<Int> SignMask(RValue<SByte8> x)
1776 {
Nicolas Capens091f3502017-10-03 14:56:49 -04001777 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capensd6cacad2017-07-25 15:32:12 -04001778 {
1779 SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
1780 return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
1781 }
1782 else
1783 {
1784 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
1785 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1786 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1787 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
1788 movmsk->addArg(x.value);
1789 ::basicBlock->appendInst(movmsk);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04001790
Nicolas Capens0f70a7f2017-07-26 13:50:04 -04001791 return RValue<Int>(V(result)) & 0xFF;
Nicolas Capensd6cacad2017-07-25 15:32:12 -04001792 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001793 }
1794
1795 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
1796 {
Nicolas Capens2f970b62016-11-08 14:28:59 -05001797 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001798 }
1799
1800 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
1801 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05001802 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001803 }
1804
1805 Type *SByte8::getType()
1806 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04001807 return T(Type_v8i8);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001808 }
1809
Nicolas Capens598f8d82016-09-26 15:09:10 -04001810 Type *Byte16::getType()
1811 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04001812 return T(Ice::IceType_v16i8);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001813 }
1814
1815 Type *SByte16::getType()
1816 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04001817 return T(Ice::IceType_v16i8);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001818 }
1819
Nicolas Capens16b5f152016-10-13 13:39:01 -04001820 Type *Short2::getType()
1821 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04001822 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04001823 }
1824
Nicolas Capens16b5f152016-10-13 13:39:01 -04001825 Type *UShort2::getType()
1826 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04001827 return T(Type_v2i16);
Nicolas Capens16b5f152016-10-13 13:39:01 -04001828 }
1829
Nicolas Capens598f8d82016-09-26 15:09:10 -04001830 Short4::Short4(RValue<Int4> cast)
1831 {
Nicolas Capensf8beb4b2017-01-27 02:55:44 -08001832 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
1833 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
1834 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
Nicolas Capensd4227962016-11-09 14:24:25 -05001835
Nicolas Capensbea4dce2017-07-24 16:54:44 -04001836 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
Nicolas Capensd4227962016-11-09 14:24:25 -05001837 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
1838
1839 storeValue(short4);
Nicolas Capens598f8d82016-09-26 15:09:10 -04001840 }
1841
1842// Short4::Short4(RValue<Float> cast)
1843// {
1844// }
1845
1846 Short4::Short4(RValue<Float4> cast)
1847 {
Ben Claytoneb50d252019-04-15 13:50:01 -04001848 UNIMPLEMENTED("Short4::Short4(RValue<Float4> cast)");
Nicolas Capens598f8d82016-09-26 15:09:10 -04001849 }
1850
Nicolas Capens598f8d82016-09-26 15:09:10 -04001851 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
1852 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04001853 if(emulateIntrinsics)
1854 {
1855 Short4 result;
1856 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
1857 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
1858 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
1859 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
1860
1861 return result;
1862 }
1863 else
1864 {
1865 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
1866 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001867 }
1868
1869 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
1870 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04001871 if(emulateIntrinsics)
1872 {
1873 Short4 result;
1874 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
1875 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
1876 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
1877 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
1878
1879 return result;
1880 }
1881 else
1882 {
1883 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
1884 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001885 }
1886
Nicolas Capens598f8d82016-09-26 15:09:10 -04001887 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
1888 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04001889 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
1890 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
1891 ::basicBlock->appendInst(cmp);
1892
1893 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
1894 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
1895 ::basicBlock->appendInst(select);
1896
1897 return RValue<Short4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001898 }
1899
1900 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
1901 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04001902 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
1903 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
1904 ::basicBlock->appendInst(cmp);
1905
1906 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
1907 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
1908 ::basicBlock->appendInst(select);
1909
1910 return RValue<Short4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04001911 }
1912
Nicolas Capens33438a62017-09-27 11:47:35 -04001913 RValue<Short> SaturateSigned(RValue<Int> x)
Nicolas Capens98436732017-07-25 15:32:12 -04001914 {
1915 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
1916 }
1917
Nicolas Capens598f8d82016-09-26 15:09:10 -04001918 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
1919 {
Nicolas Capens98436732017-07-25 15:32:12 -04001920 if(emulateIntrinsics)
1921 {
1922 Short4 result;
Nicolas Capens33438a62017-09-27 11:47:35 -04001923 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
1924 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
1925 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
1926 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001927
Nicolas Capens98436732017-07-25 15:32:12 -04001928 return result;
1929 }
1930 else
1931 {
1932 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
1933 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1934 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1935 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1936 paddsw->addArg(x.value);
1937 paddsw->addArg(y.value);
1938 ::basicBlock->appendInst(paddsw);
1939
1940 return RValue<Short4>(V(result));
1941 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001942 }
1943
1944 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
1945 {
Nicolas Capens98436732017-07-25 15:32:12 -04001946 if(emulateIntrinsics)
1947 {
1948 Short4 result;
Nicolas Capens33438a62017-09-27 11:47:35 -04001949 result = Insert(result, SaturateSigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
1950 result = Insert(result, SaturateSigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
1951 result = Insert(result, SaturateSigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
1952 result = Insert(result, SaturateSigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001953
Nicolas Capens98436732017-07-25 15:32:12 -04001954 return result;
1955 }
1956 else
1957 {
1958 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
1959 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1960 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1961 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1962 psubsw->addArg(x.value);
1963 psubsw->addArg(y.value);
1964 ::basicBlock->appendInst(psubsw);
1965
1966 return RValue<Short4>(V(result));
1967 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001968 }
1969
1970 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
1971 {
Nicolas Capens6c157442017-07-25 15:32:12 -04001972 if(emulateIntrinsics)
1973 {
1974 Short4 result;
1975 result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
1976 result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
1977 result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
1978 result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
Nicolas Capensc71bed22016-11-07 22:25:14 -05001979
Nicolas Capens6c157442017-07-25 15:32:12 -04001980 return result;
1981 }
1982 else
1983 {
1984 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
1985 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
1986 auto target = ::context->getConstantUndef(Ice::IceType_i32);
1987 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
1988 pmulhw->addArg(x.value);
1989 pmulhw->addArg(y.value);
1990 ::basicBlock->appendInst(pmulhw);
1991
1992 return RValue<Short4>(V(result));
1993 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04001994 }
1995
1996 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
1997 {
Nicolas Capensafe27e92017-07-25 15:32:12 -04001998 if(emulateIntrinsics)
1999 {
2000 Int2 result;
2001 result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
2002 result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002003
Nicolas Capensafe27e92017-07-25 15:32:12 -04002004 return result;
2005 }
2006 else
2007 {
2008 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2009 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2010 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2011 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2012 pmaddwd->addArg(x.value);
2013 pmaddwd->addArg(y.value);
2014 ::basicBlock->appendInst(pmaddwd);
2015
2016 return As<Int2>(V(result));
2017 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002018 }
2019
Nicolas Capens33438a62017-09-27 11:47:35 -04002020 RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002021 {
Nicolas Capens8960fbf2017-07-25 15:32:12 -04002022 if(emulateIntrinsics)
2023 {
2024 SByte8 result;
Nicolas Capens33438a62017-09-27 11:47:35 -04002025 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2026 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2027 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2028 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2029 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2030 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2031 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2032 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capensec54a172016-10-25 17:32:37 -04002033
Nicolas Capens8960fbf2017-07-25 15:32:12 -04002034 return result;
2035 }
2036 else
2037 {
2038 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2039 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2040 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2041 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2042 pack->addArg(x.value);
2043 pack->addArg(y.value);
2044 ::basicBlock->appendInst(pack);
2045
2046 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
2047 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002048 }
2049
Nicolas Capens33438a62017-09-27 11:47:35 -04002050 RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2051 {
2052 if(emulateIntrinsics)
2053 {
2054 Byte8 result;
2055 result = Insert(result, SaturateUnsigned(Extract(x, 0)), 0);
2056 result = Insert(result, SaturateUnsigned(Extract(x, 1)), 1);
2057 result = Insert(result, SaturateUnsigned(Extract(x, 2)), 2);
2058 result = Insert(result, SaturateUnsigned(Extract(x, 3)), 3);
2059 result = Insert(result, SaturateUnsigned(Extract(y, 0)), 4);
2060 result = Insert(result, SaturateUnsigned(Extract(y, 1)), 5);
2061 result = Insert(result, SaturateUnsigned(Extract(y, 2)), 6);
2062 result = Insert(result, SaturateUnsigned(Extract(y, 3)), 7);
2063
2064 return result;
2065 }
2066 else
2067 {
2068 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2069 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2070 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2071 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2072 pack->addArg(x.value);
2073 pack->addArg(y.value);
2074 ::basicBlock->appendInst(pack);
2075
2076 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
2077 }
2078 }
2079
Nicolas Capens598f8d82016-09-26 15:09:10 -04002080 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2081 {
Nicolas Capens2f970b62016-11-08 14:28:59 -05002082 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002083 }
2084
2085 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2086 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05002087 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002088 }
2089
2090 Type *Short4::getType()
2091 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04002092 return T(Type_v4i16);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002093 }
2094
Nicolas Capens598f8d82016-09-26 15:09:10 -04002095 UShort4::UShort4(RValue<Float4> cast, bool saturate)
2096 {
Nicolas Capensd4227962016-11-09 14:24:25 -05002097 if(saturate)
2098 {
Nicolas Capens9ca48d52017-01-14 12:52:55 -05002099 if(CPUID::SSE4_1)
Nicolas Capensd4227962016-11-09 14:24:25 -05002100 {
Nicolas Capens091f3502017-10-03 14:56:49 -04002101 // x86 produces 0x80000000 on 32-bit integer overflow/underflow.
2102 // PackUnsigned takes care of 0x0000 saturation.
2103 Int4 int4(Min(cast, Float4(0xFFFF)));
2104 *this = As<UShort4>(PackUnsigned(int4, int4));
2105 }
2106 else if(CPUID::ARM)
2107 {
2108 // ARM saturates the 32-bit integer result on overflow/undeflow.
2109 Int4 int4(cast);
Nicolas Capens33438a62017-09-27 11:47:35 -04002110 *this = As<UShort4>(PackUnsigned(int4, int4));
Nicolas Capensd4227962016-11-09 14:24:25 -05002111 }
2112 else
2113 {
2114 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
2115 }
2116 }
2117 else
2118 {
2119 *this = Short4(Int4(cast));
2120 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002121 }
2122
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002123 RValue<UShort> Extract(RValue<UShort4> val, int i)
2124 {
2125 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2126 }
2127
2128 RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
2129 {
2130 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
2131 }
2132
Nicolas Capens598f8d82016-09-26 15:09:10 -04002133 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
2134 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002135 if(emulateIntrinsics)
2136 {
2137 UShort4 result;
2138 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2139 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2140 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2141 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2142
2143 return result;
2144 }
2145 else
2146 {
2147 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2148 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002149 }
2150
2151 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
2152 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002153 if(emulateIntrinsics)
2154 {
2155 UShort4 result;
2156 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2157 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2158 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2159 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2160
2161 return result;
2162 }
2163 else
2164 {
2165 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2166 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002167 }
2168
Nicolas Capens598f8d82016-09-26 15:09:10 -04002169 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
2170 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04002171 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2172 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
2173 ::basicBlock->appendInst(cmp);
2174
2175 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2176 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2177 ::basicBlock->appendInst(select);
2178
2179 return RValue<UShort4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002180 }
2181
2182 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
2183 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04002184 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
2185 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
2186 ::basicBlock->appendInst(cmp);
2187
2188 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2189 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2190 ::basicBlock->appendInst(select);
2191
2192 return RValue<UShort4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002193 }
2194
Nicolas Capens7f301812017-10-02 17:32:34 -04002195 RValue<UShort> SaturateUnsigned(RValue<Int> x)
Nicolas Capens98436732017-07-25 15:32:12 -04002196 {
2197 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
2198 }
2199
Nicolas Capens598f8d82016-09-26 15:09:10 -04002200 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
2201 {
Nicolas Capens98436732017-07-25 15:32:12 -04002202 if(emulateIntrinsics)
2203 {
2204 UShort4 result;
Nicolas Capens7f301812017-10-02 17:32:34 -04002205 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
2206 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
2207 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
2208 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002209
Nicolas Capens98436732017-07-25 15:32:12 -04002210 return result;
2211 }
2212 else
2213 {
2214 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2215 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2216 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2217 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2218 paddusw->addArg(x.value);
2219 paddusw->addArg(y.value);
2220 ::basicBlock->appendInst(paddusw);
2221
2222 return RValue<UShort4>(V(result));
2223 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002224 }
2225
2226 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
2227 {
Nicolas Capens98436732017-07-25 15:32:12 -04002228 if(emulateIntrinsics)
2229 {
2230 UShort4 result;
Nicolas Capens7f301812017-10-02 17:32:34 -04002231 result = Insert(result, SaturateUnsigned(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
2232 result = Insert(result, SaturateUnsigned(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
2233 result = Insert(result, SaturateUnsigned(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
2234 result = Insert(result, SaturateUnsigned(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002235
Nicolas Capens98436732017-07-25 15:32:12 -04002236 return result;
2237 }
2238 else
2239 {
2240 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2241 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2242 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2243 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2244 psubusw->addArg(x.value);
2245 psubusw->addArg(y.value);
2246 ::basicBlock->appendInst(psubusw);
2247
2248 return RValue<UShort4>(V(result));
2249 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002250 }
2251
2252 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
2253 {
Nicolas Capens6c157442017-07-25 15:32:12 -04002254 if(emulateIntrinsics)
2255 {
2256 UShort4 result;
2257 result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
2258 result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
2259 result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
2260 result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
Nicolas Capensc71bed22016-11-07 22:25:14 -05002261
Nicolas Capens6c157442017-07-25 15:32:12 -04002262 return result;
2263 }
2264 else
2265 {
2266 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2267 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2268 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2269 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2270 pmulhuw->addArg(x.value);
2271 pmulhuw->addArg(y.value);
2272 ::basicBlock->appendInst(pmulhuw);
2273
2274 return RValue<UShort4>(V(result));
2275 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002276 }
2277
Chris Forbesaa8f6992019-03-01 14:18:30 -08002278 RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
2279 {
2280 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2281
2282 // Scalarized implementation.
2283 Int4 result;
2284 result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
2285 result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
2286 result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
2287 result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
2288
2289 return result;
2290 }
2291
2292 RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
2293 {
2294 // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
2295
2296 if(false) // Partial product based implementation.
2297 {
2298 auto xh = x >> 16;
2299 auto yh = y >> 16;
2300 auto xl = x & UInt4(0x0000FFFF);
2301 auto yl = y & UInt4(0x0000FFFF);
2302 auto xlyh = xl * yh;
2303 auto xhyl = xh * yl;
2304 auto xlyhh = xlyh >> 16;
2305 auto xhylh = xhyl >> 16;
2306 auto xlyhl = xlyh & UInt4(0x0000FFFF);
2307 auto xhyll = xhyl & UInt4(0x0000FFFF);
2308 auto xlylh = (xl * yl) >> 16;
2309 auto oflow = (xlyhl + xhyll + xlylh) >> 16;
2310
2311 return (xh * yh) + (xlyhh + xhylh) + oflow;
2312 }
2313
2314 // Scalarized implementation.
2315 Int4 result;
2316 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
2317 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
2318 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
2319 result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
2320
2321 return As<UInt4>(result);
2322 }
2323
Nicolas Capens598f8d82016-09-26 15:09:10 -04002324 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
2325 {
Ben Claytoneb50d252019-04-15 13:50:01 -04002326 UNIMPLEMENTED("RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)");
2327 return UShort4(0);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002328 }
2329
Nicolas Capens598f8d82016-09-26 15:09:10 -04002330 Type *UShort4::getType()
2331 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04002332 return T(Type_v4i16);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002333 }
2334
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002335 RValue<Short> Extract(RValue<Short8> val, int i)
2336 {
2337 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2338 }
2339
2340 RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
2341 {
2342 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
2343 }
2344
Nicolas Capens598f8d82016-09-26 15:09:10 -04002345 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
2346 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002347 if(emulateIntrinsics)
2348 {
2349 Short8 result;
2350 result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
2351 result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
2352 result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
2353 result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
2354 result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
2355 result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
2356 result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
2357 result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
2358
2359 return result;
2360 }
2361 else
2362 {
2363 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2364 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002365 }
2366
2367 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
2368 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002369 if(emulateIntrinsics)
2370 {
2371 Short8 result;
2372 result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
2373 result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
2374 result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
2375 result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
2376 result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
2377 result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
2378 result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
2379 result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
2380
2381 return result;
2382 }
2383 else
2384 {
2385 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2386 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002387 }
2388
2389 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
2390 {
Ben Claytoneb50d252019-04-15 13:50:01 -04002391 UNIMPLEMENTED("RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)");
2392 return Int4(0);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002393 }
2394
Nicolas Capens598f8d82016-09-26 15:09:10 -04002395 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
2396 {
Ben Claytoneb50d252019-04-15 13:50:01 -04002397 UNIMPLEMENTED("RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)");
2398 return Short8(0);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002399 }
2400
2401 Type *Short8::getType()
2402 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04002403 return T(Ice::IceType_v8i16);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002404 }
2405
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002406 RValue<UShort> Extract(RValue<UShort8> val, int i)
2407 {
2408 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
2409 }
2410
2411 RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
2412 {
2413 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
2414 }
2415
Nicolas Capens598f8d82016-09-26 15:09:10 -04002416 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
2417 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002418 if(emulateIntrinsics)
2419 {
2420 UShort8 result;
2421 result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
2422 result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
2423 result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
2424 result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
2425 result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
2426 result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
2427 result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
2428 result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
2429
2430 return result;
2431 }
2432 else
2433 {
2434 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2435 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002436 }
2437
2438 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
2439 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002440 if(emulateIntrinsics)
2441 {
2442 UShort8 result;
2443 result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
2444 result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
2445 result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
2446 result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
2447 result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
2448 result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
2449 result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
2450 result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
2451
2452 return result;
2453 }
2454 else
2455 {
2456 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2457 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002458 }
2459
Nicolas Capens598f8d82016-09-26 15:09:10 -04002460 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
2461 {
Ben Claytoneb50d252019-04-15 13:50:01 -04002462 UNIMPLEMENTED("RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)");
2463 return UShort8(0);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002464 }
2465
2466 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
2467 {
Ben Claytoneb50d252019-04-15 13:50:01 -04002468 UNIMPLEMENTED("RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)");
2469 return UShort8(0);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002470 }
2471
2472 // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
2473// RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
2474// {
Ben Claytoneb50d252019-04-15 13:50:01 -04002475// ASSERT(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002476// }
2477
2478 Type *UShort8::getType()
2479 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04002480 return T(Ice::IceType_v8i16);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002481 }
2482
Nicolas Capens96d4e092016-11-18 14:22:38 -05002483 RValue<Int> operator++(Int &val, int) // Post-increment
Nicolas Capens598f8d82016-09-26 15:09:10 -04002484 {
Nicolas Capens5b41ba32016-12-08 14:34:00 -05002485 RValue<Int> res = val;
Nicolas Capensd1229402016-11-07 16:05:22 -05002486 val += 1;
2487 return res;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002488 }
2489
Nicolas Capens96d4e092016-11-18 14:22:38 -05002490 const Int &operator++(Int &val) // Pre-increment
Nicolas Capens598f8d82016-09-26 15:09:10 -04002491 {
Nicolas Capensd1229402016-11-07 16:05:22 -05002492 val += 1;
2493 return val;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002494 }
2495
Nicolas Capens96d4e092016-11-18 14:22:38 -05002496 RValue<Int> operator--(Int &val, int) // Post-decrement
Nicolas Capens598f8d82016-09-26 15:09:10 -04002497 {
Nicolas Capensd1229402016-11-07 16:05:22 -05002498 RValue<Int> res = val;
2499 val -= 1;
2500 return res;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002501 }
2502
Nicolas Capens96d4e092016-11-18 14:22:38 -05002503 const Int &operator--(Int &val) // Pre-decrement
Nicolas Capens598f8d82016-09-26 15:09:10 -04002504 {
Nicolas Capensd1229402016-11-07 16:05:22 -05002505 val -= 1;
2506 return val;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002507 }
2508
Nicolas Capens598f8d82016-09-26 15:09:10 -04002509 RValue<Int> RoundInt(RValue<Float> cast)
2510 {
Nicolas Capens091f3502017-10-03 14:56:49 -04002511 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capensf7b75882017-04-26 09:30:47 -04002512 {
2513 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2514 return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
2515 }
2516 else
2517 {
2518 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2519 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2520 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2521 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2522 nearbyint->addArg(cast.value);
2523 ::basicBlock->appendInst(nearbyint);
Nicolas Capensa8086512016-11-07 17:32:17 -05002524
Nicolas Capensf7b75882017-04-26 09:30:47 -04002525 return RValue<Int>(V(result));
2526 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002527 }
2528
2529 Type *Int::getType()
2530 {
2531 return T(Ice::IceType_i32);
2532 }
2533
Nicolas Capens598f8d82016-09-26 15:09:10 -04002534 Type *Long::getType()
2535 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04002536 return T(Ice::IceType_i64);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002537 }
2538
Nicolas Capens598f8d82016-09-26 15:09:10 -04002539 UInt::UInt(RValue<Float> cast)
2540 {
Nicolas Capensc70a1162016-12-03 00:16:14 -05002541 // Smallest positive value representable in UInt, but not in Int
2542 const unsigned int ustart = 0x80000000u;
2543 const float ustartf = float(ustart);
2544
2545 // If the value is negative, store 0, otherwise store the result of the conversion
2546 storeValue((~(As<Int>(cast) >> 31) &
2547 // Check if the value can be represented as an Int
2548 IfThenElse(cast >= ustartf,
2549 // If the value is too large, subtract ustart and re-add it after conversion.
2550 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
2551 // Otherwise, just convert normally
2552 Int(cast))).value);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002553 }
2554
Nicolas Capens96d4e092016-11-18 14:22:38 -05002555 RValue<UInt> operator++(UInt &val, int) // Post-increment
Nicolas Capens598f8d82016-09-26 15:09:10 -04002556 {
Nicolas Capensd1229402016-11-07 16:05:22 -05002557 RValue<UInt> res = val;
2558 val += 1;
2559 return res;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002560 }
2561
Nicolas Capens96d4e092016-11-18 14:22:38 -05002562 const UInt &operator++(UInt &val) // Pre-increment
Nicolas Capens598f8d82016-09-26 15:09:10 -04002563 {
Nicolas Capensd1229402016-11-07 16:05:22 -05002564 val += 1;
2565 return val;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002566 }
2567
Nicolas Capens96d4e092016-11-18 14:22:38 -05002568 RValue<UInt> operator--(UInt &val, int) // Post-decrement
Nicolas Capens598f8d82016-09-26 15:09:10 -04002569 {
Nicolas Capensd1229402016-11-07 16:05:22 -05002570 RValue<UInt> res = val;
2571 val -= 1;
2572 return res;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002573 }
2574
Nicolas Capens96d4e092016-11-18 14:22:38 -05002575 const UInt &operator--(UInt &val) // Pre-decrement
Nicolas Capens598f8d82016-09-26 15:09:10 -04002576 {
Nicolas Capensd1229402016-11-07 16:05:22 -05002577 val -= 1;
2578 return val;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002579 }
2580
Nicolas Capens598f8d82016-09-26 15:09:10 -04002581// RValue<UInt> RoundUInt(RValue<Float> cast)
2582// {
Ben Claytoneb50d252019-04-15 13:50:01 -04002583// ASSERT(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002584// }
2585
2586 Type *UInt::getType()
2587 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04002588 return T(Ice::IceType_i32);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002589 }
2590
2591// Int2::Int2(RValue<Int> cast)
2592// {
2593// Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2594// Value *vector = Nucleus::createBitCast(extend, Int2::getType());
2595//
2596// Constant *shuffle[2];
2597// shuffle[0] = Nucleus::createConstantInt(0);
2598// shuffle[1] = Nucleus::createConstantInt(0);
2599//
2600// Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
2601//
2602// storeValue(replicate);
2603// }
2604
Nicolas Capens598f8d82016-09-26 15:09:10 -04002605 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
2606 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002607 if(emulateIntrinsics)
2608 {
2609 Int2 result;
2610 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2611 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
2612
2613 return result;
2614 }
2615 else
2616 {
2617 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2618 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002619 }
2620
2621 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
2622 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002623 if(emulateIntrinsics)
2624 {
2625 Int2 result;
2626 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2627 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2628
2629 return result;
2630 }
2631 else
2632 {
2633 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2634 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002635 }
2636
Nicolas Capens598f8d82016-09-26 15:09:10 -04002637 Type *Int2::getType()
2638 {
Nicolas Capens8dfd9a72016-10-13 17:44:51 -04002639 return T(Type_v2i32);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002640 }
2641
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002642 RValue<UInt> Extract(RValue<UInt2> val, int i)
2643 {
2644 return RValue<UInt>(Nucleus::createExtractElement(val.value, UInt::getType(), i));
2645 }
2646
2647 RValue<UInt2> Insert(RValue<UInt2> val, RValue<UInt> element, int i)
2648 {
2649 return RValue<UInt2>(Nucleus::createInsertElement(val.value, element.value, i));
2650 }
2651
Nicolas Capens598f8d82016-09-26 15:09:10 -04002652 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
2653 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002654 if(emulateIntrinsics)
2655 {
2656 UInt2 result;
2657 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
2658 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
2659
2660 return result;
2661 }
2662 else
2663 {
2664 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2665 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002666 }
2667
2668 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
2669 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002670 if(emulateIntrinsics)
2671 {
2672 UInt2 result;
2673 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
2674 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
2675
2676 return result;
2677 }
2678 else
2679 {
2680 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2681 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002682 }
2683
Nicolas Capens598f8d82016-09-26 15:09:10 -04002684 Type *UInt2::getType()
2685 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04002686 return T(Type_v2i32);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002687 }
2688
Nicolas Capenscb986762017-01-20 11:34:37 -05002689 Int4::Int4(RValue<Byte4> cast) : XYZW(this)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002690 {
Nicolas Capensd4227962016-11-09 14:24:25 -05002691 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2692 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2693
2694 Value *e;
2695 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
2696 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2697 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
2698
2699 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2700 Value *d = Nucleus::createBitCast(c, Short8::getType());
2701 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
2702
2703 Value *f = Nucleus::createBitCast(e, Int4::getType());
2704 storeValue(f);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002705 }
2706
Nicolas Capenscb986762017-01-20 11:34:37 -05002707 Int4::Int4(RValue<SByte4> cast) : XYZW(this)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002708 {
Nicolas Capensd4227962016-11-09 14:24:25 -05002709 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
2710 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
2711
Nicolas Capensd4227962016-11-09 14:24:25 -05002712 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
2713 Value *b = Nucleus::createBitCast(a, Byte16::getType());
2714 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
2715
2716 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
2717 Value *d = Nucleus::createBitCast(c, Short8::getType());
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002718 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
Nicolas Capensd4227962016-11-09 14:24:25 -05002719
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002720 *this = As<Int4>(e) >> 24;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002721 }
2722
Nicolas Capenscb986762017-01-20 11:34:37 -05002723 Int4::Int4(RValue<Short4> cast) : XYZW(this)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002724 {
Nicolas Capensd4227962016-11-09 14:24:25 -05002725 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
2726 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002727
2728 *this = As<Int4>(c) >> 16;
Nicolas Capens598f8d82016-09-26 15:09:10 -04002729 }
2730
Nicolas Capenscb986762017-01-20 11:34:37 -05002731 Int4::Int4(RValue<UShort4> cast) : XYZW(this)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002732 {
Nicolas Capensd4227962016-11-09 14:24:25 -05002733 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2734 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
2735 Value *d = Nucleus::createBitCast(c, Int4::getType());
2736 storeValue(d);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002737 }
2738
Nicolas Capenscb986762017-01-20 11:34:37 -05002739 Int4::Int4(RValue<Int> rhs) : XYZW(this)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002740 {
Nicolas Capensf8beb4b2017-01-27 02:55:44 -08002741 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
Nicolas Capensd4227962016-11-09 14:24:25 -05002742
2743 int swizzle[4] = {0, 0, 0, 0};
Nicolas Capensf8beb4b2017-01-27 02:55:44 -08002744 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
Nicolas Capensd4227962016-11-09 14:24:25 -05002745
2746 storeValue(replicate);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002747 }
2748
Nicolas Capens598f8d82016-09-26 15:09:10 -04002749 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
2750 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002751 if(emulateIntrinsics)
2752 {
2753 Int4 result;
2754 result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
2755 result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
2756 result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
2757 result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
2758
2759 return result;
2760 }
2761 else
2762 {
2763 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2764 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002765 }
2766
2767 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
2768 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002769 if(emulateIntrinsics)
2770 {
2771 Int4 result;
2772 result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
2773 result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
2774 result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
2775 result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
2776
2777 return result;
2778 }
2779 else
2780 {
2781 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2782 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002783 }
2784
Nicolas Capens598f8d82016-09-26 15:09:10 -04002785 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
2786 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05002787 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002788 }
2789
2790 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
2791 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05002792 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002793 }
2794
2795 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
2796 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05002797 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002798 }
2799
2800 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
2801 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05002802 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002803 }
2804
2805 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
2806 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05002807 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002808 }
2809
2810 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
2811 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05002812 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002813 }
2814
2815 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
2816 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04002817 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
2818 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
2819 ::basicBlock->appendInst(cmp);
2820
2821 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2822 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2823 ::basicBlock->appendInst(select);
2824
2825 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002826 }
2827
2828 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
2829 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04002830 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
2831 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
2832 ::basicBlock->appendInst(cmp);
2833
2834 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2835 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
2836 ::basicBlock->appendInst(select);
2837
2838 return RValue<Int4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002839 }
2840
2841 RValue<Int4> RoundInt(RValue<Float4> cast)
2842 {
Nicolas Capens091f3502017-10-03 14:56:49 -04002843 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capensf7b75882017-04-26 09:30:47 -04002844 {
2845 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
2846 return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
2847 }
2848 else
2849 {
2850 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
2851 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2852 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2853 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2854 nearbyint->addArg(cast.value);
2855 ::basicBlock->appendInst(nearbyint);
Nicolas Capensa8086512016-11-07 17:32:17 -05002856
Nicolas Capensf7b75882017-04-26 09:30:47 -04002857 return RValue<Int4>(V(result));
2858 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002859 }
2860
Nicolas Capens33438a62017-09-27 11:47:35 -04002861 RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002862 {
Nicolas Capens8960fbf2017-07-25 15:32:12 -04002863 if(emulateIntrinsics)
2864 {
2865 Short8 result;
Nicolas Capens33438a62017-09-27 11:47:35 -04002866 result = Insert(result, SaturateSigned(Extract(x, 0)), 0);
2867 result = Insert(result, SaturateSigned(Extract(x, 1)), 1);
2868 result = Insert(result, SaturateSigned(Extract(x, 2)), 2);
2869 result = Insert(result, SaturateSigned(Extract(x, 3)), 3);
2870 result = Insert(result, SaturateSigned(Extract(y, 0)), 4);
2871 result = Insert(result, SaturateSigned(Extract(y, 1)), 5);
2872 result = Insert(result, SaturateSigned(Extract(y, 2)), 6);
2873 result = Insert(result, SaturateSigned(Extract(y, 3)), 7);
Nicolas Capensec54a172016-10-25 17:32:37 -04002874
Nicolas Capens8960fbf2017-07-25 15:32:12 -04002875 return result;
2876 }
2877 else
2878 {
2879 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2880 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2881 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2882 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2883 pack->addArg(x.value);
2884 pack->addArg(y.value);
2885 ::basicBlock->appendInst(pack);
2886
2887 return RValue<Short8>(V(result));
2888 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002889 }
2890
Nicolas Capens33438a62017-09-27 11:47:35 -04002891 RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
2892 {
Nicolas Capens091f3502017-10-03 14:56:49 -04002893 if(emulateIntrinsics || !(CPUID::SSE4_1 || CPUID::ARM))
2894 {
2895 RValue<Int4> sx = As<Int4>(x);
2896 RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
2897
2898 RValue<Int4> sy = As<Int4>(y);
2899 RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
2900
2901 return As<UShort8>(PackSigned(bx, by) + Short8(0x8000u));
2902 }
2903 else
Nicolas Capens33438a62017-09-27 11:47:35 -04002904 {
2905 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
2906 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2907 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2908 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2909 pack->addArg(x.value);
2910 pack->addArg(y.value);
2911 ::basicBlock->appendInst(pack);
2912
2913 return RValue<UShort8>(V(result));
2914 }
Nicolas Capens33438a62017-09-27 11:47:35 -04002915 }
2916
Nicolas Capens598f8d82016-09-26 15:09:10 -04002917 RValue<Int> SignMask(RValue<Int4> x)
2918 {
Nicolas Capens091f3502017-10-03 14:56:49 -04002919 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capensd6cacad2017-07-25 15:32:12 -04002920 {
2921 Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
2922 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
2923 }
2924 else
2925 {
2926 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2927 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2928 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2929 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2930 movmsk->addArg(x.value);
2931 ::basicBlock->appendInst(movmsk);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04002932
Nicolas Capensd6cacad2017-07-25 15:32:12 -04002933 return RValue<Int>(V(result));
2934 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002935 }
2936
Nicolas Capens598f8d82016-09-26 15:09:10 -04002937 Type *Int4::getType()
2938 {
Nicolas Capens23d99a42016-09-30 14:57:16 -04002939 return T(Ice::IceType_v4i32);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002940 }
2941
Nicolas Capenscb986762017-01-20 11:34:37 -05002942 UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
Nicolas Capens598f8d82016-09-26 15:09:10 -04002943 {
Nicolas Capensc70a1162016-12-03 00:16:14 -05002944 // Smallest positive value representable in UInt, but not in Int
2945 const unsigned int ustart = 0x80000000u;
2946 const float ustartf = float(ustart);
2947
2948 // Check if the value can be represented as an Int
2949 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
2950 // If the value is too large, subtract ustart and re-add it after conversion.
2951 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
2952 // Otherwise, just convert normally
2953 (~uiValue & Int4(cast));
2954 // If the value is negative, store 0, otherwise store the result of the conversion
2955 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
Nicolas Capens598f8d82016-09-26 15:09:10 -04002956 }
2957
Nicolas Capens598f8d82016-09-26 15:09:10 -04002958 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
2959 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002960 if(emulateIntrinsics)
2961 {
2962 UInt4 result;
2963 result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
2964 result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
2965 result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
2966 result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
2967
2968 return result;
2969 }
2970 else
2971 {
2972 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2973 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002974 }
2975
2976 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
2977 {
Nicolas Capens8be6c7b2017-07-25 15:32:12 -04002978 if(emulateIntrinsics)
2979 {
2980 UInt4 result;
2981 result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
2982 result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
2983 result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
2984 result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
2985
2986 return result;
2987 }
2988 else
2989 {
2990 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2991 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04002992 }
2993
Nicolas Capens598f8d82016-09-26 15:09:10 -04002994 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
2995 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05002996 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04002997 }
2998
2999 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
3000 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003001 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003002 }
3003
3004 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
3005 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003006 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003007 }
3008
3009 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
3010 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003011 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003012 }
3013
3014 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
3015 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003016 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003017 }
3018
3019 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
3020 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003021 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003022 }
3023
3024 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
3025 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003026 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3027 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3028 ::basicBlock->appendInst(cmp);
3029
3030 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3031 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3032 ::basicBlock->appendInst(select);
3033
3034 return RValue<UInt4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003035 }
3036
3037 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
3038 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003039 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
3040 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3041 ::basicBlock->appendInst(cmp);
3042
3043 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
3044 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3045 ::basicBlock->appendInst(select);
3046
3047 return RValue<UInt4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003048 }
3049
Nicolas Capens598f8d82016-09-26 15:09:10 -04003050 Type *UInt4::getType()
3051 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04003052 return T(Ice::IceType_v4i32);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003053 }
3054
Ben Claytonec1aeb82019-03-04 19:33:27 +00003055 Type *Half::getType()
3056 {
3057 return T(Ice::IceType_i16);
3058 }
Alexis Hetu734e2572018-12-20 14:00:49 -05003059
Nicolas Capens598f8d82016-09-26 15:09:10 -04003060 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
3061 {
Nicolas Capensd52e9362016-10-31 23:23:15 -04003062 return 1.0f / x;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003063 }
3064
3065 RValue<Float> RcpSqrt_pp(RValue<Float> x)
3066 {
Nicolas Capensd52e9362016-10-31 23:23:15 -04003067 return Rcp_pp(Sqrt(x));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003068 }
3069
3070 RValue<Float> Sqrt(RValue<Float> x)
3071 {
Nicolas Capensd52e9362016-10-31 23:23:15 -04003072 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
3073 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3074 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3075 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3076 sqrt->addArg(x.value);
3077 ::basicBlock->appendInst(sqrt);
3078
3079 return RValue<Float>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003080 }
3081
3082 RValue<Float> Round(RValue<Float> x)
3083 {
Nicolas Capensa8086512016-11-07 17:32:17 -05003084 return Float4(Round(Float4(x))).x;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003085 }
3086
3087 RValue<Float> Trunc(RValue<Float> x)
3088 {
Nicolas Capensa8086512016-11-07 17:32:17 -05003089 return Float4(Trunc(Float4(x))).x;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003090 }
3091
3092 RValue<Float> Frac(RValue<Float> x)
3093 {
Nicolas Capensa8086512016-11-07 17:32:17 -05003094 return Float4(Frac(Float4(x))).x;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003095 }
3096
3097 RValue<Float> Floor(RValue<Float> x)
3098 {
Nicolas Capensa8086512016-11-07 17:32:17 -05003099 return Float4(Floor(Float4(x))).x;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003100 }
3101
3102 RValue<Float> Ceil(RValue<Float> x)
3103 {
Nicolas Capensa8086512016-11-07 17:32:17 -05003104 return Float4(Ceil(Float4(x))).x;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003105 }
3106
3107 Type *Float::getType()
3108 {
Nicolas Capens9709d4f2016-09-30 11:44:14 -04003109 return T(Ice::IceType_f32);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003110 }
3111
Nicolas Capens598f8d82016-09-26 15:09:10 -04003112 Type *Float2::getType()
3113 {
Nicolas Capens4cfd4572016-10-20 01:00:19 -04003114 return T(Type_v2f32);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003115 }
3116
Nicolas Capenscb986762017-01-20 11:34:37 -05003117 Float4::Float4(RValue<Float> rhs) : XYZW(this)
Nicolas Capens598f8d82016-09-26 15:09:10 -04003118 {
Nicolas Capensf8beb4b2017-01-27 02:55:44 -08003119 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
Nicolas Capensd4227962016-11-09 14:24:25 -05003120
3121 int swizzle[4] = {0, 0, 0, 0};
Nicolas Capensf8beb4b2017-01-27 02:55:44 -08003122 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
Nicolas Capensd4227962016-11-09 14:24:25 -05003123
3124 storeValue(replicate);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003125 }
3126
Nicolas Capens598f8d82016-09-26 15:09:10 -04003127 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
3128 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003129 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capens5cdb91a2017-02-13 12:39:18 -05003130 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003131 ::basicBlock->appendInst(cmp);
3132
3133 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capens5cdb91a2017-02-13 12:39:18 -05003134 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003135 ::basicBlock->appendInst(select);
3136
3137 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003138 }
3139
3140 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
3141 {
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003142 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
Nicolas Capens5cdb91a2017-02-13 12:39:18 -05003143 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003144 ::basicBlock->appendInst(cmp);
3145
3146 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
Nicolas Capens5cdb91a2017-02-13 12:39:18 -05003147 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
Nicolas Capens53a8a3f2016-10-26 00:23:12 -04003148 ::basicBlock->appendInst(select);
3149
3150 return RValue<Float4>(V(result));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003151 }
3152
3153 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
3154 {
Nicolas Capensd52e9362016-10-31 23:23:15 -04003155 return Float4(1.0f) / x;
Nicolas Capens598f8d82016-09-26 15:09:10 -04003156 }
3157
3158 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
3159 {
Nicolas Capensd52e9362016-10-31 23:23:15 -04003160 return Rcp_pp(Sqrt(x));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003161 }
3162
3163 RValue<Float4> Sqrt(RValue<Float4> x)
3164 {
Nicolas Capens091f3502017-10-03 14:56:49 -04003165 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capens9f737d32017-07-25 17:26:14 -04003166 {
3167 Float4 result;
3168 result.x = Sqrt(Float(Float4(x).x));
3169 result.y = Sqrt(Float(Float4(x).y));
3170 result.z = Sqrt(Float(Float4(x).z));
3171 result.w = Sqrt(Float(Float4(x).w));
Nicolas Capensd52e9362016-10-31 23:23:15 -04003172
Nicolas Capens9f737d32017-07-25 17:26:14 -04003173 return result;
3174 }
3175 else
3176 {
3177 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3178 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3179 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3180 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3181 sqrt->addArg(x.value);
3182 ::basicBlock->appendInst(sqrt);
3183
3184 return RValue<Float4>(V(result));
3185 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003186 }
3187
Nicolas Capens598f8d82016-09-26 15:09:10 -04003188 RValue<Int> SignMask(RValue<Float4> x)
3189 {
Nicolas Capens091f3502017-10-03 14:56:49 -04003190 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capensd6cacad2017-07-25 15:32:12 -04003191 {
3192 Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
3193 return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
3194 }
3195 else
3196 {
3197 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
3198 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3199 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3200 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3201 movmsk->addArg(x.value);
3202 ::basicBlock->appendInst(movmsk);
Nicolas Capensf2cb9df2016-10-21 17:26:13 -04003203
Nicolas Capensd6cacad2017-07-25 15:32:12 -04003204 return RValue<Int>(V(result));
3205 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003206 }
3207
3208 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
3209 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003210 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003211 }
3212
3213 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
3214 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003215 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003216 }
3217
3218 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
3219 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003220 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003221 }
3222
3223 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
3224 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003225 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003226 }
3227
3228 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
3229 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003230 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003231 }
3232
3233 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
3234 {
Nicolas Capens5e6ca092017-01-13 15:09:21 -05003235 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003236 }
3237
Ben Claytonec1aeb82019-03-04 19:33:27 +00003238 RValue<Int4> CmpUEQ(RValue<Float4> x, RValue<Float4> y)
3239 {
3240 return RValue<Int4>(Nucleus::createFCmpUEQ(x.value, y.value));
3241 }
3242
3243 RValue<Int4> CmpULT(RValue<Float4> x, RValue<Float4> y)
3244 {
3245 return RValue<Int4>(Nucleus::createFCmpULT(x.value, y.value));
3246 }
3247
3248 RValue<Int4> CmpULE(RValue<Float4> x, RValue<Float4> y)
3249 {
3250 return RValue<Int4>(Nucleus::createFCmpULE(x.value, y.value));
3251 }
3252
3253 RValue<Int4> CmpUNEQ(RValue<Float4> x, RValue<Float4> y)
3254 {
3255 return RValue<Int4>(Nucleus::createFCmpUNE(x.value, y.value));
3256 }
3257
3258 RValue<Int4> CmpUNLT(RValue<Float4> x, RValue<Float4> y)
3259 {
3260 return RValue<Int4>(Nucleus::createFCmpUGE(x.value, y.value));
3261 }
3262
3263 RValue<Int4> CmpUNLE(RValue<Float4> x, RValue<Float4> y)
3264 {
3265 return RValue<Int4>(Nucleus::createFCmpUGT(x.value, y.value));
3266 }
3267
Nicolas Capens598f8d82016-09-26 15:09:10 -04003268 RValue<Float4> Round(RValue<Float4> x)
3269 {
Nicolas Capens091f3502017-10-03 14:56:49 -04003270 if(emulateIntrinsics || CPUID::ARM)
Nicolas Capensf7b75882017-04-26 09:30:47 -04003271 {
3272 // Push the fractional part off the mantissa. Accurate up to +/-2^22.
3273 return (x + Float4(0x00C00000)) - Float4(0x00C00000);
3274 }
3275 else if(CPUID::SSE4_1)
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003276 {
3277 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3278 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3279 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3280 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3281 round->addArg(x.value);
3282 round->addArg(::context->getConstantInt32(0));
3283 ::basicBlock->appendInst(round);
Nicolas Capensa8086512016-11-07 17:32:17 -05003284
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003285 return RValue<Float4>(V(result));
3286 }
3287 else
3288 {
3289 return Float4(RoundInt(x));
3290 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003291 }
3292
3293 RValue<Float4> Trunc(RValue<Float4> x)
3294 {
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003295 if(CPUID::SSE4_1)
3296 {
3297 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3298 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3299 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3300 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3301 round->addArg(x.value);
3302 round->addArg(::context->getConstantInt32(3));
3303 ::basicBlock->appendInst(round);
Nicolas Capensa8086512016-11-07 17:32:17 -05003304
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003305 return RValue<Float4>(V(result));
3306 }
3307 else
3308 {
3309 return Float4(Int4(x));
3310 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003311 }
3312
3313 RValue<Float4> Frac(RValue<Float4> x)
3314 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003315 Float4 frc;
3316
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003317 if(CPUID::SSE4_1)
3318 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003319 frc = x - Floor(x);
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003320 }
3321 else
3322 {
Nicolas Capensb9230422017-07-17 10:27:33 -04003323 frc = x - Float4(Int4(x)); // Signed fractional part.
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003324
Nicolas Capensb9230422017-07-17 10:27:33 -04003325 frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1))); // Add 1.0 if negative.
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003326 }
Nicolas Capensb9230422017-07-17 10:27:33 -04003327
3328 // x - floor(x) can be 1.0 for very small negative x.
3329 // Clamp against the value just below 1.0.
3330 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003331 }
3332
3333 RValue<Float4> Floor(RValue<Float4> x)
3334 {
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003335 if(CPUID::SSE4_1)
3336 {
3337 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3338 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3339 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3340 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3341 round->addArg(x.value);
3342 round->addArg(::context->getConstantInt32(1));
3343 ::basicBlock->appendInst(round);
Nicolas Capensa8086512016-11-07 17:32:17 -05003344
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003345 return RValue<Float4>(V(result));
3346 }
3347 else
3348 {
3349 return x - Frac(x);
3350 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003351 }
3352
3353 RValue<Float4> Ceil(RValue<Float4> x)
3354 {
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003355 if(CPUID::SSE4_1)
3356 {
3357 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
3358 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3359 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3360 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3361 round->addArg(x.value);
3362 round->addArg(::context->getConstantInt32(2));
3363 ::basicBlock->appendInst(round);
Nicolas Capensa8086512016-11-07 17:32:17 -05003364
Nicolas Capens9ca48d52017-01-14 12:52:55 -05003365 return RValue<Float4>(V(result));
3366 }
3367 else
3368 {
3369 return -Floor(-x);
3370 }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003371 }
3372
3373 Type *Float4::getType()
3374 {
Nicolas Capens9709d4f2016-09-30 11:44:14 -04003375 return T(Ice::IceType_v4f32);
Nicolas Capens598f8d82016-09-26 15:09:10 -04003376 }
3377
Nicolas Capens598f8d82016-09-26 15:09:10 -04003378 RValue<Long> Ticks()
3379 {
Ben Claytoneb50d252019-04-15 13:50:01 -04003380 UNIMPLEMENTED("RValue<Long> Ticks()");
3381 return Long(Int(0));
Nicolas Capens598f8d82016-09-26 15:09:10 -04003382 }
Ben Clayton147c4912019-04-11 00:17:59 -04003383
3384 // Below are functions currently unimplemented for the Subzero backend.
3385 // They are stubbed to satisfy the linker.
Ben Clayton147c4912019-04-11 00:17:59 -04003386 RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4(0); }
3387 RValue<Float4> Cos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cos()"); return Float4(0); }
3388 RValue<Float4> Tan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tan()"); return Float4(0); }
3389 RValue<Float4> Asin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asin()"); return Float4(0); }
3390 RValue<Float4> Acos(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acos()"); return Float4(0); }
3391 RValue<Float4> Atan(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atan()"); return Float4(0); }
3392 RValue<Float4> Sinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sinh()"); return Float4(0); }
3393 RValue<Float4> Cosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Cosh()"); return Float4(0); }
3394 RValue<Float4> Tanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Tanh()"); return Float4(0); }
3395 RValue<Float4> Asinh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Asinh()"); return Float4(0); }
3396 RValue<Float4> Acosh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Acosh()"); return Float4(0); }
3397 RValue<Float4> Atanh(RValue<Float4> x) { UNIMPLEMENTED("Subzero Atanh()"); return Float4(0); }
3398 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Atan2()"); return Float4(0); }
3399 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y) { UNIMPLEMENTED("Subzero Pow()"); return Float4(0); }
3400 RValue<Float4> Exp(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp()"); return Float4(0); }
3401 RValue<Float4> Log(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log()"); return Float4(0); }
3402 RValue<Float4> Exp2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Exp2()"); return Float4(0); }
3403 RValue<Float4> Log2(RValue<Float4> x) { UNIMPLEMENTED("Subzero Log2()"); return Float4(0); }
3404 RValue<UInt4> Ctlz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Ctlz()"); return UInt4(0); }
3405 RValue<UInt4> Cttz(RValue<UInt4> x, bool isZeroUndef) { UNIMPLEMENTED("Subzero Cttz()"); return UInt4(0); }
Nicolas Capens598f8d82016-09-26 15:09:10 -04003406}