blob: a6eb921d01e485b7e7bd9dcc5d7e0ed87f78f2f7 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
alan-baker57ce1c22022-04-26 19:10:44 -040039#include "ReplaceOpenCLBuiltinPass.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040040#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
David Neto22f144c2017-06-12 14:26:21 -040046namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000047
David Neto22f144c2017-06-12 14:26:21 -040048uint32_t clz(uint32_t v) {
49 uint32_t r;
50 uint32_t shift;
51
52 r = (v > 0xFFFF) << 4;
53 v >>= r;
54 shift = (v > 0xFF) << 3;
55 v >>= shift;
56 r |= shift;
57 shift = (v > 0xF) << 2;
58 v >>= shift;
59 r |= shift;
60 shift = (v > 0x3) << 1;
61 v >>= shift;
62 r |= shift;
63 r |= (v >> 1);
64
65 return r;
66}
67
Kévin Petitfdfa92e2019-09-25 14:20:58 +010068Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
69 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040070 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040071 IntTy = FixedVectorType::get(IntTy,
72 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010073 }
74 return IntTy;
75}
76
alan-baker4986eff2020-10-29 13:38:00 -040077Value *MemoryOrderSemantics(Value *order, bool is_global,
78 Instruction *InsertBefore,
alan-baker36309f92021-02-05 12:28:03 -050079 spv::MemorySemanticsMask base_semantics,
80 bool include_storage = true) {
alan-baker4986eff2020-10-29 13:38:00 -040081 enum AtomicMemoryOrder : uint32_t {
82 kMemoryOrderRelaxed = 0,
83 kMemoryOrderAcquire = 2,
84 kMemoryOrderRelease = 3,
85 kMemoryOrderAcqRel = 4,
86 kMemoryOrderSeqCst = 5
87 };
88
89 IRBuilder<> builder(InsertBefore);
90
91 // Constants for OpenCL C 2.0 memory_order.
92 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
93 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
94 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
95 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
96
97 // Constants for SPIR-V ordering memory semantics.
98 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
99 const auto AcquireSemantics =
100 builder.getInt32(spv::MemorySemanticsAcquireMask);
101 const auto ReleaseSemantics =
102 builder.getInt32(spv::MemorySemanticsReleaseMask);
103 const auto AcqRelSemantics =
104 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
105
106 // Constants for SPIR-V storage class semantics.
107 const auto UniformSemantics =
108 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
109 const auto WorkgroupSemantics =
110 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
111
112 // Instead of sequentially consistent, use acquire, release or acquire
113 // release semantics.
114 Value *base_order = nullptr;
115 switch (base_semantics) {
116 case spv::MemorySemanticsAcquireMask:
117 base_order = AcquireSemantics;
118 break;
119 case spv::MemorySemanticsReleaseMask:
120 base_order = ReleaseSemantics;
121 break;
122 default:
123 base_order = AcqRelSemantics;
124 break;
125 }
126
127 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
alan-baker36309f92021-02-05 12:28:03 -0500128 if (order == nullptr) {
129 if (include_storage)
130 return builder.CreateOr({storage, base_order});
131 else
132 return base_order;
133 }
alan-baker4986eff2020-10-29 13:38:00 -0400134
135 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
136 auto is_acquire = builder.CreateICmpEQ(order, acquire);
137 auto is_release = builder.CreateICmpEQ(order, release);
138 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
139 auto semantics =
140 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
141 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
142 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
143 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
alan-baker36309f92021-02-05 12:28:03 -0500144 if (include_storage)
145 return builder.CreateOr({storage, semantics});
146 else
147 return semantics;
alan-baker4986eff2020-10-29 13:38:00 -0400148}
149
150Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
151 enum AtomicMemoryScope : uint32_t {
152 kMemoryScopeWorkItem = 0,
153 kMemoryScopeWorkGroup = 1,
154 kMemoryScopeDevice = 2,
155 kMemoryScopeAllSVMDevices = 3, // not supported
156 kMemoryScopeSubGroup = 4
157 };
158
159 IRBuilder<> builder(InsertBefore);
160
161 // Constants for OpenCL C 2.0 memory_scope.
162 const auto work_item =
163 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
164 const auto work_group =
165 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
166 const auto sub_group =
167 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
168 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
169
170 // Constants for SPIR-V memory scopes.
171 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
172 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
173 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
174 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
175
176 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
177 if (scope == nullptr)
178 return base_scope;
179
180 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
181 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
182 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
183 auto is_device = builder.CreateICmpEQ(scope, device);
184
185 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
186 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
187 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
188 scope = builder.CreateSelect(is_device, DeviceScope, scope);
189
190 return scope;
191}
192
SJW2c317da2020-03-23 07:39:13 -0500193bool replaceCallsWithValue(Function &F,
194 std::function<Value *(CallInst *)> Replacer) {
195
196 bool Changed = false;
197
198 SmallVector<Instruction *, 4> ToRemoves;
199
200 // Walk the users of the function.
201 for (auto &U : F.uses()) {
202 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
203
204 auto NewValue = Replacer(CI);
205
206 if (NewValue != nullptr) {
207 CI->replaceAllUsesWith(NewValue);
208
209 // Lastly, remember to remove the user.
210 ToRemoves.push_back(CI);
211 }
212 }
213 }
214
215 Changed = !ToRemoves.empty();
216
217 // And cleanup the calls we don't use anymore.
218 for (auto V : ToRemoves) {
219 V->eraseFromParent();
220 }
221
222 return Changed;
223}
224
Kévin Petit91bc72e2019-04-08 15:17:46 +0100225} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400226
alan-baker57ce1c22022-04-26 19:10:44 -0400227PreservedAnalyses ReplaceOpenCLBuiltinPass::run(Module &M,
228 ModuleAnalysisManager &MPM) {
229 PreservedAnalyses PA;
SJW2c317da2020-03-23 07:39:13 -0500230 std::list<Function *> func_list;
231 for (auto &F : M.getFunctionList()) {
232 // process only function declarations
233 if (F.isDeclaration() && runOnFunction(F)) {
234 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000235 }
236 }
SJW2c317da2020-03-23 07:39:13 -0500237 if (func_list.size() != 0) {
238 // recursively convert functions, but first remove dead
239 for (auto *F : func_list) {
240 if (F->use_empty()) {
241 F->eraseFromParent();
242 }
243 }
alan-baker57ce1c22022-04-26 19:10:44 -0400244 PA = run(M, MPM);
245 return PA;
SJW2c317da2020-03-23 07:39:13 -0500246 }
alan-baker57ce1c22022-04-26 19:10:44 -0400247 return PA;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000248}
249
SJW2c317da2020-03-23 07:39:13 -0500250bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
251 auto &FI = Builtins::Lookup(&F);
252 switch (FI.getType()) {
253 case Builtins::kAbs:
254 if (!FI.getParameter(0).is_signed) {
255 return replaceAbs(F);
256 }
257 break;
258 case Builtins::kAbsDiff:
259 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400260
261 case Builtins::kAddSat:
alan-baker3f1bf492020-11-05 09:07:36 -0500262 return replaceAddSubSat(F, FI.getParameter(0).is_signed, true);
alan-bakera52b7312020-10-26 08:58:51 -0400263
alan-bakercc2bafb2020-11-02 08:30:18 -0500264 case Builtins::kClz:
alan-baker2cecaa72020-11-05 14:05:20 -0500265 return replaceCountZeroes(F, true);
266
267 case Builtins::kCtz:
268 return replaceCountZeroes(F, false);
alan-bakercc2bafb2020-11-02 08:30:18 -0500269
alan-bakerb6da5132020-10-29 15:59:06 -0400270 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000271 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400272 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000273 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400274
SJW2c317da2020-03-23 07:39:13 -0500275 case Builtins::kCopysign:
276 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100277
SJW2c317da2020-03-23 07:39:13 -0500278 case Builtins::kHalfRecip:
279 case Builtins::kNativeRecip:
280 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100281
SJW2c317da2020-03-23 07:39:13 -0500282 case Builtins::kHalfDivide:
283 case Builtins::kNativeDivide:
284 return replaceDivide(F);
285
286 case Builtins::kDot:
287 return replaceDot(F);
288
289 case Builtins::kExp10:
290 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500291 case Builtins::kNativeExp10:
292 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500293
alan-baker8b968112020-12-15 15:53:29 -0500294 case Builtins::kExpm1:
295 return replaceExpm1(F);
296
SJW2c317da2020-03-23 07:39:13 -0500297 case Builtins::kLog10:
298 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500299 case Builtins::kNativeLog10:
300 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500301
gnl21636e7992020-09-09 16:08:16 +0100302 case Builtins::kLog1p:
303 return replaceLog1p(F);
304
alan-bakere0406e72020-11-10 12:32:04 -0500305 case Builtins::kFdim:
306 return replaceFDim(F);
307
SJW2c317da2020-03-23 07:39:13 -0500308 case Builtins::kFmod:
309 return replaceFmod(F);
310
alan-baker8b968112020-12-15 15:53:29 -0500311 case Builtins::kPown:
312 return replacePown(F);
313
alan-baker3e0de472020-12-08 15:57:17 -0500314 case Builtins::kRound:
315 return replaceRound(F);
316
317 case Builtins::kCospi:
318 case Builtins::kSinpi:
319 case Builtins::kTanpi:
320 return replaceTrigPi(F, FI.getType());
321
alan-baker8b968112020-12-15 15:53:29 -0500322 case Builtins::kSincos:
323 return replaceSincos(F);
324
SJW2c317da2020-03-23 07:39:13 -0500325 case Builtins::kBarrier:
326 case Builtins::kWorkGroupBarrier:
327 return replaceBarrier(F);
328
alan-baker12d2c182020-07-20 08:22:42 -0400329 case Builtins::kSubGroupBarrier:
330 return replaceBarrier(F, true);
331
alan-baker36309f92021-02-05 12:28:03 -0500332 case Builtins::kAtomicWorkItemFence:
333 return replaceMemFence(F, spv::MemorySemanticsMaskNone);
SJW2c317da2020-03-23 07:39:13 -0500334 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400335 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500336 case Builtins::kReadMemFence:
337 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
338 case Builtins::kWriteMemFence:
339 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
340
341 // Relational
342 case Builtins::kIsequal:
alan-baker3e217772020-11-07 17:29:40 -0500343 return replaceRelational(F, CmpInst::FCMP_OEQ);
SJW2c317da2020-03-23 07:39:13 -0500344 case Builtins::kIsgreater:
alan-baker3e217772020-11-07 17:29:40 -0500345 return replaceRelational(F, CmpInst::FCMP_OGT);
SJW2c317da2020-03-23 07:39:13 -0500346 case Builtins::kIsgreaterequal:
alan-baker3e217772020-11-07 17:29:40 -0500347 return replaceRelational(F, CmpInst::FCMP_OGE);
SJW2c317da2020-03-23 07:39:13 -0500348 case Builtins::kIsless:
alan-baker3e217772020-11-07 17:29:40 -0500349 return replaceRelational(F, CmpInst::FCMP_OLT);
SJW2c317da2020-03-23 07:39:13 -0500350 case Builtins::kIslessequal:
alan-baker3e217772020-11-07 17:29:40 -0500351 return replaceRelational(F, CmpInst::FCMP_OLE);
SJW2c317da2020-03-23 07:39:13 -0500352 case Builtins::kIsnotequal:
alan-baker3e217772020-11-07 17:29:40 -0500353 return replaceRelational(F, CmpInst::FCMP_UNE);
354 case Builtins::kIslessgreater:
355 return replaceRelational(F, CmpInst::FCMP_ONE);
SJW2c317da2020-03-23 07:39:13 -0500356
alan-baker15106572020-11-06 15:08:10 -0500357 case Builtins::kIsordered:
358 return replaceOrdered(F, true);
359
360 case Builtins::kIsunordered:
361 return replaceOrdered(F, false);
362
SJW2c317da2020-03-23 07:39:13 -0500363 case Builtins::kIsinf: {
364 bool is_vec = FI.getParameter(0).vector_size != 0;
365 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
366 }
367 case Builtins::kIsnan: {
368 bool is_vec = FI.getParameter(0).vector_size != 0;
369 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
370 }
371
372 case Builtins::kIsfinite:
373 return replaceIsFinite(F);
374
375 case Builtins::kAll: {
376 bool is_vec = FI.getParameter(0).vector_size != 0;
377 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
378 }
379 case Builtins::kAny: {
380 bool is_vec = FI.getParameter(0).vector_size != 0;
381 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
382 }
383
alan-baker497920b2020-11-09 16:41:36 -0500384 case Builtins::kIsnormal:
385 return replaceIsNormal(F);
386
SJW2c317da2020-03-23 07:39:13 -0500387 case Builtins::kUpsample:
388 return replaceUpsample(F);
389
390 case Builtins::kRotate:
391 return replaceRotate(F);
392
393 case Builtins::kConvert:
394 return replaceConvert(F, FI.getParameter(0).is_signed,
395 FI.getReturnType().is_signed);
396
alan-baker4986eff2020-10-29 13:38:00 -0400397 // OpenCL 2.0 explicit atomics have different default scopes and semantics
398 // than legacy atomic functions.
399 case Builtins::kAtomicLoad:
400 case Builtins::kAtomicLoadExplicit:
401 return replaceAtomicLoad(F);
402 case Builtins::kAtomicStore:
403 case Builtins::kAtomicStoreExplicit:
404 return replaceExplicitAtomics(F, spv::OpAtomicStore,
405 spv::MemorySemanticsReleaseMask);
406 case Builtins::kAtomicExchange:
407 case Builtins::kAtomicExchangeExplicit:
408 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
409 case Builtins::kAtomicFetchAdd:
410 case Builtins::kAtomicFetchAddExplicit:
411 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
412 case Builtins::kAtomicFetchSub:
413 case Builtins::kAtomicFetchSubExplicit:
414 return replaceExplicitAtomics(F, spv::OpAtomicISub);
415 case Builtins::kAtomicFetchOr:
416 case Builtins::kAtomicFetchOrExplicit:
417 return replaceExplicitAtomics(F, spv::OpAtomicOr);
418 case Builtins::kAtomicFetchXor:
419 case Builtins::kAtomicFetchXorExplicit:
420 return replaceExplicitAtomics(F, spv::OpAtomicXor);
421 case Builtins::kAtomicFetchAnd:
422 case Builtins::kAtomicFetchAndExplicit:
423 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
424 case Builtins::kAtomicFetchMin:
425 case Builtins::kAtomicFetchMinExplicit:
426 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
427 ? spv::OpAtomicSMin
428 : spv::OpAtomicUMin);
429 case Builtins::kAtomicFetchMax:
430 case Builtins::kAtomicFetchMaxExplicit:
431 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
432 ? spv::OpAtomicSMax
433 : spv::OpAtomicUMax);
434 // Weak compare exchange is generated as strong compare exchange.
435 case Builtins::kAtomicCompareExchangeWeak:
436 case Builtins::kAtomicCompareExchangeWeakExplicit:
437 case Builtins::kAtomicCompareExchangeStrong:
438 case Builtins::kAtomicCompareExchangeStrongExplicit:
439 return replaceAtomicCompareExchange(F);
440
441 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500442 case Builtins::kAtomicInc:
443 return replaceAtomics(F, spv::OpAtomicIIncrement);
444 case Builtins::kAtomicDec:
445 return replaceAtomics(F, spv::OpAtomicIDecrement);
446 case Builtins::kAtomicCmpxchg:
447 return replaceAtomics(F, spv::OpAtomicCompareExchange);
448 case Builtins::kAtomicAdd:
449 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
450 case Builtins::kAtomicSub:
451 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
452 case Builtins::kAtomicXchg:
453 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
454 case Builtins::kAtomicMin:
455 return replaceAtomics(F, FI.getParameter(0).is_signed
456 ? llvm::AtomicRMWInst::Min
457 : llvm::AtomicRMWInst::UMin);
458 case Builtins::kAtomicMax:
459 return replaceAtomics(F, FI.getParameter(0).is_signed
460 ? llvm::AtomicRMWInst::Max
461 : llvm::AtomicRMWInst::UMax);
462 case Builtins::kAtomicAnd:
463 return replaceAtomics(F, llvm::AtomicRMWInst::And);
464 case Builtins::kAtomicOr:
465 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
466 case Builtins::kAtomicXor:
467 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
468
469 case Builtins::kCross:
470 if (FI.getParameter(0).vector_size == 4) {
471 return replaceCross(F);
472 }
473 break;
474
475 case Builtins::kFract:
476 if (FI.getParameterCount()) {
477 return replaceFract(F, FI.getParameter(0).vector_size);
478 }
479 break;
480
481 case Builtins::kMadHi:
482 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
483 case Builtins::kMulHi:
484 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
485
alan-baker6b9d1ee2020-11-03 23:11:32 -0500486 case Builtins::kMadSat:
487 return replaceMadSat(F, FI.getParameter(0).is_signed);
488
SJW2c317da2020-03-23 07:39:13 -0500489 case Builtins::kMad:
490 case Builtins::kMad24:
491 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
492 true);
493 case Builtins::kMul24:
494 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
495 false);
496
497 case Builtins::kSelect:
498 return replaceSelect(F);
499
500 case Builtins::kBitselect:
501 return replaceBitSelect(F);
502
503 case Builtins::kVload:
504 return replaceVload(F);
505
506 case Builtins::kVloadaHalf:
Romaric Jodin71fdb322022-05-03 17:01:10 +0200507 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size,
508 true);
SJW2c317da2020-03-23 07:39:13 -0500509 case Builtins::kVloadHalf:
Romaric Jodin71fdb322022-05-03 17:01:10 +0200510 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size,
511 false);
SJW2c317da2020-03-23 07:39:13 -0500512
513 case Builtins::kVstore:
514 return replaceVstore(F);
515
SJW2c317da2020-03-23 07:39:13 -0500516 case Builtins::kVstoreaHalf:
Romaric Jodin71fdb322022-05-03 17:01:10 +0200517 return replaceVstoreHalf(F, FI.getParameter(0).vector_size, true);
518 case Builtins::kVstoreHalf:
519 return replaceVstoreHalf(F, FI.getParameter(0).vector_size, false);
SJW2c317da2020-03-23 07:39:13 -0500520
521 case Builtins::kSmoothstep: {
522 int vec_size = FI.getLastParameter().vector_size;
523 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500524 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500525 }
526 break;
527 }
528 case Builtins::kStep: {
529 int vec_size = FI.getLastParameter().vector_size;
530 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500531 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500532 }
533 break;
534 }
535
536 case Builtins::kSignbit:
537 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
538
alan-baker3f1bf492020-11-05 09:07:36 -0500539 case Builtins::kSubSat:
540 return replaceAddSubSat(F, FI.getParameter(0).is_signed, false);
541
SJW2c317da2020-03-23 07:39:13 -0500542 case Builtins::kReadImageh:
543 return replaceHalfReadImage(F);
544 case Builtins::kReadImagef:
545 case Builtins::kReadImagei:
546 case Builtins::kReadImageui: {
547 if (FI.getParameter(1).isSampler() &&
548 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
549 return replaceSampledReadImageWithIntCoords(F);
550 }
551 break;
552 }
553
554 case Builtins::kWriteImageh:
555 return replaceHalfWriteImage(F);
556
Kévin Petit1cb45112020-04-27 18:55:48 +0100557 case Builtins::kPrefetch:
558 return replacePrefetch(F);
559
rjodinchr791203f2021-10-07 20:42:41 +0200560 // Asynchronous copies
561 case Builtins::kAsyncWorkGroupCopy:
alan-baker67d639b2022-05-09 11:23:31 -0400562 return replaceAsyncWorkGroupCopy(
563 F, FI.getParameter(0).DataType(F.getParent()->getContext()));
rjodinchr791203f2021-10-07 20:42:41 +0200564 case Builtins::kAsyncWorkGroupStridedCopy:
alan-baker67d639b2022-05-09 11:23:31 -0400565 return replaceAsyncWorkGroupStridedCopy(
566 F, FI.getParameter(0).DataType(F.getParent()->getContext()));
rjodinchr791203f2021-10-07 20:42:41 +0200567 case Builtins::kWaitGroupEvents:
568 return replaceWaitGroupEvents(F);
569
SJW2c317da2020-03-23 07:39:13 -0500570 default:
571 break;
572 }
573
574 return false;
575}
576
alan-baker6b9d1ee2020-11-03 23:11:32 -0500577Type *ReplaceOpenCLBuiltinPass::GetPairStruct(Type *type) {
578 auto iter = PairStructMap.find(type);
579 if (iter != PairStructMap.end())
580 return iter->second;
581
582 auto new_struct = StructType::get(type->getContext(), {type, type});
583 PairStructMap[type] = new_struct;
584 return new_struct;
585}
586
Romaric Jodinc507f312022-04-08 19:09:45 +0200587Value *ReplaceOpenCLBuiltinPass::InsertOpMulExtended(Instruction *InsertPoint,
588 Value *a, Value *b,
alan-baker57ce1c22022-04-26 19:10:44 -0400589 bool IsSigned,
590 bool Int64) {
Romaric Jodinc507f312022-04-08 19:09:45 +0200591
592 Type *Ty = a->getType();
593 Type *RetTy = GetPairStruct(a->getType());
594 assert(Ty == b->getType());
595
596 if (!Option::HackMulExtended()) {
597 spv::Op opcode = IsSigned ? spv::OpSMulExtended : spv::OpUMulExtended;
598
599 return clspv::InsertSPIRVOp(InsertPoint, opcode, {Attribute::ReadNone},
600 RetTy, {a, b});
601 }
602
603 unsigned int ScalarSizeInBits = Ty->getScalarSizeInBits();
604 bool IsVector = Ty->isVectorTy();
605
606 IRBuilder<> Builder(InsertPoint);
607
608 if (ScalarSizeInBits < 32 || (ScalarSizeInBits == 32 && Int64)) {
609 /*
610 * {mul_lo, mul_hi} = OpMulExtended(a, b, IsSigned) {
611 * S = SizeInBits(a)
612 * a_ext = ext2S(a, IsSigned)
613 * b_ext = ext2S(b, IsSigned)
614 * mul = a_ext * b_ext
615 * mul_lo = truncS(mul)
616 * mul_hi = truncS(mul >> S)
617 * return {mul_lo, mul_hi}
618 * }
619 */
620 Type *TyTimes2 =
621 Ty->getIntNTy(InsertPoint->getContext(), ScalarSizeInBits * 2);
622 if (IsVector) {
623 TyTimes2 = VectorType::get(TyTimes2, dyn_cast<VectorType>(Ty));
624 }
625 Value *aExtended, *bExtended;
626 if (IsSigned) {
627 aExtended = Builder.CreateSExt(a, TyTimes2);
628 bExtended = Builder.CreateSExt(b, TyTimes2);
629 } else {
630 aExtended = Builder.CreateZExt(a, TyTimes2);
631 bExtended = Builder.CreateZExt(b, TyTimes2);
632 }
633 auto mul = Builder.CreateMul(aExtended, bExtended);
634 auto mul_lo = Builder.CreateTrunc(mul, Ty);
635 auto mul_hi =
636 Builder.CreateTrunc(Builder.CreateLShr(mul, ScalarSizeInBits), Ty);
637
638 return Builder.CreateInsertValue(
639 Builder.CreateInsertValue(UndefValue::get(RetTy), mul_lo, {0}), mul_hi,
640 {1});
641 } else if (ScalarSizeInBits == 64 || (ScalarSizeInBits == 32 && !Int64)) {
642 /*
643 * {mul_lo, mul_hi} = OpMulExtended(a, b, IsSigned) {
644 * S = SizeInBits(a)
645 * hS = S / 2
646 * if (IsSigned) {
647 * res_neg = (a > 0) ^ (b > 0) = (a ^ b) < 0
648 * a = abs(a)
649 * b = abs(b)
650 * }
651 * a0 = trunchS(a)
652 * a1 = trunchS(a >> hS)
653 * b0 = trunchS(b)
654 * b1 = trunchS(b >> hS)
655 * {a0b0_0, a0b0_1} = zextS(OpUMulExtended(a0, b0))
656 * {a1b0_0, a1b0_1} = zextS(OpUMulExtended(a1, b0))
657 * {a0b1_0, a0b1_1} = zextS(OpUMulExtended(a0, b1))
658 * {a1b1_0, a1b1_1} = zextS(OpUMulExtended(a1, b1))
659 *
660 * mul_lo_hi = a0b0_1 + a1b0_0 + a0b1_0
661 * carry_mul_lo_hi = mul_lo_hi >> hS
662 * mul_hi_lo = a1b1_0 + a1b0_1 + a0b1_1 + carry_mul_lo_hi
663 * mul_lo = a0b0_0 + mul_lo_hi << hS
664 * mul_hi = mul_hi_lo + a1b1_1 << hS
665 *
666 * if (IsSigned) {
667 * mul_lo_xor = mul_lo ^ -1
668 * {mul_lo_inv, carry} = OpIAddCarry(mul_lo_xor, 1)
669 * mul_hi_inv = mul_hi ^ -1 + carry
670 * mul_lo = res_neg ? mul_lo_inv : mul_lo
671 * mul_hi = res_neg ? mul_hi_inv : mul_hi
672 * }
673 * return {mul_lo, mul_hi}
674 * }
675 */
676 Type *TyDiv2 =
677 Ty->getIntNTy(InsertPoint->getContext(), ScalarSizeInBits / 2);
678 if (IsVector) {
679 TyDiv2 = VectorType::get(TyDiv2, dyn_cast<VectorType>(Ty));
680 }
681
682 Value *res_neg;
683 if (IsSigned) {
684 // We want to work with unsigned value.
685 // Convert everything to unsigned and remember the signed of the end
686 // result.
687 auto a_b_xor = Builder.CreateXor(a, b);
688 res_neg = Builder.CreateICmpSLT(a_b_xor, ConstantInt::get(Ty, 0, true));
689
690 auto F = InsertPoint->getFunction();
691 auto abs = Intrinsic::getDeclaration(F->getParent(), Intrinsic::abs, Ty);
692 a = Builder.CreateCall(abs, {a, Builder.getInt1(false)});
693 b = Builder.CreateCall(abs, {b, Builder.getInt1(false)});
694 }
695
696 auto a0 = Builder.CreateTrunc(a, TyDiv2);
697 auto a1 = Builder.CreateTrunc(Builder.CreateLShr(a, ScalarSizeInBits / 2),
698 TyDiv2);
699 auto b0 = Builder.CreateTrunc(b, TyDiv2);
700 auto b1 = Builder.CreateTrunc(Builder.CreateLShr(b, ScalarSizeInBits / 2),
701 TyDiv2);
702
703 auto a0b0 = InsertOpMulExtended(InsertPoint, a0, b0, false, true);
704 auto a1b0 = InsertOpMulExtended(InsertPoint, a1, b0, false, true);
705 auto a0b1 = InsertOpMulExtended(InsertPoint, a0, b1, false, true);
706 auto a1b1 = InsertOpMulExtended(InsertPoint, a1, b1, false, true);
707 auto a0b0_0 = Builder.CreateZExt(Builder.CreateExtractValue(a0b0, {0}), Ty);
708 auto a0b0_1 = Builder.CreateZExt(Builder.CreateExtractValue(a0b0, {1}), Ty);
709 auto a1b0_0 = Builder.CreateZExt(Builder.CreateExtractValue(a1b0, {0}), Ty);
710 auto a1b0_1 = Builder.CreateZExt(Builder.CreateExtractValue(a1b0, {1}), Ty);
711 auto a0b1_0 = Builder.CreateZExt(Builder.CreateExtractValue(a0b1, {0}), Ty);
712 auto a0b1_1 = Builder.CreateZExt(Builder.CreateExtractValue(a0b1, {1}), Ty);
713 auto a1b1_0 = Builder.CreateZExt(Builder.CreateExtractValue(a1b1, {0}), Ty);
714 auto a1b1_1 = Builder.CreateZExt(Builder.CreateExtractValue(a1b1, {1}), Ty);
715
716 auto mul_lo_hi =
717 Builder.CreateAdd(Builder.CreateAdd(a0b0_1, a1b0_0), a0b1_0);
718 auto carry_mul_lo_hi = Builder.CreateLShr(mul_lo_hi, ScalarSizeInBits / 2);
719 auto mul_hi_lo = Builder.CreateAdd(
720 Builder.CreateAdd(Builder.CreateAdd(a1b1_0, a1b0_1), a0b1_1),
721 carry_mul_lo_hi);
722 auto mul_lo = Builder.CreateAdd(
723 a0b0_0, Builder.CreateShl(mul_lo_hi, ScalarSizeInBits / 2));
724 auto mul_hi = Builder.CreateAdd(
725 mul_hi_lo, Builder.CreateShl(a1b1_1, ScalarSizeInBits / 2));
726
727 if (IsSigned) {
728 // Apply the sign that we got from the previous if statement setting
729 // res_neg.
730 auto mul_lo_xor =
731 Builder.CreateXor(mul_lo, Constant::getAllOnesValue(Ty));
732 auto mul_lo_xor_add =
733 InsertSPIRVOp(InsertPoint, spv::OpIAddCarry, {Attribute::ReadNone},
734 RetTy, {mul_lo_xor, ConstantInt::get(Ty, 1)});
735 auto mul_lo_inv = Builder.CreateExtractValue(mul_lo_xor_add, {0});
736 auto carry = Builder.CreateExtractValue(mul_lo_xor_add, {1});
737 auto mul_hi_inv = Builder.CreateAdd(
738 carry, Builder.CreateXor(mul_hi, Constant::getAllOnesValue(Ty)));
739 mul_lo = Builder.CreateSelect(res_neg, mul_lo_inv, mul_lo);
740 mul_hi = Builder.CreateSelect(res_neg, mul_hi_inv, mul_hi);
741 }
742
743 return Builder.CreateInsertValue(
744 Builder.CreateInsertValue(UndefValue::get(RetTy), mul_lo, {0}), mul_hi,
745 {1});
746 } else {
747 llvm_unreachable("Unexpected type for InsertOpMulExtended");
748 }
749}
750
rjodinchr791203f2021-10-07 20:42:41 +0200751bool ReplaceOpenCLBuiltinPass::replaceWaitGroupEvents(Function &F) {
752 /* Simple implementation for wait_group_events to avoid dealing with the event
753 * list:
754 *
755 * void wait_group_events(int num_events, event_t *event_list) {
756 * barrier(CLK_LOCAL_MEM_FENCE);
757 * }
758 *
759 */
760
761 enum {
762 CLK_LOCAL_MEM_FENCE = 0x01,
763 CLK_GLOBAL_MEM_FENCE = 0x02,
764 CLK_IMAGE_MEM_FENCE = 0x04
765 };
766
767 return replaceCallsWithValue(F, [](CallInst *CI) {
768 IRBuilder<> Builder(CI);
769
770 const auto ConstantScopeWorkgroup = Builder.getInt32(spv::ScopeWorkgroup);
771 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
772 Instruction::Shl, Builder.getInt32(CLK_LOCAL_MEM_FENCE),
773 Builder.getInt32(clz(spv::MemorySemanticsWorkgroupMemoryMask) -
774 clz(CLK_LOCAL_MEM_FENCE)),
775 "", CI);
776 auto MemorySemantics = BinaryOperator::Create(
777 Instruction::Or, MemorySemanticsWorkgroup,
778 ConstantInt::get(Builder.getInt32Ty(),
779 spv::MemorySemanticsAcquireReleaseMask),
780 "", CI);
781
782 return clspv::InsertSPIRVOp(
783 CI, spv::OpControlBarrier,
784 {Attribute::NoDuplicate, Attribute::Convergent}, Builder.getVoidTy(),
785 {ConstantScopeWorkgroup, ConstantScopeWorkgroup, MemorySemantics});
786 });
787}
788
789GlobalVariable *ReplaceOpenCLBuiltinPass::getOrCreateGlobalVariable(
790 Module &M, std::string VariableName,
791 AddressSpace::Type VariableAddressSpace) {
792 GlobalVariable *GV = M.getGlobalVariable(VariableName);
793 if (GV == nullptr) {
794 IntegerType *IT = IntegerType::get(M.getContext(), 32);
795 VectorType *VT = FixedVectorType::get(IT, 3);
796
797 GV = new GlobalVariable(M, VT, false, GlobalValue::ExternalLinkage, nullptr,
798 VariableName, nullptr,
799 GlobalValue::ThreadLocalMode::NotThreadLocal,
800 VariableAddressSpace);
801 GV->setInitializer(Constant::getNullValue(VT));
802 }
803 return GV;
804}
805
806Value *ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopies(
alan-baker67d639b2022-05-09 11:23:31 -0400807 Module &M, CallInst *CI, Value *Dst, Value *Src, Type *GenType,
808 Value *NumGentypes, Value *Stride, Value *Event) {
rjodinchr791203f2021-10-07 20:42:41 +0200809 /*
810 * event_t *async_work_group_strided_copy(T *dst, T *src, size_t num_gentypes,
811 * size_t stride, event_t event) {
812 * size_t start_id = ((get_local_id(2) * get_local_size(1))
813 * + get_local_id(1)) * get_local_size(0)
814 * + get_local_id(0);
815 * size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
816 * for (size_t it = start_id; it < num_gentypes; it += incr) {
817 * dst[it] = src[it * stride];
818 * }
819 * return event;
820 * }
821 */
822
823 /* BB:
824 * before
825 * async_work_group_strided_copy
826 * after
827 *
828 * ================================
829 *
830 * BB:
831 * before
832 * start_id = f(get_local_ids, get_local_sizes)
833 * incr = g(get_local_sizes)
834 * br CmpBB
835 *
836 * CmpBB:
837 * it = PHI(start_id, it)
838 * cmp = it < NumGentypes
839 * condBr cmp, LoopBB, ExitBB
840 *
841 * LoopBB:
842 * dstI = dst[it]
843 * srcI = src[it * stride]
844 * OpCopyMemory dstI, srcI
845 * it += incr
846 * br CmpBB
847 *
848 * ExitBB:
849 * after
850 */
851
852 IRBuilder<> Builder(CI);
853
854 auto Cst0 = Builder.getInt32(0);
855 auto Cst1 = Builder.getInt32(1);
856 auto Cst2 = Builder.getInt32(2);
857
alan-baker67d639b2022-05-09 11:23:31 -0400858 auto *IT = IntegerType::get(M.getContext(), 32);
859 auto *VT = FixedVectorType::get(IT, 3);
860
rjodinchr791203f2021-10-07 20:42:41 +0200861 // get_local_id({0, 1, 2});
862 GlobalVariable *GVId =
863 getOrCreateGlobalVariable(M, clspv::LocalInvocationIdVariableName(),
864 clspv::LocalInvocationIdAddressSpace());
alan-baker67d639b2022-05-09 11:23:31 -0400865 Value *GEP0 = Builder.CreateGEP(VT, GVId, {Cst0, Cst0});
866 Value *LocalId0 = Builder.CreateLoad(IT, GEP0);
867 Value *GEP1 = Builder.CreateGEP(VT, GVId, {Cst0, Cst1});
868 Value *LocalId1 = Builder.CreateLoad(IT, GEP1);
869 Value *GEP2 = Builder.CreateGEP(VT, GVId, {Cst0, Cst2});
870 Value *LocalId2 = Builder.CreateLoad(IT, GEP2);
rjodinchr791203f2021-10-07 20:42:41 +0200871
872 // get_local_size({0, 1, 2});
873 GlobalVariable *GVSize =
874 getOrCreateGlobalVariable(M, clspv::WorkgroupSizeVariableName(),
875 clspv::WorkgroupSizeAddressSpace());
alan-baker67d639b2022-05-09 11:23:31 -0400876 auto LocalSize = Builder.CreateLoad(VT, GVSize);
rjodinchr791203f2021-10-07 20:42:41 +0200877 auto LocalSize0 = Builder.CreateExtractElement(LocalSize, Cst0);
878 auto LocalSize1 = Builder.CreateExtractElement(LocalSize, Cst1);
879 auto LocalSize2 = Builder.CreateExtractElement(LocalSize, Cst2);
880
881 // size_t start_id = ((get_local_id(2) * get_local_size(1))
882 // + get_local_id(1)) * get_local_size(0)
883 // + get_local_id(0);
884 auto tmp0 = Builder.CreateMul(LocalId2, LocalSize1);
885 auto tmp1 = Builder.CreateAdd(tmp0, LocalId1);
886 auto tmp2 = Builder.CreateMul(tmp1, LocalSize0);
887 auto StartId = Builder.CreateAdd(tmp2, LocalId0);
888
889 // size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
890 auto tmp3 = Builder.CreateMul(LocalSize0, LocalSize1);
891 auto Incr = Builder.CreateMul(tmp3, LocalSize2);
892
893 // Create BasicBlocks
894 auto BB = CI->getParent();
895 auto CmpBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
896 auto LoopBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
897 auto ExitBB = SplitBlock(BB, CI);
898
899 // BB
900 auto BrCmpBB = BranchInst::Create(CmpBB);
901 ReplaceInstWithInst(BB->getTerminator(), BrCmpBB);
902
903 // CmpBB
904 Builder.SetInsertPoint(CmpBB);
905 auto PHIIterator = Builder.CreatePHI(Builder.getInt32Ty(), 2);
906 auto Cmp = Builder.CreateCmp(CmpInst::ICMP_ULT, PHIIterator, NumGentypes);
907 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
908
909 // LoopBB
910 Builder.SetInsertPoint(LoopBB);
911
912 // default values for non-strided copies
913 Value *SrcIterator = PHIIterator;
914 Value *DstIterator = PHIIterator;
915 if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
916 clspv::AddressSpace::Global)) {
917 // async_work_group_strided_copy local to global case
918 DstIterator = Builder.CreateMul(PHIIterator, Stride);
919 } else if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
920 clspv::AddressSpace::Local)) {
921 // async_work_group_strided_copy global to local case
922 SrcIterator = Builder.CreateMul(PHIIterator, Stride);
923 }
alan-baker67d639b2022-05-09 11:23:31 -0400924 auto DstI = Builder.CreateGEP(GenType, Dst, DstIterator);
925 auto SrcI = Builder.CreateGEP(GenType, Src, SrcIterator);
rjodinchr791203f2021-10-07 20:42:41 +0200926 auto NewIterator = Builder.CreateAdd(PHIIterator, Incr);
927 auto Br = Builder.CreateBr(CmpBB);
928 clspv::InsertSPIRVOp(Br, spv::OpCopyMemory, {}, Builder.getVoidTy(),
929 {DstI, SrcI});
930
931 // Set PHIIterator for CmpBB now that we have NewIterator
932 PHIIterator->addIncoming(StartId, BB);
933 PHIIterator->addIncoming(NewIterator, LoopBB);
934
935 return Event;
936}
937
alan-baker67d639b2022-05-09 11:23:31 -0400938bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopy(Function &F, Type *ty) {
939 return replaceCallsWithValue(F, [&F, ty, this](CallInst *CI) {
rjodinchr791203f2021-10-07 20:42:41 +0200940 Module &M = *F.getParent();
941
942 auto Dst = CI->getOperand(0);
943 auto Src = CI->getOperand(1);
944 auto NumGentypes = CI->getOperand(2);
945 auto Event = CI->getOperand(3);
946
alan-baker67d639b2022-05-09 11:23:31 -0400947 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, ty, NumGentypes,
948 nullptr, Event);
rjodinchr791203f2021-10-07 20:42:41 +0200949 });
950}
951
alan-baker67d639b2022-05-09 11:23:31 -0400952bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupStridedCopy(Function &F, Type *ty) {
953 return replaceCallsWithValue(F, [&F, ty, this](CallInst *CI) {
rjodinchr791203f2021-10-07 20:42:41 +0200954 Module &M = *F.getParent();
955
956 auto Dst = CI->getOperand(0);
957 auto Src = CI->getOperand(1);
958 auto NumGentypes = CI->getOperand(2);
959 auto Stride = CI->getOperand(3);
960 auto Event = CI->getOperand(4);
961
alan-baker67d639b2022-05-09 11:23:31 -0400962 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, ty, NumGentypes, Stride,
rjodinchr791203f2021-10-07 20:42:41 +0200963 Event);
964 });
965}
966
SJW2c317da2020-03-23 07:39:13 -0500967bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
968 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400969 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100970}
971
SJW2c317da2020-03-23 07:39:13 -0500972bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
973 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100974 auto XValue = CI->getOperand(0);
975 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100976
Kévin Petite8edce32019-04-10 14:23:32 +0100977 IRBuilder<> Builder(CI);
978 auto XmY = Builder.CreateSub(XValue, YValue);
979 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100980
SJW2c317da2020-03-23 07:39:13 -0500981 Value *Cmp = nullptr;
982 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100983 Cmp = Builder.CreateICmpSGT(YValue, XValue);
984 } else {
985 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100986 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100987
Kévin Petite8edce32019-04-10 14:23:32 +0100988 return Builder.CreateSelect(Cmp, YmX, XmY);
989 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100990}
991
SJW2c317da2020-03-23 07:39:13 -0500992bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
alan-baker5f2e88e2020-12-07 15:24:04 -0500993 return replaceCallsWithValue(F, [&F](CallInst *Call) {
994 const auto x = Call->getArgOperand(0);
995 const auto y = Call->getArgOperand(1);
996 auto intrinsic = Intrinsic::getDeclaration(
997 F.getParent(), Intrinsic::copysign, Call->getType());
998 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {x, y}, "",
999 Call);
Kévin Petite8edce32019-04-10 14:23:32 +01001000 });
Kévin Petit8c1be282019-04-02 19:34:25 +01001001}
1002
SJW2c317da2020-03-23 07:39:13 -05001003bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
1004 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001005 // Recip has one arg.
1006 auto Arg = CI->getOperand(0);
1007 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
1008 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
1009 });
David Neto22f144c2017-06-12 14:26:21 -04001010}
1011
SJW2c317da2020-03-23 07:39:13 -05001012bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
1013 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001014 auto Op0 = CI->getOperand(0);
1015 auto Op1 = CI->getOperand(1);
1016 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
1017 });
David Neto22f144c2017-06-12 14:26:21 -04001018}
1019
SJW2c317da2020-03-23 07:39:13 -05001020bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
1021 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +01001022 auto Op0 = CI->getOperand(0);
1023 auto Op1 = CI->getOperand(1);
1024
SJW2c317da2020-03-23 07:39:13 -05001025 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +01001026 if (Op0->getType()->isVectorTy()) {
1027 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
1028 CI->getType(), {Op0, Op1});
1029 } else {
1030 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
1031 }
1032
1033 return V;
1034 });
1035}
1036
SJW2c317da2020-03-23 07:39:13 -05001037bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -05001038 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001039 // convert to natural
1040 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001041 std::string NewFName = basename.substr(0, slen);
1042 NewFName =
1043 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001044
SJW2c317da2020-03-23 07:39:13 -05001045 Module &M = *F.getParent();
1046 return replaceCallsWithValue(F, [&](CallInst *CI) {
1047 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1048
1049 auto Arg = CI->getOperand(0);
1050
1051 // Constant of the natural log of 10 (ln(10)).
1052 const double Ln10 =
1053 2.302585092994045684017991454684364207601101488628772976033;
1054
1055 auto Mul = BinaryOperator::Create(
1056 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
1057
1058 return CallInst::Create(NewF, Mul, "", CI);
1059 });
David Neto22f144c2017-06-12 14:26:21 -04001060}
1061
SJW2c317da2020-03-23 07:39:13 -05001062bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +01001063 // OpenCL fmod(x,y) is x - y * trunc(x/y)
1064 // The sign for a non-zero result is taken from x.
1065 // (Try an example.)
1066 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -05001067 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +01001068 auto Op0 = CI->getOperand(0);
1069 auto Op1 = CI->getOperand(1);
1070 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
1071 });
1072}
1073
SJW2c317da2020-03-23 07:39:13 -05001074bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -05001075 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001076 // convert to natural
1077 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001078 std::string NewFName = basename.substr(0, slen);
1079 NewFName =
1080 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001081
SJW2c317da2020-03-23 07:39:13 -05001082 Module &M = *F.getParent();
1083 return replaceCallsWithValue(F, [&](CallInst *CI) {
1084 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1085
1086 auto Arg = CI->getOperand(0);
1087
1088 // Constant of the reciprocal of the natural log of 10 (ln(10)).
1089 const double Ln10 =
1090 0.434294481903251827651128918916605082294397005803666566114;
1091
1092 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
1093
1094 return BinaryOperator::Create(Instruction::FMul,
1095 ConstantFP::get(Arg->getType(), Ln10), NewCI,
1096 "", CI);
1097 });
David Neto22f144c2017-06-12 14:26:21 -04001098}
1099
gnl21636e7992020-09-09 16:08:16 +01001100bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
1101 // convert to natural
alan-baker8b968112020-12-15 15:53:29 -05001102 return replaceCallsWithValue(F, [&F](CallInst *CI) {
gnl21636e7992020-09-09 16:08:16 +01001103 auto Arg = CI->getOperand(0);
1104
1105 auto ArgP1 = BinaryOperator::Create(
1106 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
1107
alan-baker8b968112020-12-15 15:53:29 -05001108 auto log =
1109 Intrinsic::getDeclaration(F.getParent(), Intrinsic::log, CI->getType());
1110 return CallInst::Create(log, ArgP1, "", CI);
gnl21636e7992020-09-09 16:08:16 +01001111 });
1112}
1113
alan-baker12d2c182020-07-20 08:22:42 -04001114bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -04001115
alan-bakerf6bc8252020-09-23 14:58:55 -04001116 enum {
1117 CLK_LOCAL_MEM_FENCE = 0x01,
1118 CLK_GLOBAL_MEM_FENCE = 0x02,
1119 CLK_IMAGE_MEM_FENCE = 0x04
1120 };
David Neto22f144c2017-06-12 14:26:21 -04001121
alan-baker12d2c182020-07-20 08:22:42 -04001122 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +01001123 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001124
Kévin Petitc4643922019-06-17 19:32:05 +01001125 // We need to map the OpenCL constants to the SPIR-V equivalents.
1126 const auto LocalMemFence =
1127 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1128 const auto GlobalMemFence =
1129 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001130 const auto ImageMemFence =
1131 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -04001132 const auto ConstantAcquireRelease = ConstantInt::get(
1133 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +01001134 const auto ConstantScopeDevice =
1135 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
1136 const auto ConstantScopeWorkgroup =
1137 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -04001138 const auto ConstantScopeSubgroup =
1139 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -04001140
Kévin Petitc4643922019-06-17 19:32:05 +01001141 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1142 const auto LocalMemFenceMask =
1143 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1144 const auto WorkgroupShiftAmount =
1145 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1146 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1147 Instruction::Shl, LocalMemFenceMask,
1148 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001149
Kévin Petitc4643922019-06-17 19:32:05 +01001150 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1151 const auto GlobalMemFenceMask =
1152 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1153 const auto UniformShiftAmount =
1154 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1155 const auto MemorySemanticsUniform = BinaryOperator::Create(
1156 Instruction::Shl, GlobalMemFenceMask,
1157 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001158
alan-bakerf6bc8252020-09-23 14:58:55 -04001159 // OpenCL 2.0
1160 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1161 const auto ImageMemFenceMask =
1162 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1163 const auto ImageShiftAmount =
1164 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1165 const auto MemorySemanticsImage = BinaryOperator::Create(
1166 Instruction::Shl, ImageMemFenceMask,
1167 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1168
Kévin Petitc4643922019-06-17 19:32:05 +01001169 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -04001170 // MemorySemanticsSequentiallyConsistentMask.
1171 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +01001172 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -04001173 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -04001174 auto MemorySemantics2 = BinaryOperator::Create(
1175 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
1176 auto MemorySemantics = BinaryOperator::Create(
1177 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001178
alan-baker12d2c182020-07-20 08:22:42 -04001179 // If the memory scope is not specified explicitly, it is either Subgroup
1180 // or Workgroup depending on the type of barrier.
1181 Value *MemoryScope =
1182 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
1183 if (CI->data_operands_size() > 1) {
1184 enum {
1185 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
1186 CL_MEMORY_SCOPE_DEVICE = 0x2,
1187 CL_MEMORY_SCOPE_SUBGROUP = 0x4
1188 };
1189 // The call was given an explicit memory scope.
1190 const auto MemoryScopeSubgroup =
1191 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
1192 const auto MemoryScopeDevice =
1193 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -04001194
alan-baker12d2c182020-07-20 08:22:42 -04001195 auto Cmp =
1196 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1197 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
1198 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
1199 ConstantScopeWorkgroup, "", CI);
1200 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1201 MemoryScopeDevice, CI->getOperand(1), "", CI);
1202 MemoryScope =
1203 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
1204 }
1205
1206 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
1207 // the type of barrier;
1208 const auto ExecutionScope =
1209 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -04001210
Kévin Petitc4643922019-06-17 19:32:05 +01001211 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -04001212 {Attribute::NoDuplicate, Attribute::Convergent},
1213 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +01001214 {ExecutionScope, MemoryScope, MemorySemantics});
1215 });
David Neto22f144c2017-06-12 14:26:21 -04001216}
1217
alan-baker36309f92021-02-05 12:28:03 -05001218bool ReplaceOpenCLBuiltinPass::replaceMemFence(
1219 Function &F, spv::MemorySemanticsMask semantics) {
David Neto22f144c2017-06-12 14:26:21 -04001220
SJW2c317da2020-03-23 07:39:13 -05001221 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -04001222 enum {
1223 CLK_LOCAL_MEM_FENCE = 0x01,
1224 CLK_GLOBAL_MEM_FENCE = 0x02,
1225 CLK_IMAGE_MEM_FENCE = 0x04,
1226 };
David Neto22f144c2017-06-12 14:26:21 -04001227
SJW2c317da2020-03-23 07:39:13 -05001228 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001229
SJW2c317da2020-03-23 07:39:13 -05001230 // We need to map the OpenCL constants to the SPIR-V equivalents.
1231 const auto LocalMemFence =
1232 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1233 const auto GlobalMemFence =
1234 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001235 const auto ImageMemFence =
1236 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -05001237 const auto ConstantMemorySemantics =
1238 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -04001239 const auto ConstantScopeWorkgroup =
1240 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -04001241
SJW2c317da2020-03-23 07:39:13 -05001242 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1243 const auto LocalMemFenceMask =
1244 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1245 const auto WorkgroupShiftAmount =
1246 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1247 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1248 Instruction::Shl, LocalMemFenceMask,
1249 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001250
SJW2c317da2020-03-23 07:39:13 -05001251 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1252 const auto GlobalMemFenceMask =
1253 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1254 const auto UniformShiftAmount =
1255 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1256 const auto MemorySemanticsUniform = BinaryOperator::Create(
1257 Instruction::Shl, GlobalMemFenceMask,
1258 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001259
alan-bakerf6bc8252020-09-23 14:58:55 -04001260 // OpenCL 2.0
1261 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1262 const auto ImageMemFenceMask =
1263 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1264 const auto ImageShiftAmount =
1265 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1266 const auto MemorySemanticsImage = BinaryOperator::Create(
1267 Instruction::Shl, ImageMemFenceMask,
1268 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1269
alan-baker36309f92021-02-05 12:28:03 -05001270 Value *MemOrder = ConstantMemorySemantics;
1271 Value *MemScope = ConstantScopeWorkgroup;
1272 IRBuilder<> builder(CI);
alan-baker5641f5c2021-10-15 09:16:04 -04001273 if (CI->arg_size() > 1) {
alan-baker36309f92021-02-05 12:28:03 -05001274 MemOrder = MemoryOrderSemantics(CI->getArgOperand(1), false, CI,
1275 semantics, false);
1276 MemScope = MemoryScope(CI->getArgOperand(2), false, CI);
1277 }
1278 // Join the storage semantics and the order semantics.
alan-bakerf6bc8252020-09-23 14:58:55 -04001279 auto MemorySemantics1 =
alan-baker36309f92021-02-05 12:28:03 -05001280 builder.CreateOr({MemorySemanticsWorkgroup, MemorySemanticsUniform});
1281 auto MemorySemantics2 = builder.CreateOr({MemorySemanticsImage, MemOrder});
1282 auto MemorySemantics =
1283 builder.CreateOr({MemorySemantics1, MemorySemantics2});
David Neto22f144c2017-06-12 14:26:21 -04001284
alan-baker3d905692020-10-28 14:02:37 -04001285 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
1286 {Attribute::Convergent}, CI->getType(),
alan-baker36309f92021-02-05 12:28:03 -05001287 {MemScope, MemorySemantics});
SJW2c317da2020-03-23 07:39:13 -05001288 });
David Neto22f144c2017-06-12 14:26:21 -04001289}
1290
Kévin Petit1cb45112020-04-27 18:55:48 +01001291bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
1292 bool Changed = false;
1293
1294 SmallVector<Instruction *, 4> ToRemoves;
1295
1296 // Find all calls to the function
1297 for (auto &U : F.uses()) {
1298 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1299 ToRemoves.push_back(CI);
1300 }
1301 }
1302
1303 Changed = !ToRemoves.empty();
1304
1305 // Delete them
1306 for (auto V : ToRemoves) {
1307 V->eraseFromParent();
1308 }
1309
1310 return Changed;
1311}
1312
SJW2c317da2020-03-23 07:39:13 -05001313bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
alan-baker3e217772020-11-07 17:29:40 -05001314 CmpInst::Predicate P) {
SJW2c317da2020-03-23 07:39:13 -05001315 return replaceCallsWithValue(F, [&](CallInst *CI) {
1316 // The predicate to use in the CmpInst.
1317 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -04001318
SJW2c317da2020-03-23 07:39:13 -05001319 auto Arg1 = CI->getOperand(0);
1320 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001321
SJW2c317da2020-03-23 07:39:13 -05001322 const auto Cmp =
1323 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
alan-baker3e217772020-11-07 17:29:40 -05001324 if (isa<VectorType>(F.getReturnType()))
1325 return CastInst::Create(Instruction::SExt, Cmp, CI->getType(), "", CI);
1326 return CastInst::Create(Instruction::ZExt, Cmp, CI->getType(), "", CI);
SJW2c317da2020-03-23 07:39:13 -05001327 });
David Neto22f144c2017-06-12 14:26:21 -04001328}
1329
SJW2c317da2020-03-23 07:39:13 -05001330bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
1331 spv::Op SPIRVOp,
1332 int32_t C) {
1333 Module &M = *F.getParent();
1334 return replaceCallsWithValue(F, [&](CallInst *CI) {
1335 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -04001336
SJW2c317da2020-03-23 07:39:13 -05001337 // The value to return for true.
1338 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -04001339
SJW2c317da2020-03-23 07:39:13 -05001340 // The value to return for false.
1341 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001342
SJW2c317da2020-03-23 07:39:13 -05001343 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001344 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001345 CorrespondingBoolTy =
1346 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1347 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001348 }
David Neto22f144c2017-06-12 14:26:21 -04001349
SJW2c317da2020-03-23 07:39:13 -05001350 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1351 CorrespondingBoolTy, {CI->getOperand(0)});
1352
1353 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1354 });
David Neto22f144c2017-06-12 14:26:21 -04001355}
1356
SJW2c317da2020-03-23 07:39:13 -05001357bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1358 Module &M = *F.getParent();
1359 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001360 auto &C = M.getContext();
1361 auto Val = CI->getOperand(0);
1362 auto ValTy = Val->getType();
1363 auto RetTy = CI->getType();
1364
1365 // Get a suitable integer type to represent the number
1366 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1367
1368 // Create Mask
1369 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001370 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001371 switch (ScalarSize) {
1372 case 16:
1373 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1374 break;
1375 case 32:
1376 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1377 break;
1378 case 64:
1379 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1380 break;
1381 default:
1382 llvm_unreachable("Unsupported floating-point type");
1383 }
1384
1385 IRBuilder<> Builder(CI);
1386
1387 // Bitcast to int
1388 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1389
1390 // Mask and compare
1391 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1392 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1393
1394 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001395 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001396 if (ValTy->isVectorTy()) {
1397 RetTrue = ConstantInt::getSigned(RetTy, -1);
1398 } else {
1399 RetTrue = ConstantInt::get(RetTy, 1);
1400 }
1401 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1402 });
1403}
1404
SJW2c317da2020-03-23 07:39:13 -05001405bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1406 Module &M = *F.getParent();
1407 return replaceCallsWithValue(F, [&](CallInst *CI) {
1408 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001409
SJW2c317da2020-03-23 07:39:13 -05001410 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001411
SJW2c317da2020-03-23 07:39:13 -05001412 // If the argument is a 32-bit int, just use a shift
1413 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1414 V = BinaryOperator::Create(Instruction::LShr, Arg,
1415 ConstantInt::get(Arg->getType(), 31), "", CI);
1416 } else {
1417 // The value for zero to compare against.
1418 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001419
SJW2c317da2020-03-23 07:39:13 -05001420 // The value to return for true.
1421 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001422
SJW2c317da2020-03-23 07:39:13 -05001423 // The value to return for false.
1424 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001425
SJW2c317da2020-03-23 07:39:13 -05001426 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1427 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001428
SJW2c317da2020-03-23 07:39:13 -05001429 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001430
SJW2c317da2020-03-23 07:39:13 -05001431 // If we have a function to call, call it!
1432 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001433
SJW2c317da2020-03-23 07:39:13 -05001434 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001435
SJW2c317da2020-03-23 07:39:13 -05001436 const auto NewCI = clspv::InsertSPIRVOp(
1437 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1438 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001439
SJW2c317da2020-03-23 07:39:13 -05001440 } else {
1441 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001442 }
1443
SJW2c317da2020-03-23 07:39:13 -05001444 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001445 }
SJW2c317da2020-03-23 07:39:13 -05001446 return V;
1447 });
David Neto22f144c2017-06-12 14:26:21 -04001448}
1449
SJW2c317da2020-03-23 07:39:13 -05001450bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1451 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1452 // Get arguments
1453 auto HiValue = CI->getOperand(0);
1454 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001455
SJW2c317da2020-03-23 07:39:13 -05001456 // Don't touch overloads that aren't in OpenCL C
1457 auto HiType = HiValue->getType();
1458 auto LoType = LoValue->getType();
1459
1460 if (HiType != LoType) {
1461 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001462 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001463
SJW2c317da2020-03-23 07:39:13 -05001464 if (!HiType->isIntOrIntVectorTy()) {
1465 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001466 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001467
SJW2c317da2020-03-23 07:39:13 -05001468 if (HiType->getScalarSizeInBits() * 2 !=
1469 CI->getType()->getScalarSizeInBits()) {
1470 return nullptr;
1471 }
1472
1473 if ((HiType->getScalarSizeInBits() != 8) &&
1474 (HiType->getScalarSizeInBits() != 16) &&
1475 (HiType->getScalarSizeInBits() != 32)) {
1476 return nullptr;
1477 }
1478
James Pricecf53df42020-04-20 14:41:24 -04001479 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001480 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001481 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1482 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001483 return nullptr;
1484 }
1485 }
1486
1487 // Convert both operands to the result type
1488 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1489 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1490
1491 // Shift high operand
1492 auto ShiftAmount =
1493 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1494 auto HiShifted =
1495 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1496
1497 // OR both results
1498 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1499 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001500}
1501
SJW2c317da2020-03-23 07:39:13 -05001502bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1503 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1504 // Get arguments
1505 auto SrcValue = CI->getOperand(0);
1506 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001507
SJW2c317da2020-03-23 07:39:13 -05001508 // Don't touch overloads that aren't in OpenCL C
1509 auto SrcType = SrcValue->getType();
1510 auto RotType = RotAmount->getType();
1511
1512 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1513 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001514 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001515
SJW2c317da2020-03-23 07:39:13 -05001516 if (!SrcType->isIntOrIntVectorTy()) {
1517 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001518 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001519
SJW2c317da2020-03-23 07:39:13 -05001520 if ((SrcType->getScalarSizeInBits() != 8) &&
1521 (SrcType->getScalarSizeInBits() != 16) &&
1522 (SrcType->getScalarSizeInBits() != 32) &&
1523 (SrcType->getScalarSizeInBits() != 64)) {
1524 return nullptr;
1525 }
1526
James Pricecf53df42020-04-20 14:41:24 -04001527 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001528 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001529 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1530 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001531 return nullptr;
1532 }
1533 }
1534
alan-bakerfd22ae12020-10-29 15:59:22 -04001535 // Replace with LLVM's funnel shift left intrinsic because it is more
1536 // generic than rotate.
1537 Function *intrinsic =
1538 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1539 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1540 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001541 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001542}
1543
SJW2c317da2020-03-23 07:39:13 -05001544bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1545 bool DstIsSigned) {
1546 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1547 Value *V = nullptr;
1548 // Get arguments
1549 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001550
SJW2c317da2020-03-23 07:39:13 -05001551 // Don't touch overloads that aren't in OpenCL C
1552 auto SrcType = SrcValue->getType();
1553 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001554
SJW2c317da2020-03-23 07:39:13 -05001555 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1556 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1557 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001558 }
1559
James Pricecf53df42020-04-20 14:41:24 -04001560 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001561 unsigned SrcNumElements =
1562 SrcVecType->getElementCount().getKnownMinValue();
1563 unsigned DstNumElements =
1564 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001565 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001566 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001567 }
1568
James Pricecf53df42020-04-20 14:41:24 -04001569 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1570 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1571 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001572 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001573 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001574 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001575
SJW2c317da2020-03-23 07:39:13 -05001576 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1577 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1578
1579 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1580 bool DstIsInt = DstType->isIntOrIntVectorTy();
1581
1582 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1583 // Unnecessary cast operation.
1584 V = SrcValue;
1585 } else if (SrcIsFloat && DstIsFloat) {
1586 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1587 } else if (SrcIsFloat && DstIsInt) {
1588 if (DstIsSigned) {
1589 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1590 } else {
1591 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1592 }
1593 } else if (SrcIsInt && DstIsFloat) {
1594 if (SrcIsSigned) {
1595 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1596 } else {
1597 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1598 }
1599 } else if (SrcIsInt && DstIsInt) {
1600 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1601 } else {
1602 // Not something we're supposed to handle, just move on
1603 }
1604
1605 return V;
1606 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001607}
1608
SJW2c317da2020-03-23 07:39:13 -05001609bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1610 bool is_mad) {
1611 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1612 Value *V = nullptr;
1613 // Get arguments
1614 auto AValue = CI->getOperand(0);
1615 auto BValue = CI->getOperand(1);
1616 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001617
SJW2c317da2020-03-23 07:39:13 -05001618 // Don't touch overloads that aren't in OpenCL C
1619 auto AType = AValue->getType();
1620 auto BType = BValue->getType();
1621 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001622
SJW2c317da2020-03-23 07:39:13 -05001623 if ((AType != BType) || (CI->getType() != AType) ||
1624 (is_mad && (AType != CType))) {
1625 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001626 }
1627
SJW2c317da2020-03-23 07:39:13 -05001628 if (!AType->isIntOrIntVectorTy()) {
1629 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001630 }
Kévin Petit8a560882019-03-21 15:24:34 +00001631
SJW2c317da2020-03-23 07:39:13 -05001632 if ((AType->getScalarSizeInBits() != 8) &&
1633 (AType->getScalarSizeInBits() != 16) &&
1634 (AType->getScalarSizeInBits() != 32) &&
1635 (AType->getScalarSizeInBits() != 64)) {
1636 return V;
1637 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001638
James Pricecf53df42020-04-20 14:41:24 -04001639 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001640 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001641 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1642 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001643 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001644 }
1645 }
1646
Romaric Jodinc507f312022-04-08 19:09:45 +02001647 auto Call = InsertOpMulExtended(CI, AValue, BValue, is_signed);
SJW2c317da2020-03-23 07:39:13 -05001648
1649 // Get the high part of the result
1650 unsigned Idxs[] = {1};
1651 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1652
1653 // If we're handling a mad_hi, add the third argument to the result
1654 if (is_mad) {
1655 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001656 }
1657
SJW2c317da2020-03-23 07:39:13 -05001658 return V;
1659 });
Kévin Petit8a560882019-03-21 15:24:34 +00001660}
1661
SJW2c317da2020-03-23 07:39:13 -05001662bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1663 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1664 // Get arguments
1665 auto FalseValue = CI->getOperand(0);
1666 auto TrueValue = CI->getOperand(1);
1667 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001668
SJW2c317da2020-03-23 07:39:13 -05001669 // Don't touch overloads that aren't in OpenCL C
1670 auto FalseType = FalseValue->getType();
1671 auto TrueType = TrueValue->getType();
1672 auto PredicateType = PredicateValue->getType();
1673
1674 if (FalseType != TrueType) {
1675 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001676 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001677
SJW2c317da2020-03-23 07:39:13 -05001678 if (!PredicateType->isIntOrIntVectorTy()) {
1679 return nullptr;
1680 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001681
SJW2c317da2020-03-23 07:39:13 -05001682 if (!FalseType->isIntOrIntVectorTy() &&
1683 !FalseType->getScalarType()->isFloatingPointTy()) {
1684 return nullptr;
1685 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001686
SJW2c317da2020-03-23 07:39:13 -05001687 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1688 return nullptr;
1689 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001690
SJW2c317da2020-03-23 07:39:13 -05001691 if (FalseType->getScalarSizeInBits() !=
1692 PredicateType->getScalarSizeInBits()) {
1693 return nullptr;
1694 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001695
James Pricecf53df42020-04-20 14:41:24 -04001696 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001697 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1698 if (NumElements != cast<VectorType>(PredicateType)
1699 ->getElementCount()
1700 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001701 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001702 }
1703
James Pricecf53df42020-04-20 14:41:24 -04001704 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1705 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001706 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001707 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001708 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001709
SJW2c317da2020-03-23 07:39:13 -05001710 // Create constant
1711 const auto ZeroValue = Constant::getNullValue(PredicateType);
1712
1713 // Scalar and vector are to be treated differently
1714 CmpInst::Predicate Pred;
1715 if (PredicateType->isVectorTy()) {
1716 Pred = CmpInst::ICMP_SLT;
1717 } else {
1718 Pred = CmpInst::ICMP_NE;
1719 }
1720
1721 // Create comparison instruction
1722 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1723 ZeroValue, "", CI);
1724
1725 // Create select
1726 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1727 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001728}
1729
SJW2c317da2020-03-23 07:39:13 -05001730bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1731 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1732 Value *V = nullptr;
1733 if (CI->getNumOperands() != 4) {
1734 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001735 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001736
SJW2c317da2020-03-23 07:39:13 -05001737 // Get arguments
1738 auto FalseValue = CI->getOperand(0);
1739 auto TrueValue = CI->getOperand(1);
1740 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001741
SJW2c317da2020-03-23 07:39:13 -05001742 // Don't touch overloads that aren't in OpenCL C
1743 auto FalseType = FalseValue->getType();
1744 auto TrueType = TrueValue->getType();
1745 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001746
SJW2c317da2020-03-23 07:39:13 -05001747 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1748 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001749 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001750
James Pricecf53df42020-04-20 14:41:24 -04001751 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001752 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1753 !TrueType->getScalarType()->isIntegerTy()) {
1754 return V;
1755 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001756 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001757 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1758 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001759 return V;
1760 }
1761 }
1762
1763 // Remember the type of the operands
1764 auto OpType = TrueType;
1765
1766 // The actual bit selection will always be done on an integer type,
1767 // declare it here
1768 Type *BitType;
1769
1770 // If the operands are float, then bitcast them to int
1771 if (OpType->getScalarType()->isFloatingPointTy()) {
1772
1773 // First create the new type
1774 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1775
1776 // Then bitcast all operands
1777 PredicateValue =
1778 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1779 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1780 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1781
1782 } else {
1783 // The operands have an integer type, use it directly
1784 BitType = OpType;
1785 }
1786
1787 // All the operands are now always integers
1788 // implement as (c & b) | (~c & a)
1789
1790 // Create our negated predicate value
1791 auto AllOnes = Constant::getAllOnesValue(BitType);
1792 auto NotPredicateValue = BinaryOperator::Create(
1793 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1794
1795 // Then put everything together
1796 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1797 FalseValue, "", CI);
1798 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1799 TrueValue, "", CI);
1800
1801 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1802
1803 // If we were dealing with a floating point type, we must bitcast
1804 // the result back to that
1805 if (OpType->getScalarType()->isFloatingPointTy()) {
1806 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1807 }
1808
1809 return V;
1810 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001811}
1812
SJW61531372020-06-09 07:31:08 -05001813bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001814 // convert to vector versions
1815 Module &M = *F.getParent();
1816 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1817 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1818 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001819
SJW2c317da2020-03-23 07:39:13 -05001820 // First figure out which function we're dealing with
1821 if (is_smooth) {
1822 ArgsToSplat.push_back(CI->getOperand(1));
1823 VectorArg = CI->getOperand(2);
1824 } else {
1825 VectorArg = CI->getOperand(1);
1826 }
1827
1828 // Splat arguments that need to be
1829 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001830 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001831
1832 for (auto arg : ArgsToSplat) {
1833 Value *NewVectorArg = UndefValue::get(VecType);
Marco Antognini7e338402021-03-15 12:48:37 +00001834 for (size_t i = 0; i < VecType->getElementCount().getKnownMinValue();
1835 i++) {
SJW2c317da2020-03-23 07:39:13 -05001836 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1837 NewVectorArg =
1838 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1839 }
1840 SplatArgs.push_back(NewVectorArg);
1841 }
1842
1843 // Replace the call with the vector/vector flavour
1844 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1845 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1846
SJW61531372020-06-09 07:31:08 -05001847 std::string NewFName = Builtins::GetMangledFunctionName(
1848 is_smooth ? "smoothstep" : "step", NewFType);
1849
SJW2c317da2020-03-23 07:39:13 -05001850 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1851
1852 SmallVector<Value *, 3> NewArgs;
1853 for (auto arg : SplatArgs) {
1854 NewArgs.push_back(arg);
1855 }
1856 NewArgs.push_back(VectorArg);
1857
1858 return CallInst::Create(NewF, NewArgs, "", CI);
1859 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001860}
1861
SJW2c317da2020-03-23 07:39:13 -05001862bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001863 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1864 auto Arg = CI->getOperand(0);
1865 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001866
SJW2c317da2020-03-23 07:39:13 -05001867 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001868
SJW2c317da2020-03-23 07:39:13 -05001869 return BinaryOperator::Create(Op, Bitcast,
1870 ConstantInt::get(CI->getType(), 31), "", CI);
1871 });
David Neto22f144c2017-06-12 14:26:21 -04001872}
1873
SJW2c317da2020-03-23 07:39:13 -05001874bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1875 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001876 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1877 // The multiply instruction to use.
1878 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001879
SJW2c317da2020-03-23 07:39:13 -05001880 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001881
SJW2c317da2020-03-23 07:39:13 -05001882 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1883 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001884
SJW2c317da2020-03-23 07:39:13 -05001885 if (is_mad) {
1886 // The add instruction to use.
1887 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001888
SJW2c317da2020-03-23 07:39:13 -05001889 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001890 }
David Neto22f144c2017-06-12 14:26:21 -04001891
SJW2c317da2020-03-23 07:39:13 -05001892 return V;
1893 });
David Neto22f144c2017-06-12 14:26:21 -04001894}
1895
SJW2c317da2020-03-23 07:39:13 -05001896bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001897 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1898 Value *V = nullptr;
1899 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001900
SJW2c317da2020-03-23 07:39:13 -05001901 auto data_type = data->getType();
1902 if (!data_type->isVectorTy())
1903 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001904
James Pricecf53df42020-04-20 14:41:24 -04001905 auto vec_data_type = cast<VectorType>(data_type);
1906
alan-baker5a8c3be2020-09-09 13:44:26 -04001907 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001908 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1909 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001910
SJW2c317da2020-03-23 07:39:13 -05001911 auto offset = CI->getOperand(1);
1912 auto ptr = CI->getOperand(2);
alan-bakerf795f392019-06-11 18:24:34 -04001913
SJW2c317da2020-03-23 07:39:13 -05001914 // Avoid pointer casts. Instead generate the correct number of stores
1915 // and rely on drivers to coalesce appropriately.
1916 IRBuilder<> builder(CI);
1917 auto elems_const = builder.getInt32(elems);
1918 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001919 for (size_t i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001920 auto idx = builder.getInt32(i);
1921 auto add = builder.CreateAdd(adjust, idx);
alan-baker67d639b2022-05-09 11:23:31 -04001922 auto gep = builder.CreateGEP(vec_data_type->getScalarType(), ptr, add);
SJW2c317da2020-03-23 07:39:13 -05001923 auto extract = builder.CreateExtractElement(data, i);
1924 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001925 }
SJW2c317da2020-03-23 07:39:13 -05001926 return V;
1927 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001928}
1929
SJW2c317da2020-03-23 07:39:13 -05001930bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001931 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1932 Value *V = nullptr;
1933 auto ret_type = F.getReturnType();
1934 if (!ret_type->isVectorTy())
1935 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001936
James Pricecf53df42020-04-20 14:41:24 -04001937 auto vec_ret_type = cast<VectorType>(ret_type);
1938
alan-baker5a8c3be2020-09-09 13:44:26 -04001939 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001940 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1941 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001942
SJW2c317da2020-03-23 07:39:13 -05001943 auto offset = CI->getOperand(0);
1944 auto ptr = CI->getOperand(1);
Derek Chowcfd368b2017-10-19 20:58:45 -07001945
SJW2c317da2020-03-23 07:39:13 -05001946 // Avoid pointer casts. Instead generate the correct number of loads
1947 // and rely on drivers to coalesce appropriately.
1948 IRBuilder<> builder(CI);
1949 auto elems_const = builder.getInt32(elems);
1950 V = UndefValue::get(ret_type);
1951 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001952 for (unsigned i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001953 auto idx = builder.getInt32(i);
1954 auto add = builder.CreateAdd(adjust, idx);
alan-baker67d639b2022-05-09 11:23:31 -04001955 auto gep = builder.CreateGEP(vec_ret_type->getScalarType(), ptr, add);
1956 auto load = builder.CreateLoad(vec_ret_type->getScalarType(), gep);
SJW2c317da2020-03-23 07:39:13 -05001957 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001958 }
SJW2c317da2020-03-23 07:39:13 -05001959 return V;
1960 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001961}
1962
SJW2c317da2020-03-23 07:39:13 -05001963bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1964 const std::string &name,
Romaric Jodin71fdb322022-05-03 17:01:10 +02001965 int vec_size, bool aligned) {
SJW2c317da2020-03-23 07:39:13 -05001966 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1967 if (!vec_size) {
Romaric Jodin71fdb322022-05-03 17:01:10 +02001968 // deduce vec_size from last characters of name (e.g. vload_half4)
1969 std::string half = "half";
1970 vec_size = std::atoi(
1971 name.substr(name.find(half) + half.size(), std::string::npos).c_str());
David Neto22f144c2017-06-12 14:26:21 -04001972 }
SJW2c317da2020-03-23 07:39:13 -05001973 switch (vec_size) {
1974 case 2:
1975 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02001976 case 3:
1977 if (!is_clspv_version) {
1978 return aligned ? replaceVloadaHalf3(F) : replaceVloadHalf3(F);
1979 }
1980 break;
SJW2c317da2020-03-23 07:39:13 -05001981 case 4:
1982 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02001983 case 8:
1984 if (!is_clspv_version) {
1985 return replaceVloadHalf8(F);
1986 }
1987 break;
1988 case 16:
1989 if (!is_clspv_version) {
1990 return replaceVloadHalf16(F);
1991 }
1992 break;
SJW2c317da2020-03-23 07:39:13 -05001993 case 0:
1994 if (!is_clspv_version) {
1995 return replaceVloadHalf(F);
1996 }
SJW2c317da2020-03-23 07:39:13 -05001997 break;
1998 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02001999 llvm_unreachable("Unsupported vload_half vector size");
David Neto22f144c2017-06-12 14:26:21 -04002000}
2001
SJW2c317da2020-03-23 07:39:13 -05002002bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
2003 Module &M = *F.getParent();
2004 return replaceCallsWithValue(F, [&](CallInst *CI) {
2005 // The index argument from vload_half.
2006 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002007
SJW2c317da2020-03-23 07:39:13 -05002008 // The pointer argument from vload_half.
2009 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002010
SJW2c317da2020-03-23 07:39:13 -05002011 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002012 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05002013 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2014
2015 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002016 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05002017
2018 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2019
2020 Value *V = nullptr;
2021
alan-baker7efcaaa2020-05-06 19:33:27 -04002022 bool supports_16bit_storage = true;
2023 switch (Arg1->getType()->getPointerAddressSpace()) {
2024 case clspv::AddressSpace::Global:
2025 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2026 clspv::Option::StorageClass::kSSBO);
2027 break;
2028 case clspv::AddressSpace::Constant:
2029 if (clspv::Option::ConstantArgsInUniformBuffer())
2030 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2031 clspv::Option::StorageClass::kUBO);
2032 else
2033 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2034 clspv::Option::StorageClass::kSSBO);
2035 break;
2036 default:
2037 // Clspv will emit the Float16 capability if the half type is
2038 // encountered. That capability covers private and local addressspaces.
2039 break;
2040 }
2041
2042 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05002043 auto ShortTy = Type::getInt16Ty(M.getContext());
2044 auto ShortPointerTy =
2045 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
2046
2047 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002048 // TODO(#816): remove after final transition.
2049 Value *Cast = Arg1;
2050 if (Arg1->getType() != ShortPointerTy) {
2051 Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
2052 }
SJW2c317da2020-03-23 07:39:13 -05002053
2054 // Index into the correct address of the casted pointer.
2055 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2056
2057 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002058 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002059
2060 // ZExt the short -> int.
2061 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2062
2063 // Get our float2.
2064 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2065
2066 // Extract out the bottom element which is our float result.
2067 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
2068 } else {
2069 // Assume the pointer argument points to storage aligned to 32bits
2070 // or more.
2071 // TODO(dneto): Do more analysis to make sure this is true?
2072 //
2073 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2074 // with:
2075 //
2076 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2077 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2078 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2079 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2080 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2081 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2082 // x float> %converted, %index_is_odd32
2083
2084 auto IntPointerTy =
2085 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
2086
2087 // Cast the base pointer to int*.
2088 // In a valid call (according to assumptions), this should get
2089 // optimized away in the simplify GEP pass.
alan-bakerb8365b62022-07-18 21:59:45 -04002090 // TODO(#816): remove after final transition.
2091 Value *Cast = Arg1;
2092 if (Arg1->getType() != IntPointerTy) {
2093 Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2094 }
SJW2c317da2020-03-23 07:39:13 -05002095
2096 auto One = ConstantInt::get(IntTy, 1);
2097 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2098 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2099
2100 // Index into the correct address of the casted pointer.
2101 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2102
2103 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002104 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002105
2106 // Get our float2.
2107 auto Call = CallInst::Create(NewF, Load, "", CI);
2108
2109 // Extract out the float result, where the element number is
2110 // determined by whether the original index was even or odd.
2111 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2112 }
2113 return V;
2114 });
2115}
2116
2117bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
2118 Module &M = *F.getParent();
2119 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002120 // The index argument from vload_half.
2121 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002122
Kévin Petite8edce32019-04-10 14:23:32 +01002123 // The pointer argument from vload_half.
2124 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002125
Kévin Petite8edce32019-04-10 14:23:32 +01002126 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002127 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002128 auto NewPointerTy =
2129 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002130 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002131
Kévin Petite8edce32019-04-10 14:23:32 +01002132 // Cast the half* pointer to int*.
alan-bakerb8365b62022-07-18 21:59:45 -04002133 // TODO(#816): remove after final transition.
2134 Value *Cast = Arg1;
2135 if (Arg1->getType() != NewPointerTy) {
2136 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2137 }
David Neto22f144c2017-06-12 14:26:21 -04002138
Kévin Petite8edce32019-04-10 14:23:32 +01002139 // Index into the correct address of the casted pointer.
2140 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002141
Kévin Petite8edce32019-04-10 14:23:32 +01002142 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002143 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002144
Kévin Petite8edce32019-04-10 14:23:32 +01002145 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002146 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002147
Kévin Petite8edce32019-04-10 14:23:32 +01002148 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002149
Kévin Petite8edce32019-04-10 14:23:32 +01002150 // Get our float2.
2151 return CallInst::Create(NewF, Load, "", CI);
2152 });
David Neto22f144c2017-06-12 14:26:21 -04002153}
2154
Romaric Jodin71fdb322022-05-03 17:01:10 +02002155bool ReplaceOpenCLBuiltinPass::replaceVloadHalf3(Function &F) {
2156 Module &M = *F.getParent();
2157 return replaceCallsWithValue(F, [&](CallInst *CI) {
2158 // The index argument from vload_half.
2159 auto Arg0 = CI->getOperand(0);
2160
2161 // The pointer argument from vload_half.
2162 auto Arg1 = CI->getOperand(1);
2163
2164 auto IntTy = Type::getInt32Ty(M.getContext());
2165 auto ShortTy = Type::getInt16Ty(M.getContext());
2166 auto FloatTy = Type::getFloatTy(M.getContext());
2167 auto Float2Ty = FixedVectorType::get(FloatTy, 2);
2168 auto Float3Ty = FixedVectorType::get(FloatTy, 3);
2169 auto NewPointerTy =
2170 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
2171 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2172
2173 auto Int0 = ConstantInt::get(IntTy, 0);
2174 auto Int1 = ConstantInt::get(IntTy, 1);
2175 auto Int2 = ConstantInt::get(IntTy, 2);
2176
2177 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002178 // TODO(#816): remove after final transition.
2179 Value *Cast = Arg1;
2180 if (Arg1->getType() != NewPointerTy) {
2181 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2182 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002183
2184 // Load the first element
2185 auto Index0 = BinaryOperator::Create(
2186 Instruction::Add,
2187 BinaryOperator::Create(Instruction::Shl, Arg0, Int1, "", CI), Arg0, "",
2188 CI);
2189 auto GEP0 = GetElementPtrInst::Create(ShortTy, Cast, Index0, "", CI);
2190 auto Load0 = new LoadInst(ShortTy, GEP0, "", CI);
2191
2192 // Load the second element
2193 auto Index1 =
2194 BinaryOperator::Create(Instruction::Add, Index0, Int1, "", CI);
2195 auto GEP1 = GetElementPtrInst::Create(ShortTy, Cast, Index1, "", CI);
2196 auto Load1 = new LoadInst(ShortTy, GEP1, "", CI);
2197
2198 // Load the third element
2199 auto Index2 =
2200 BinaryOperator::Create(Instruction::Add, Index1, Int1, "", CI);
2201 auto GEP2 = GetElementPtrInst::Create(ShortTy, Cast, Index2, "", CI);
2202 auto Load2 = new LoadInst(ShortTy, GEP2, "", CI);
2203
2204 // Extend each short to int.
2205 auto X0 = CastInst::Create(Instruction::ZExt, Load0, IntTy, "", CI);
2206 auto X1 = CastInst::Create(Instruction::ZExt, Load1, IntTy, "", CI);
2207 auto X2 = CastInst::Create(Instruction::ZExt, Load2, IntTy, "", CI);
2208
2209 // Our intrinsic to unpack a float2 from an int.
2210 auto SPIRVIntrinsic = clspv::UnpackFunction();
2211
2212 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2213
2214 // Convert int to float2 and extract the uniq meaningful float
2215 auto Y0 = ExtractElementInst::Create(CallInst::Create(NewF, X0, "", CI),
2216 Int0, "", CI);
2217 auto Y1 = ExtractElementInst::Create(CallInst::Create(NewF, X1, "", CI),
2218 Int0, "", CI);
2219 auto Y2 = ExtractElementInst::Create(CallInst::Create(NewF, X2, "", CI),
2220 Int0, "", CI);
2221
2222 // Create the final float3 to be returned
2223 auto Combine =
2224 InsertElementInst::Create(UndefValue::get(Float3Ty), Y0, Int0, "", CI);
2225 Combine = InsertElementInst::Create(Combine, Y1, Int1, "", CI);
2226 Combine = InsertElementInst::Create(Combine, Y2, Int2, "", CI);
2227
2228 return Combine;
2229 });
2230}
2231
2232bool ReplaceOpenCLBuiltinPass::replaceVloadaHalf3(Function &F) {
2233 Module &M = *F.getParent();
2234 return replaceCallsWithValue(F, [&](CallInst *CI) {
2235 // The index argument from vload_half.
2236 auto Arg0 = CI->getOperand(0);
2237
2238 // The pointer argument from vload_half.
2239 auto Arg1 = CI->getOperand(1);
2240
2241 auto IntTy = Type::getInt32Ty(M.getContext());
2242 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2243 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
2244 auto NewPointerTy =
2245 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
2246 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2247
2248 // Cast the half* pointer to int2*.
alan-bakerb8365b62022-07-18 21:59:45 -04002249 // TODO(#816): remove after final transition.
2250 Value *Cast = Arg1;
2251 if (Arg1->getType() != NewPointerTy) {
2252 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2253 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002254
2255 // Index into the correct address of the casted pointer.
2256 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
2257
2258 // Load from the int2* we casted to.
2259 auto Load = new LoadInst(Int2Ty, Index, "", CI);
2260
2261 // Extract each element from the loaded int2.
2262 auto X =
2263 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2264 auto Y =
2265 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
2266
2267 // Our intrinsic to unpack a float2 from an int.
2268 auto SPIRVIntrinsic = clspv::UnpackFunction();
2269
2270 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2271
2272 // Get the lower (x & y) components of our final float4.
2273 auto Lo = CallInst::Create(NewF, X, "", CI);
2274
2275 // Get the higher (z & w) components of our final float4.
2276 auto Hi = CallInst::Create(NewF, Y, "", CI);
2277
2278 Constant *ShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2279 ConstantInt::get(IntTy, 1),
2280 ConstantInt::get(IntTy, 2)};
2281
2282 // Combine our two float2's into one float4.
2283 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2284 CI);
2285 });
2286}
2287
SJW2c317da2020-03-23 07:39:13 -05002288bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
2289 Module &M = *F.getParent();
2290 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002291 // The index argument from vload_half.
2292 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002293
Kévin Petite8edce32019-04-10 14:23:32 +01002294 // The pointer argument from vload_half.
2295 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002296
Kévin Petite8edce32019-04-10 14:23:32 +01002297 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002298 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2299 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002300 auto NewPointerTy =
2301 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002302 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002303
Kévin Petite8edce32019-04-10 14:23:32 +01002304 // Cast the half* pointer to int2*.
alan-bakerb8365b62022-07-18 21:59:45 -04002305 // TODO(#816): remove after final transition.
2306 Value *Cast = Arg1;
2307 if (Arg1->getType() != NewPointerTy) {
2308 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2309 }
David Neto22f144c2017-06-12 14:26:21 -04002310
Kévin Petite8edce32019-04-10 14:23:32 +01002311 // Index into the correct address of the casted pointer.
2312 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002313
Kévin Petite8edce32019-04-10 14:23:32 +01002314 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002315 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002316
Kévin Petite8edce32019-04-10 14:23:32 +01002317 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002318 auto X =
2319 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2320 auto Y =
2321 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002322
Kévin Petite8edce32019-04-10 14:23:32 +01002323 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002324 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002325
Kévin Petite8edce32019-04-10 14:23:32 +01002326 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002327
Kévin Petite8edce32019-04-10 14:23:32 +01002328 // Get the lower (x & y) components of our final float4.
2329 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002330
Kévin Petite8edce32019-04-10 14:23:32 +01002331 // Get the higher (z & w) components of our final float4.
2332 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002333
Kévin Petite8edce32019-04-10 14:23:32 +01002334 Constant *ShuffleMask[4] = {
2335 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2336 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002337
Kévin Petite8edce32019-04-10 14:23:32 +01002338 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002339 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2340 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002341 });
David Neto22f144c2017-06-12 14:26:21 -04002342}
2343
Romaric Jodin71fdb322022-05-03 17:01:10 +02002344bool ReplaceOpenCLBuiltinPass::replaceVloadHalf8(Function &F) {
2345 Module &M = *F.getParent();
2346 return replaceCallsWithValue(F, [&](CallInst *CI) {
2347 // The index argument from vload_half.
2348 auto Arg0 = CI->getOperand(0);
2349
2350 // The pointer argument from vload_half.
2351 auto Arg1 = CI->getOperand(1);
2352
2353 auto IntTy = Type::getInt32Ty(M.getContext());
2354 auto Int4Ty = FixedVectorType::get(IntTy, 4);
2355 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
2356 auto NewPointerTy =
2357 PointerType::get(Int4Ty, Arg1->getType()->getPointerAddressSpace());
2358 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2359
2360 // Cast the half* pointer to int4*.
alan-bakerb8365b62022-07-18 21:59:45 -04002361 // TODO(#816): remove after final transition.
2362 Value *Cast = Arg1;
2363 if (Arg1->getType() != NewPointerTy) {
2364 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2365 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002366
2367 // Index into the correct address of the casted pointer.
2368 auto Index = GetElementPtrInst::Create(Int4Ty, Cast, Arg0, "", CI);
2369
2370 // Load from the int4* we casted to.
2371 auto Load = new LoadInst(Int4Ty, Index, "", CI);
2372
2373 // Extract each element from the loaded int4.
2374 auto X1 =
2375 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2376 auto X2 =
2377 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
2378 auto X3 =
2379 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 2), "", CI);
2380 auto X4 =
2381 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 3), "", CI);
2382
2383 // Our intrinsic to unpack a float2 from an int.
2384 auto SPIRVIntrinsic = clspv::UnpackFunction();
2385
2386 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2387
2388 // Convert the 4 int into 4 float2
2389 auto Y1 = CallInst::Create(NewF, X1, "", CI);
2390 auto Y2 = CallInst::Create(NewF, X2, "", CI);
2391 auto Y3 = CallInst::Create(NewF, X3, "", CI);
2392 auto Y4 = CallInst::Create(NewF, X4, "", CI);
2393
2394 Constant *ShuffleMask4[4] = {
2395 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2396 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2397
2398 // Combine our two float2's into one float4.
2399 auto Z1 = new ShuffleVectorInst(Y1, Y2, ConstantVector::get(ShuffleMask4),
2400 "", CI);
2401 auto Z2 = new ShuffleVectorInst(Y3, Y4, ConstantVector::get(ShuffleMask4),
2402 "", CI);
2403
2404 Constant *ShuffleMask8[8] = {
2405 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2406 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3),
2407 ConstantInt::get(IntTy, 4), ConstantInt::get(IntTy, 5),
2408 ConstantInt::get(IntTy, 6), ConstantInt::get(IntTy, 7)};
2409
2410 // Combine our two float4's into one float8.
2411 return new ShuffleVectorInst(Z1, Z2, ConstantVector::get(ShuffleMask8), "",
2412 CI);
2413 });
2414}
2415
2416bool ReplaceOpenCLBuiltinPass::replaceVloadHalf16(Function &F) {
2417 Module &M = *F.getParent();
2418 return replaceCallsWithValue(F, [&](CallInst *CI) {
2419 // The index argument from vload_half.
2420 auto Arg0 = CI->getOperand(0);
2421
2422 // The pointer argument from vload_half.
2423 auto Arg1 = CI->getOperand(1);
2424
2425 auto IntTy = Type::getInt32Ty(M.getContext());
2426 auto Int4Ty = FixedVectorType::get(IntTy, 4);
2427 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
2428 auto NewPointerTy =
2429 PointerType::get(Int4Ty, Arg1->getType()->getPointerAddressSpace());
2430 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2431
2432 // Cast the half* pointer to int4*.
alan-bakerb8365b62022-07-18 21:59:45 -04002433 // TODO(#816): remove after final transition.
2434 Value *Cast = Arg1;
2435 if (Arg1->getType() != NewPointerTy) {
2436 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2437 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002438
2439 // Index into the correct address of the casted pointer.
2440 auto Arg0x2 = BinaryOperator::Create(Instruction::Shl, Arg0, ConstantInt::get(IntTy, 1), "", CI);
2441 auto Index1 = GetElementPtrInst::Create(Int4Ty, Cast, Arg0x2, "", CI);
2442 auto Arg0x2p1 = BinaryOperator::Create(Instruction::Add, Arg0x2, ConstantInt::get(IntTy, 1), "", CI);
2443 auto Index2 = GetElementPtrInst::Create(Int4Ty, Cast, Arg0x2p1, "", CI);
2444
2445 // Load from the int4* we casted to.
2446 auto Load1 = new LoadInst(Int4Ty, Index1, "", CI);
2447 auto Load2 = new LoadInst(Int4Ty, Index2, "", CI);
2448
2449 // Extract each element from the two loaded int4.
2450 auto X1 =
2451 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 0), "", CI);
2452 auto X2 =
2453 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 1), "", CI);
2454 auto X3 =
2455 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 2), "", CI);
2456 auto X4 =
2457 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 3), "", CI);
2458 auto X5 =
2459 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 0), "", CI);
2460 auto X6 =
2461 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 1), "", CI);
2462 auto X7 =
2463 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 2), "", CI);
2464 auto X8 =
2465 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 3), "", CI);
2466
2467 // Our intrinsic to unpack a float2 from an int.
2468 auto SPIRVIntrinsic = clspv::UnpackFunction();
2469
2470 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2471
2472 // Convert the eight int into float2
2473 auto Y1 = CallInst::Create(NewF, X1, "", CI);
2474 auto Y2 = CallInst::Create(NewF, X2, "", CI);
2475 auto Y3 = CallInst::Create(NewF, X3, "", CI);
2476 auto Y4 = CallInst::Create(NewF, X4, "", CI);
2477 auto Y5 = CallInst::Create(NewF, X5, "", CI);
2478 auto Y6 = CallInst::Create(NewF, X6, "", CI);
2479 auto Y7 = CallInst::Create(NewF, X7, "", CI);
2480 auto Y8 = CallInst::Create(NewF, X8, "", CI);
2481
2482 Constant *ShuffleMask4[4] = {
2483 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2484 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2485
2486 // Combine our two float2's into one float4.
2487 auto Z1 = new ShuffleVectorInst(Y1, Y2, ConstantVector::get(ShuffleMask4),
2488 "", CI);
2489 auto Z2 = new ShuffleVectorInst(Y3, Y4, ConstantVector::get(ShuffleMask4),
2490 "", CI);
2491 auto Z3 = new ShuffleVectorInst(Y5, Y6, ConstantVector::get(ShuffleMask4),
2492 "", CI);
2493 auto Z4 = new ShuffleVectorInst(Y7, Y8, ConstantVector::get(ShuffleMask4),
2494 "", CI);
2495
2496 Constant *ShuffleMask8[8] = {
2497 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2498 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3),
2499 ConstantInt::get(IntTy, 4), ConstantInt::get(IntTy, 5),
2500 ConstantInt::get(IntTy, 6), ConstantInt::get(IntTy, 7)};
2501
2502 // Combine our two float4's into one float8.
2503 auto Z5 = new ShuffleVectorInst(Z1, Z2, ConstantVector::get(ShuffleMask8),
2504 "", CI);
2505 auto Z6 = new ShuffleVectorInst(Z3, Z4, ConstantVector::get(ShuffleMask8),
2506 "", CI);
2507 Constant *ShuffleMask16[16] = {
2508 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2509 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3),
2510 ConstantInt::get(IntTy, 4), ConstantInt::get(IntTy, 5),
2511 ConstantInt::get(IntTy, 6), ConstantInt::get(IntTy, 7),
2512 ConstantInt::get(IntTy, 8), ConstantInt::get(IntTy, 9),
2513 ConstantInt::get(IntTy, 10), ConstantInt::get(IntTy, 11),
2514 ConstantInt::get(IntTy, 12), ConstantInt::get(IntTy, 13),
2515 ConstantInt::get(IntTy, 14), ConstantInt::get(IntTy, 15)};
2516 // Combine our two float8's into one float16.
2517 return new ShuffleVectorInst(Z5, Z6, ConstantVector::get(ShuffleMask16), "",
2518 CI);
2519 });
2520}
2521
SJW2c317da2020-03-23 07:39:13 -05002522bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002523
2524 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2525 //
2526 // %u = load i32 %ptr
Romaric Jodin71fdb322022-05-03 17:01:10 +02002527 // %result = call <2 x float> Unpack2xHalf(u)
SJW2c317da2020-03-23 07:39:13 -05002528 Module &M = *F.getParent();
2529 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002530 auto Index = CI->getOperand(0);
2531 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002532
Kévin Petite8edce32019-04-10 14:23:32 +01002533 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002534 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002535 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002536
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002537 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002538 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002539
Kévin Petite8edce32019-04-10 14:23:32 +01002540 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002541 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002542
Kévin Petite8edce32019-04-10 14:23:32 +01002543 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002544
Kévin Petite8edce32019-04-10 14:23:32 +01002545 // Get our final float2.
2546 return CallInst::Create(NewF, Load, "", CI);
2547 });
David Neto6ad93232018-06-07 15:42:58 -07002548}
2549
SJW2c317da2020-03-23 07:39:13 -05002550bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002551
2552 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2553 //
2554 // %u2 = load <2 x i32> %ptr
2555 // %u2xy = extractelement %u2, 0
2556 // %u2zw = extractelement %u2, 1
2557 // %fxy = call <2 x float> Unpack2xHalf(uint)
2558 // %fzw = call <2 x float> Unpack2xHalf(uint)
Romaric Jodin71fdb322022-05-03 17:01:10 +02002559 // %result = shufflevector %fxy %fzw <4 x float> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05002560 Module &M = *F.getParent();
2561 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002562 auto Index = CI->getOperand(0);
2563 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002564
Kévin Petite8edce32019-04-10 14:23:32 +01002565 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002566 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2567 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002568 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002569
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002570 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002571 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002572
Kévin Petite8edce32019-04-10 14:23:32 +01002573 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002574 auto X =
2575 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2576 auto Y =
2577 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002578
Kévin Petite8edce32019-04-10 14:23:32 +01002579 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002580 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002581
Kévin Petite8edce32019-04-10 14:23:32 +01002582 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002583
Kévin Petite8edce32019-04-10 14:23:32 +01002584 // Get the lower (x & y) components of our final float4.
2585 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002586
Kévin Petite8edce32019-04-10 14:23:32 +01002587 // Get the higher (z & w) components of our final float4.
2588 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002589
Kévin Petite8edce32019-04-10 14:23:32 +01002590 Constant *ShuffleMask[4] = {
2591 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2592 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002593
Kévin Petite8edce32019-04-10 14:23:32 +01002594 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002595 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2596 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002597 });
David Neto6ad93232018-06-07 15:42:58 -07002598}
2599
Romaric Jodin71fdb322022-05-03 17:01:10 +02002600bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size, bool aligned) {
SJW2c317da2020-03-23 07:39:13 -05002601 switch (vec_size) {
2602 case 0:
2603 return replaceVstoreHalf(F);
2604 case 2:
2605 return replaceVstoreHalf2(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02002606 case 3:
2607 return aligned ? replaceVstoreaHalf3(F) : replaceVstoreHalf3(F);
SJW2c317da2020-03-23 07:39:13 -05002608 case 4:
2609 return replaceVstoreHalf4(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02002610 case 8:
2611 return replaceVstoreHalf8(F);
2612 case 16:
2613 return replaceVstoreHalf16(F);
SJW2c317da2020-03-23 07:39:13 -05002614 default:
2615 llvm_unreachable("Unsupported vstore_half vector size");
2616 break;
2617 }
2618 return false;
2619}
David Neto22f144c2017-06-12 14:26:21 -04002620
SJW2c317da2020-03-23 07:39:13 -05002621bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
2622 Module &M = *F.getParent();
2623 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002624 // The value to store.
2625 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002626
Kévin Petite8edce32019-04-10 14:23:32 +01002627 // The index argument from vstore_half.
2628 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002629
Kévin Petite8edce32019-04-10 14:23:32 +01002630 // The pointer argument from vstore_half.
2631 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002632
Kévin Petite8edce32019-04-10 14:23:32 +01002633 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002634 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002635 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2636 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002637
Kévin Petite8edce32019-04-10 14:23:32 +01002638 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002639 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002640
Kévin Petite8edce32019-04-10 14:23:32 +01002641 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002642
Kévin Petite8edce32019-04-10 14:23:32 +01002643 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002644 auto TempVec = InsertElementInst::Create(
2645 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002646
Kévin Petite8edce32019-04-10 14:23:32 +01002647 // Pack the float2 -> half2 (in an int).
2648 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002649
alan-baker7efcaaa2020-05-06 19:33:27 -04002650 bool supports_16bit_storage = true;
2651 switch (Arg2->getType()->getPointerAddressSpace()) {
2652 case clspv::AddressSpace::Global:
2653 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2654 clspv::Option::StorageClass::kSSBO);
2655 break;
2656 case clspv::AddressSpace::Constant:
2657 if (clspv::Option::ConstantArgsInUniformBuffer())
2658 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2659 clspv::Option::StorageClass::kUBO);
2660 else
2661 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2662 clspv::Option::StorageClass::kSSBO);
2663 break;
2664 default:
2665 // Clspv will emit the Float16 capability if the half type is
2666 // encountered. That capability covers private and local addressspaces.
2667 break;
2668 }
2669
SJW2c317da2020-03-23 07:39:13 -05002670 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002671 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002672 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002673 auto ShortPointerTy =
2674 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002675
Kévin Petite8edce32019-04-10 14:23:32 +01002676 // Truncate our i32 to an i16.
2677 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002678
Kévin Petite8edce32019-04-10 14:23:32 +01002679 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002680 // TODO(#816): remove after final transition.
2681 Value *Cast = Arg2;
2682 if (Arg2->getType() != ShortPointerTy) {
2683 Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
2684 }
David Neto22f144c2017-06-12 14:26:21 -04002685
Kévin Petite8edce32019-04-10 14:23:32 +01002686 // Index into the correct address of the casted pointer.
2687 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002688
Kévin Petite8edce32019-04-10 14:23:32 +01002689 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002690 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002691 } else {
2692 // We can only write to 32-bit aligned words.
2693 //
2694 // Assuming base is aligned to 32-bits, replace the equivalent of
2695 // vstore_half(value, index, base)
2696 // with:
2697 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2698 // uint32_t write_to_upper_half = index & 1u;
2699 // uint32_t shift = write_to_upper_half << 4;
2700 //
2701 // // Pack the float value as a half number in bottom 16 bits
2702 // // of an i32.
2703 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2704 //
2705 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2706 // ^ ((packed & 0xffff) << shift)
2707 // // We only need relaxed consistency, but OpenCL 1.2 only has
2708 // // sequentially consistent atomics.
2709 // // TODO(dneto): Use relaxed consistency.
2710 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002711 auto IntPointerTy =
2712 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002713
Kévin Petite8edce32019-04-10 14:23:32 +01002714 auto Four = ConstantInt::get(IntTy, 4);
2715 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002716
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002717 auto IndexIsOdd =
2718 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002719 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002720 auto IndexIntoI32 =
2721 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
alan-bakerb8365b62022-07-18 21:59:45 -04002722 // TODO(#816): remove after final transition.
2723 Value *BaseI32Ptr = Arg2;
2724 if (Arg2->getType() != IntPointerTy) {
2725 BaseI32Ptr =
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002726 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
alan-bakerb8365b62022-07-18 21:59:45 -04002727 }
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002728 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2729 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002730 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002731 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002732 auto MaskBitsToWrite =
2733 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2734 auto MaskedCurrent = BinaryOperator::CreateAnd(
2735 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002736
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002737 auto XLowerBits =
2738 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2739 auto NewBitsToWrite =
2740 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2741 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2742 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002743
Kévin Petite8edce32019-04-10 14:23:32 +01002744 // Generate the call to atomi_xor.
2745 SmallVector<Type *, 5> ParamTypes;
2746 // The pointer type.
2747 ParamTypes.push_back(IntPointerTy);
2748 // The Types for memory scope, semantics, and value.
2749 ParamTypes.push_back(IntTy);
2750 ParamTypes.push_back(IntTy);
2751 ParamTypes.push_back(IntTy);
2752 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2753 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002754
Kévin Petite8edce32019-04-10 14:23:32 +01002755 const auto ConstantScopeDevice =
2756 ConstantInt::get(IntTy, spv::ScopeDevice);
2757 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2758 // (SPIR-V Workgroup).
2759 const auto AddrSpaceSemanticsBits =
2760 IntPointerTy->getPointerAddressSpace() == 1
2761 ? spv::MemorySemanticsUniformMemoryMask
2762 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002763
Kévin Petite8edce32019-04-10 14:23:32 +01002764 // We're using relaxed consistency here.
2765 const auto ConstantMemorySemantics =
2766 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2767 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002768
Kévin Petite8edce32019-04-10 14:23:32 +01002769 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2770 ConstantMemorySemantics, ValueToXor};
2771 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002772
2773 // Return a Nop so the old Call is removed
2774 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2775 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002776 }
David Neto22f144c2017-06-12 14:26:21 -04002777
SJW2c317da2020-03-23 07:39:13 -05002778 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002779 });
David Neto22f144c2017-06-12 14:26:21 -04002780}
2781
SJW2c317da2020-03-23 07:39:13 -05002782bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2783 Module &M = *F.getParent();
2784 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002785 // The value to store.
2786 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002787
Kévin Petite8edce32019-04-10 14:23:32 +01002788 // The index argument from vstore_half.
2789 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002790
Kévin Petite8edce32019-04-10 14:23:32 +01002791 // The pointer argument from vstore_half.
2792 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002793
Kévin Petite8edce32019-04-10 14:23:32 +01002794 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002795 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002796 auto NewPointerTy =
2797 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002798 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002799
Kévin Petite8edce32019-04-10 14:23:32 +01002800 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002801 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002802
Kévin Petite8edce32019-04-10 14:23:32 +01002803 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002804
Kévin Petite8edce32019-04-10 14:23:32 +01002805 // Turn the packed x & y into the final packing.
2806 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002807
Kévin Petite8edce32019-04-10 14:23:32 +01002808 // Cast the half* pointer to int*.
alan-bakerb8365b62022-07-18 21:59:45 -04002809 // TODO(#816): remove after final transition.
2810 Value *Cast = Arg2;
2811 if (Arg2->getType() != NewPointerTy) {
2812 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2813 }
David Neto22f144c2017-06-12 14:26:21 -04002814
Kévin Petite8edce32019-04-10 14:23:32 +01002815 // Index into the correct address of the casted pointer.
2816 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002817
Kévin Petite8edce32019-04-10 14:23:32 +01002818 // Store to the int* we casted to.
2819 return new StoreInst(X, Index, CI);
2820 });
David Neto22f144c2017-06-12 14:26:21 -04002821}
2822
Romaric Jodin71fdb322022-05-03 17:01:10 +02002823bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf3(Function &F) {
2824 Module &M = *F.getParent();
2825 return replaceCallsWithValue(F, [&](CallInst *CI) {
2826 // The value to store.
2827 auto Arg0 = CI->getOperand(0);
2828
2829 // The index argument from vstore_half.
2830 auto Arg1 = CI->getOperand(1);
2831
2832 // The pointer argument from vstore_half.
2833 auto Arg2 = CI->getOperand(2);
2834
2835 auto IntTy = Type::getInt32Ty(M.getContext());
2836 auto ShortTy = Type::getInt16Ty(M.getContext());
2837 auto FloatTy = Type::getFloatTy(M.getContext());
2838 auto Float2Ty = FixedVectorType::get(FloatTy, 2);
2839 auto NewPointerTy =
2840 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
2841 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2842
2843 auto Int0 = ConstantInt::get(IntTy, 0);
2844 auto Int1 = ConstantInt::get(IntTy, 1);
2845 auto Int2 = ConstantInt::get(IntTy, 2);
2846
2847 auto X0 = InsertElementInst::Create(
2848 UndefValue::get(Float2Ty),
2849 ExtractElementInst::Create(Arg0, Int0, "", CI), Int0, "", CI);
2850 auto X1 = InsertElementInst::Create(
2851 UndefValue::get(Float2Ty),
2852 ExtractElementInst::Create(Arg0, Int1, "", CI), Int0, "", CI);
2853 auto X2 = InsertElementInst::Create(
2854 UndefValue::get(Float2Ty),
2855 ExtractElementInst::Create(Arg0, Int2, "", CI), Int0, "", CI);
2856
2857 // Our intrinsic to pack a float2 to an int.
2858 auto SPIRVIntrinsic = clspv::PackFunction();
2859
2860 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2861
2862 // Convert float2 into int and trunc to short to keep only the meaningful
2863 // part of it
2864 auto Y0 =
2865 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X0, "", CI),
2866 ShortTy, "", CI);
2867 auto Y1 =
2868 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X1, "", CI),
2869 ShortTy, "", CI);
2870 auto Y2 =
2871 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X2, "", CI),
2872 ShortTy, "", CI);
2873
2874 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002875 // TODO(#816): remove after final transition.
2876 Value *Cast = Arg2;
2877 if (Arg2->getType() != NewPointerTy) {
2878 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2879 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002880
2881 auto Index0 = BinaryOperator::Create(
2882 Instruction::Add,
2883 BinaryOperator::Create(Instruction::Shl, Arg1, Int1, "", CI), Arg1, "",
2884 CI);
2885 auto GEP0 = GetElementPtrInst::Create(ShortTy, Cast, Index0, "", CI);
2886 new StoreInst(Y0, GEP0, CI);
2887
2888 auto Index1 =
2889 BinaryOperator::Create(Instruction::Add, Index0, Int1, "", CI);
2890 auto GEP1 = GetElementPtrInst::Create(ShortTy, Cast, Index1, "", CI);
2891 new StoreInst(Y1, GEP1, CI);
2892
2893 auto Index2 =
2894 BinaryOperator::Create(Instruction::Add, Index1, Int1, "", CI);
2895 auto GEP2 = GetElementPtrInst::Create(ShortTy, Cast, Index2, "", CI);
2896 return new StoreInst(Y2, GEP2, CI);
2897 });
2898}
2899
2900bool ReplaceOpenCLBuiltinPass::replaceVstoreaHalf3(Function &F) {
2901 Module &M = *F.getParent();
2902 return replaceCallsWithValue(F, [&](CallInst *CI) {
2903 // The value to store.
2904 auto Arg0 = CI->getOperand(0);
2905
2906 // The index argument from vstore_half.
2907 auto Arg1 = CI->getOperand(1);
2908
2909 // The pointer argument from vstore_half.
2910 auto Arg2 = CI->getOperand(2);
2911
2912 auto IntTy = Type::getInt32Ty(M.getContext());
2913 auto ShortTy = Type::getInt16Ty(M.getContext());
2914 auto FloatTy = Type::getFloatTy(M.getContext());
2915 auto Float2Ty = FixedVectorType::get(FloatTy, 2);
2916 auto NewPointerTy =
2917 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
2918 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2919
2920 auto Int0 = ConstantInt::get(IntTy, 0);
2921 auto Int1 = ConstantInt::get(IntTy, 1);
2922 auto Int2 = ConstantInt::get(IntTy, 2);
2923
2924 auto X0 = InsertElementInst::Create(
2925 UndefValue::get(Float2Ty),
2926 ExtractElementInst::Create(Arg0, Int0, "", CI), Int0, "", CI);
2927 auto X1 = InsertElementInst::Create(
2928 UndefValue::get(Float2Ty),
2929 ExtractElementInst::Create(Arg0, Int1, "", CI), Int0, "", CI);
2930 auto X2 = InsertElementInst::Create(
2931 UndefValue::get(Float2Ty),
2932 ExtractElementInst::Create(Arg0, Int2, "", CI), Int0, "", CI);
2933
2934 // Our intrinsic to pack a float2 to an int.
2935 auto SPIRVIntrinsic = clspv::PackFunction();
2936
2937 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2938
2939 // Convert float2 into int and trunc to short to keep only the meaningful
2940 // part of it
2941 auto Y0 =
2942 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X0, "", CI),
2943 ShortTy, "", CI);
2944 auto Y1 =
2945 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X1, "", CI),
2946 ShortTy, "", CI);
2947 auto Y2 =
2948 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X2, "", CI),
2949 ShortTy, "", CI);
2950
2951 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002952 // TODO(#816): remove after final transition.
2953 Value *Cast = Arg2;
2954 if (Arg2->getType() != NewPointerTy) {
2955 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2956 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002957
2958 auto Index0 = BinaryOperator::Create(Instruction::Shl, Arg1, Int2, "", CI);
2959 auto GEP0 = GetElementPtrInst::Create(ShortTy, Cast, Index0, "", CI);
2960 new StoreInst(Y0, GEP0, CI);
2961
2962 auto Index1 =
2963 BinaryOperator::Create(Instruction::Add, Index0, Int1, "", CI);
2964 auto GEP1 = GetElementPtrInst::Create(ShortTy, Cast, Index1, "", CI);
2965 new StoreInst(Y1, GEP1, CI);
2966
2967 auto Index2 =
2968 BinaryOperator::Create(Instruction::Add, Index1, Int1, "", CI);
2969 auto GEP2 = GetElementPtrInst::Create(ShortTy, Cast, Index2, "", CI);
2970 return new StoreInst(Y2, GEP2, CI);
2971 });
2972}
2973
SJW2c317da2020-03-23 07:39:13 -05002974bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2975 Module &M = *F.getParent();
2976 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002977 // The value to store.
2978 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002979
Kévin Petite8edce32019-04-10 14:23:32 +01002980 // The index argument from vstore_half.
2981 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002982
Kévin Petite8edce32019-04-10 14:23:32 +01002983 // The pointer argument from vstore_half.
2984 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002985
Kévin Petite8edce32019-04-10 14:23:32 +01002986 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002987 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2988 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002989 auto NewPointerTy =
2990 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002991 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002992
Kévin Petite8edce32019-04-10 14:23:32 +01002993 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2994 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002995
Kévin Petite8edce32019-04-10 14:23:32 +01002996 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002997 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2998 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002999
Kévin Petite8edce32019-04-10 14:23:32 +01003000 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
3001 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04003002
Kévin Petite8edce32019-04-10 14:23:32 +01003003 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003004 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3005 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003006
Kévin Petite8edce32019-04-10 14:23:32 +01003007 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05003008 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04003009
Kévin Petite8edce32019-04-10 14:23:32 +01003010 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003011
Kévin Petite8edce32019-04-10 14:23:32 +01003012 // Turn the packed x & y into the final component of our int2.
3013 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003014
Kévin Petite8edce32019-04-10 14:23:32 +01003015 // Turn the packed z & w into the final component of our int2.
3016 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003017
Kévin Petite8edce32019-04-10 14:23:32 +01003018 auto Combine = InsertElementInst::Create(
3019 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003020 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
3021 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003022
Kévin Petite8edce32019-04-10 14:23:32 +01003023 // Cast the half* pointer to int2*.
alan-bakerb8365b62022-07-18 21:59:45 -04003024 // TODO(#816): remove after final transition.
3025 Value *Cast = Arg2;
3026 if (Arg2->getType() != NewPointerTy) {
3027 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
3028 }
David Neto22f144c2017-06-12 14:26:21 -04003029
Kévin Petite8edce32019-04-10 14:23:32 +01003030 // Index into the correct address of the casted pointer.
3031 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003032
Kévin Petite8edce32019-04-10 14:23:32 +01003033 // Store to the int2* we casted to.
3034 return new StoreInst(Combine, Index, CI);
3035 });
David Neto22f144c2017-06-12 14:26:21 -04003036}
3037
Romaric Jodin71fdb322022-05-03 17:01:10 +02003038bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf8(Function &F) {
3039 Module &M = *F.getParent();
3040 return replaceCallsWithValue(F, [&](CallInst *CI) {
3041 // The value to store.
3042 auto Arg0 = CI->getOperand(0);
3043
3044 // The index argument from vstore_half.
3045 auto Arg1 = CI->getOperand(1);
3046
3047 // The pointer argument from vstore_half.
3048 auto Arg2 = CI->getOperand(2);
3049
3050 auto IntTy = Type::getInt32Ty(M.getContext());
3051 auto Int4Ty = FixedVectorType::get(IntTy, 4);
3052 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
3053 auto NewPointerTy =
3054 PointerType::get(Int4Ty, Arg2->getType()->getPointerAddressSpace());
3055 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
3056
3057 Constant *ShuffleMask01[2] = {ConstantInt::get(IntTy, 0),
3058 ConstantInt::get(IntTy, 1)};
3059 auto X01 =
3060 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3061 ConstantVector::get(ShuffleMask01), "", CI);
3062 Constant *ShuffleMask23[2] = {ConstantInt::get(IntTy, 2),
3063 ConstantInt::get(IntTy, 3)};
3064 auto X23 =
3065 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3066 ConstantVector::get(ShuffleMask23), "", CI);
3067 Constant *ShuffleMask45[2] = {ConstantInt::get(IntTy, 4),
3068 ConstantInt::get(IntTy, 5)};
3069 auto X45 =
3070 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3071 ConstantVector::get(ShuffleMask45), "", CI);
3072 Constant *ShuffleMask67[2] = {ConstantInt::get(IntTy, 6),
3073 ConstantInt::get(IntTy, 7)};
3074 auto X67 =
3075 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3076 ConstantVector::get(ShuffleMask67), "", CI);
3077
3078 // Our intrinsic to pack a float2 to an int.
3079 auto SPIRVIntrinsic = clspv::PackFunction();
3080
3081 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
3082
3083 auto Y01 = CallInst::Create(NewF, X01, "", CI);
3084 auto Y23 = CallInst::Create(NewF, X23, "", CI);
3085 auto Y45 = CallInst::Create(NewF, X45, "", CI);
3086 auto Y67 = CallInst::Create(NewF, X67, "", CI);
3087
3088 auto Combine = InsertElementInst::Create(
3089 UndefValue::get(Int4Ty), Y01, ConstantInt::get(IntTy, 0), "", CI);
3090 Combine = InsertElementInst::Create(Combine, Y23,
3091 ConstantInt::get(IntTy, 1), "", CI);
3092 Combine = InsertElementInst::Create(Combine, Y45,
3093 ConstantInt::get(IntTy, 2), "", CI);
3094 Combine = InsertElementInst::Create(Combine, Y67,
3095 ConstantInt::get(IntTy, 3), "", CI);
3096
3097 // Cast the half* pointer to int4*.
alan-bakerb8365b62022-07-18 21:59:45 -04003098 // TODO(#816): remove after final transition.
3099 Value *Cast = Arg2;
3100 if (Arg2->getType() != NewPointerTy) {
3101 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
3102 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02003103
3104 // Index into the correct address of the casted pointer.
3105 auto Index = GetElementPtrInst::Create(Int4Ty, Cast, Arg1, "", CI);
3106
3107 // Store to the int4* we casted to.
3108 return new StoreInst(Combine, Index, CI);
3109 });
3110}
3111
3112bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf16(Function &F) {
3113 Module &M = *F.getParent();
3114 return replaceCallsWithValue(F, [&](CallInst *CI) {
3115 // The value to store.
3116 auto Arg0 = CI->getOperand(0);
3117
3118 // The index argument from vstore_half.
3119 auto Arg1 = CI->getOperand(1);
3120
3121 // The pointer argument from vstore_half.
3122 auto Arg2 = CI->getOperand(2);
3123
3124 auto IntTy = Type::getInt32Ty(M.getContext());
3125 auto Int4Ty = FixedVectorType::get(IntTy, 4);
3126 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
3127 auto NewPointerTy =
3128 PointerType::get(Int4Ty, Arg2->getType()->getPointerAddressSpace());
3129 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
3130
3131 Constant *ShuffleMask0[2] = {ConstantInt::get(IntTy, 0),
3132 ConstantInt::get(IntTy, 1)};
3133 auto X0 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3134 ConstantVector::get(ShuffleMask0), "", CI);
3135 Constant *ShuffleMask1[2] = {ConstantInt::get(IntTy, 2),
3136 ConstantInt::get(IntTy, 3)};
3137 auto X1 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3138 ConstantVector::get(ShuffleMask1), "", CI);
3139 Constant *ShuffleMask2[2] = {ConstantInt::get(IntTy, 4),
3140 ConstantInt::get(IntTy, 5)};
3141 auto X2 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3142 ConstantVector::get(ShuffleMask2), "", CI);
3143 Constant *ShuffleMask3[2] = {ConstantInt::get(IntTy, 6),
3144 ConstantInt::get(IntTy, 7)};
3145 auto X3 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3146 ConstantVector::get(ShuffleMask3), "", CI);
3147 Constant *ShuffleMask4[2] = {ConstantInt::get(IntTy, 8),
3148 ConstantInt::get(IntTy, 9)};
3149 auto X4 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3150 ConstantVector::get(ShuffleMask4), "", CI);
3151 Constant *ShuffleMask5[2] = {ConstantInt::get(IntTy, 10),
3152 ConstantInt::get(IntTy, 11)};
3153 auto X5 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3154 ConstantVector::get(ShuffleMask5), "", CI);
3155 Constant *ShuffleMask6[2] = {ConstantInt::get(IntTy, 12),
3156 ConstantInt::get(IntTy, 13)};
3157 auto X6 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3158 ConstantVector::get(ShuffleMask6), "", CI);
3159 Constant *ShuffleMask7[2] = {ConstantInt::get(IntTy, 14),
3160 ConstantInt::get(IntTy, 15)};
3161 auto X7 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3162 ConstantVector::get(ShuffleMask7), "", CI);
3163
3164 // Our intrinsic to pack a float2 to an int.
3165 auto SPIRVIntrinsic = clspv::PackFunction();
3166
3167 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
3168
3169 auto Y0 = CallInst::Create(NewF, X0, "", CI);
3170 auto Y1 = CallInst::Create(NewF, X1, "", CI);
3171 auto Y2 = CallInst::Create(NewF, X2, "", CI);
3172 auto Y3 = CallInst::Create(NewF, X3, "", CI);
3173 auto Y4 = CallInst::Create(NewF, X4, "", CI);
3174 auto Y5 = CallInst::Create(NewF, X5, "", CI);
3175 auto Y6 = CallInst::Create(NewF, X6, "", CI);
3176 auto Y7 = CallInst::Create(NewF, X7, "", CI);
3177
3178 auto Combine1 = InsertElementInst::Create(
3179 UndefValue::get(Int4Ty), Y0, ConstantInt::get(IntTy, 0), "", CI);
3180 Combine1 = InsertElementInst::Create(Combine1, Y1,
3181 ConstantInt::get(IntTy, 1), "", CI);
3182 Combine1 = InsertElementInst::Create(Combine1, Y2,
3183 ConstantInt::get(IntTy, 2), "", CI);
3184 Combine1 = InsertElementInst::Create(Combine1, Y3,
3185 ConstantInt::get(IntTy, 3), "", CI);
3186
3187 auto Combine2 = InsertElementInst::Create(
3188 UndefValue::get(Int4Ty), Y4, ConstantInt::get(IntTy, 0), "", CI);
3189 Combine2 = InsertElementInst::Create(Combine2, Y5,
3190 ConstantInt::get(IntTy, 1), "", CI);
3191 Combine2 = InsertElementInst::Create(Combine2, Y6,
3192 ConstantInt::get(IntTy, 2), "", CI);
3193 Combine2 = InsertElementInst::Create(Combine2, Y7,
3194 ConstantInt::get(IntTy, 3), "", CI);
3195
3196 // Cast the half* pointer to int4*.
alan-bakerb8365b62022-07-18 21:59:45 -04003197 // TODO(#816): remove after final transition.
3198 Value *Cast = Arg2;
3199 if (Arg2->getType() != NewPointerTy) {
3200 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
3201 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02003202
3203 // Index into the correct address of the casted pointer.
3204 auto Arg1x2 = BinaryOperator::Create(Instruction::Shl, Arg1,
3205 ConstantInt::get(IntTy, 1), "", CI);
3206 auto Index1 = GetElementPtrInst::Create(Int4Ty, Cast, Arg1x2, "", CI);
3207
3208 // Store to the int4* we casted to.
3209 new StoreInst(Combine1, Index1, CI);
3210
3211 // Index into the correct address of the casted pointer.
3212 auto Arg1Plus1 = BinaryOperator::Create(Instruction::Add, Arg1x2,
3213 ConstantInt::get(IntTy, 1), "", CI);
3214 auto Index2 = GetElementPtrInst::Create(Int4Ty, Cast, Arg1Plus1, "", CI);
3215
3216 // Store to the int4* we casted to.
3217 return new StoreInst(Combine2, Index2, CI);
3218 });
3219}
3220
SJW2c317da2020-03-23 07:39:13 -05003221bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
3222 // convert half to float
3223 Module &M = *F.getParent();
3224 return replaceCallsWithValue(F, [&](CallInst *CI) {
3225 SmallVector<Type *, 3> types;
3226 SmallVector<Value *, 3> args;
alan-baker5641f5c2021-10-15 09:16:04 -04003227 for (size_t i = 0; i < CI->arg_size(); ++i) {
SJW2c317da2020-03-23 07:39:13 -05003228 types.push_back(CI->getArgOperand(i)->getType());
3229 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05003230 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05003231
alan-baker5a8c3be2020-09-09 13:44:26 -04003232 auto NewFType =
3233 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
3234 cast<VectorType>(CI->getType())
3235 ->getElementCount()
3236 .getKnownMinValue()),
3237 types, false);
SJW2c317da2020-03-23 07:39:13 -05003238
SJW61531372020-06-09 07:31:08 -05003239 std::string NewFName =
3240 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05003241
3242 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
3243
3244 auto NewCI = CallInst::Create(NewF, args, "", CI);
3245
3246 // Convert to the half type.
3247 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
3248 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05003249}
3250
SJW2c317da2020-03-23 07:39:13 -05003251bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
3252 // convert half to float
3253 Module &M = *F.getParent();
3254 return replaceCallsWithValue(F, [&](CallInst *CI) {
3255 SmallVector<Type *, 3> types(3);
3256 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003257
SJW2c317da2020-03-23 07:39:13 -05003258 // Image
3259 types[0] = CI->getArgOperand(0)->getType();
3260 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003261
SJW2c317da2020-03-23 07:39:13 -05003262 // Coord
3263 types[1] = CI->getArgOperand(1)->getType();
3264 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003265
SJW2c317da2020-03-23 07:39:13 -05003266 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04003267 types[2] =
3268 FixedVectorType::get(Type::getFloatTy(M.getContext()),
3269 cast<VectorType>(CI->getArgOperand(2)->getType())
3270 ->getElementCount()
3271 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05003272
SJW2c317da2020-03-23 07:39:13 -05003273 auto NewFType =
3274 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003275
SJW61531372020-06-09 07:31:08 -05003276 std::string NewFName =
3277 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003278
SJW2c317da2020-03-23 07:39:13 -05003279 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003280
SJW2c317da2020-03-23 07:39:13 -05003281 // Convert data to the float type.
3282 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
3283 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05003284
SJW2c317da2020-03-23 07:39:13 -05003285 return CallInst::Create(NewF, args, "", CI);
3286 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05003287}
3288
SJW2c317da2020-03-23 07:39:13 -05003289bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
3290 Function &F) {
3291 // convert read_image with int coords to float coords
3292 Module &M = *F.getParent();
3293 return replaceCallsWithValue(F, [&](CallInst *CI) {
3294 // The image.
3295 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04003296
SJW2c317da2020-03-23 07:39:13 -05003297 // The sampler.
3298 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04003299
SJW2c317da2020-03-23 07:39:13 -05003300 // The coordinate (integer type that we can't handle).
3301 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04003302
alan-baker6792f982022-06-23 14:55:40 -04003303 auto *image_ty =
3304 cast<StructType>(InferType(Arg0, M.getContext(), &InferredTypeCache));
3305 uint32_t dim = clspv::ImageNumDimensions(image_ty);
3306 uint32_t components = dim + (clspv::IsArrayImageType(image_ty) ? 1 : 0);
SJW2c317da2020-03-23 07:39:13 -05003307 Type *float_ty = nullptr;
3308 if (components == 1) {
3309 float_ty = Type::getFloatTy(M.getContext());
3310 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04003311 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
3312 cast<VectorType>(Arg2->getType())
3313 ->getElementCount()
3314 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04003315 }
David Neto22f144c2017-06-12 14:26:21 -04003316
SJW2c317da2020-03-23 07:39:13 -05003317 auto NewFType = FunctionType::get(
3318 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
3319
3320 std::string NewFName = F.getName().str();
3321 NewFName[NewFName.length() - 1] = 'f';
3322
3323 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
3324
3325 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
3326
3327 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
3328 });
David Neto22f144c2017-06-12 14:26:21 -04003329}
3330
SJW2c317da2020-03-23 07:39:13 -05003331bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
3332 return replaceCallsWithValue(F, [&](CallInst *CI) {
3333 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04003334
SJW2c317da2020-03-23 07:39:13 -05003335 // We need to map the OpenCL constants to the SPIR-V equivalents.
3336 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
3337 const auto ConstantMemorySemantics = ConstantInt::get(
3338 IntTy, spv::MemorySemanticsUniformMemoryMask |
3339 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04003340
SJW2c317da2020-03-23 07:39:13 -05003341 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04003342
SJW2c317da2020-03-23 07:39:13 -05003343 // The pointer.
3344 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04003345
SJW2c317da2020-03-23 07:39:13 -05003346 // The memory scope.
3347 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04003348
SJW2c317da2020-03-23 07:39:13 -05003349 // The memory semantics.
3350 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04003351
alan-baker5641f5c2021-10-15 09:16:04 -04003352 if (2 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05003353 // The unequal memory semantics.
3354 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04003355
SJW2c317da2020-03-23 07:39:13 -05003356 // The value.
3357 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04003358
SJW2c317da2020-03-23 07:39:13 -05003359 // The comparator.
3360 Params.push_back(CI->getArgOperand(1));
alan-baker5641f5c2021-10-15 09:16:04 -04003361 } else if (1 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05003362 // The value.
3363 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04003364 }
David Neto22f144c2017-06-12 14:26:21 -04003365
SJW2c317da2020-03-23 07:39:13 -05003366 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
3367 });
David Neto22f144c2017-06-12 14:26:21 -04003368}
3369
SJW2c317da2020-03-23 07:39:13 -05003370bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
3371 llvm::AtomicRMWInst::BinOp Op) {
3372 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04003373 auto align = F.getParent()->getDataLayout().getABITypeAlign(
3374 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05003375 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04003376 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05003377 SyncScope::System, CI);
3378 });
3379}
David Neto22f144c2017-06-12 14:26:21 -04003380
SJW2c317da2020-03-23 07:39:13 -05003381bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
3382 Module &M = *F.getParent();
3383 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04003384 auto IntTy = Type::getInt32Ty(M.getContext());
3385 auto FloatTy = Type::getFloatTy(M.getContext());
3386
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003387 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
3388 ConstantInt::get(IntTy, 1),
3389 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04003390
3391 Constant *UpShuffleMask[4] = {
3392 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3393 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3394
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003395 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
3396 UndefValue::get(FloatTy),
3397 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04003398
Kévin Petite8edce32019-04-10 14:23:32 +01003399 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003400 auto Arg0 =
3401 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
3402 ConstantVector::get(DownShuffleMask), "", CI);
3403 auto Arg1 =
3404 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
3405 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003406 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04003407
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003408 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05003409 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003410
SJW61531372020-06-09 07:31:08 -05003411 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003412
Kévin Petite8edce32019-04-10 14:23:32 +01003413 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003414
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003415 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
3416 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003417 });
David Neto22f144c2017-06-12 14:26:21 -04003418}
David Neto62653202017-10-16 19:05:18 -04003419
SJW2c317da2020-03-23 07:39:13 -05003420bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04003421 // OpenCL's float result = fract(float x, float* ptr)
3422 //
3423 // In the LLVM domain:
3424 //
3425 // %floor_result = call spir_func float @floor(float %x)
3426 // store float %floor_result, float * %ptr
3427 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3428 // %result = call spir_func float
3429 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3430 //
3431 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3432 // and clspv.fract occur in the SPIR-V generator pass:
3433 //
3434 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3435 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3436 // ...
3437 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3438 // OpStore %ptr %floor_result
3439 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3440 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01003441 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04003442
David Neto62653202017-10-16 19:05:18 -04003443 using std::string;
3444
3445 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3446 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04003447
SJW2c317da2020-03-23 07:39:13 -05003448 Module &M = *F.getParent();
3449 return replaceCallsWithValue(F, [&](CallInst *CI) {
SJW2c317da2020-03-23 07:39:13 -05003450 // This is either float or a float vector. All the float-like
3451 // types are this type.
3452 auto result_ty = F.getReturnType();
3453
SJW61531372020-06-09 07:31:08 -05003454 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05003455 Function *fmin_fn = M.getFunction(fmin_name);
3456 if (!fmin_fn) {
3457 // Make the fmin function.
3458 FunctionType *fn_ty =
3459 FunctionType::get(result_ty, {result_ty, result_ty}, false);
3460 fmin_fn =
3461 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
3462 fmin_fn->addFnAttr(Attribute::ReadNone);
3463 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3464 }
3465
SJW61531372020-06-09 07:31:08 -05003466 std::string floor_name =
3467 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05003468 Function *floor_fn = M.getFunction(floor_name);
3469 if (!floor_fn) {
3470 // Make the floor function.
3471 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
3472 floor_fn =
3473 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
3474 floor_fn->addFnAttr(Attribute::ReadNone);
3475 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3476 }
3477
SJW61531372020-06-09 07:31:08 -05003478 std::string clspv_fract_name =
3479 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05003480 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
3481 if (!clspv_fract_fn) {
3482 // Make the clspv_fract function.
3483 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
3484 clspv_fract_fn = cast<Function>(
3485 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
3486 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3487 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3488 }
3489
3490 // Number of significant significand bits, whether represented or not.
3491 unsigned num_significand_bits;
3492 switch (result_ty->getScalarType()->getTypeID()) {
3493 case Type::HalfTyID:
3494 num_significand_bits = 11;
3495 break;
3496 case Type::FloatTyID:
3497 num_significand_bits = 24;
3498 break;
3499 case Type::DoubleTyID:
3500 num_significand_bits = 53;
3501 break;
3502 default:
3503 llvm_unreachable("Unhandled float type when processing fract builtin");
3504 break;
3505 }
3506 // Beware that the disassembler displays this value as
3507 // OpConstant %float 1
3508 // which is not quite right.
3509 const double kJustUnderOneScalar =
3510 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3511
3512 Constant *just_under_one =
3513 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3514 if (result_ty->isVectorTy()) {
3515 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04003516 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05003517 }
3518
3519 IRBuilder<> Builder(CI);
3520
3521 auto arg = CI->getArgOperand(0);
3522 auto ptr = CI->getArgOperand(1);
3523
3524 // Compute floor result and store it.
3525 auto floor = Builder.CreateCall(floor_fn, {arg});
3526 Builder.CreateStore(floor, ptr);
3527
3528 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
3529 auto fract_result =
3530 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
3531
3532 return fract_result;
3533 });
David Neto62653202017-10-16 19:05:18 -04003534}
alan-bakera52b7312020-10-26 08:58:51 -04003535
Kévin Petit8576f682020-11-02 14:51:32 +00003536bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04003537 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00003538 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04003539 // a_shr = a >> 1
3540 // b_shr = b >> 1
3541 // add1 = a_shr + b_shr
3542 // join = a |join_opcode| b
3543 // and = join & 1
3544 // add = add1 + and
3545 const auto a = Call->getArgOperand(0);
3546 const auto b = Call->getArgOperand(1);
3547 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00003548 Value *a_shift, *b_shift;
3549 if (is_signed) {
3550 a_shift = builder.CreateAShr(a, 1);
3551 b_shift = builder.CreateAShr(b, 1);
3552 } else {
3553 a_shift = builder.CreateLShr(a, 1);
3554 b_shift = builder.CreateLShr(b, 1);
3555 }
alan-bakerb6da5132020-10-29 15:59:06 -04003556 auto add = builder.CreateAdd(a_shift, b_shift);
3557 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
3558 auto constant_one = ConstantInt::get(a->getType(), 1);
3559 auto and_bit = builder.CreateAnd(join, constant_one);
3560 return builder.CreateAdd(add, and_bit);
3561 });
3562}
3563
alan-baker3f1bf492020-11-05 09:07:36 -05003564bool ReplaceOpenCLBuiltinPass::replaceAddSubSat(Function &F, bool is_signed,
3565 bool is_add) {
3566 return replaceCallsWithValue(F, [&F, this, is_signed,
3567 is_add](CallInst *Call) {
gnl21f8847012022-05-13 15:11:08 +01003568 auto intrinsic_type =
3569 is_signed ? (is_add ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
3570 : (is_add ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
alan-baker3f1bf492020-11-05 09:07:36 -05003571 auto a = Call->getArgOperand(0);
3572 auto b = Call->getArgOperand(1);
gnl21f8847012022-05-13 15:11:08 +01003573 auto intrinsic = Intrinsic::getDeclaration(F.getParent(), intrinsic_type,
3574 Call->getType());
3575 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {a, b}, "",
3576 Call);
alan-bakera52b7312020-10-26 08:58:51 -04003577 });
3578}
alan-baker4986eff2020-10-29 13:38:00 -04003579
3580bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
3581 return replaceCallsWithValue(F, [](CallInst *Call) {
3582 auto pointer = Call->getArgOperand(0);
3583 // Clang emits an address space cast to the generic address space. Skip the
3584 // cast and use the input directly.
3585 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3586 pointer = cast->getPointerOperand();
3587 }
alan-baker5641f5c2021-10-15 09:16:04 -04003588 Value *order_arg = Call->arg_size() > 1 ? Call->getArgOperand(1) : nullptr;
3589 Value *scope_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003590 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3591 clspv::AddressSpace::Global;
3592 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
3593 spv::MemorySemanticsAcquireMask);
3594 auto scope = MemoryScope(scope_arg, is_global, Call);
3595 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
3596 Call->getType(), {pointer, scope, order});
3597 });
3598}
3599
3600bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
3601 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
3602 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
3603 auto pointer = Call->getArgOperand(0);
3604 // Clang emits an address space cast to the generic address space. Skip the
3605 // cast and use the input directly.
3606 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3607 pointer = cast->getPointerOperand();
3608 }
3609 Value *value = Call->getArgOperand(1);
alan-baker5641f5c2021-10-15 09:16:04 -04003610 Value *order_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
3611 Value *scope_arg = Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003612 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3613 clspv::AddressSpace::Global;
3614 auto scope = MemoryScope(scope_arg, is_global, Call);
3615 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
3616 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
3617 {pointer, scope, order, value});
3618 });
3619}
3620
3621bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
3622 return replaceCallsWithValue(F, [](CallInst *Call) {
3623 auto pointer = Call->getArgOperand(0);
3624 // Clang emits an address space cast to the generic address space. Skip the
3625 // cast and use the input directly.
3626 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3627 pointer = cast->getPointerOperand();
3628 }
3629 auto expected = Call->getArgOperand(1);
3630 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
3631 expected = cast->getPointerOperand();
3632 }
3633 auto value = Call->getArgOperand(2);
3634 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3635 clspv::AddressSpace::Global;
3636 Value *success_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04003637 Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003638 Value *failure_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04003639 Call->arg_size() > 4 ? Call->getArgOperand(4) : nullptr;
3640 Value *scope_arg = Call->arg_size() > 5 ? Call->getArgOperand(5) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003641 auto scope = MemoryScope(scope_arg, is_global, Call);
3642 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
3643 spv::MemorySemanticsAcquireReleaseMask);
3644 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
3645 spv::MemorySemanticsAcquireMask);
3646
3647 // If the value pointed to by |expected| equals the value pointed to by
3648 // |pointer|, |value| is written into |pointer|, otherwise the value in
3649 // |pointer| is written into |expected|. In order to avoid extra stores,
3650 // the basic block with the original atomic is split and the store is
3651 // performed in the |then| block. The condition is the inversion of the
3652 // comparison result.
3653 IRBuilder<> builder(Call);
alan-baker67d639b2022-05-09 11:23:31 -04003654 auto load = builder.CreateLoad(value->getType(), expected);
alan-baker4986eff2020-10-29 13:38:00 -04003655 auto cmp_xchg = InsertSPIRVOp(
3656 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
3657 value->getType(), {pointer, scope, success, failure, value, load});
3658 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
3659 auto not_cmp = builder.CreateNot(cmp);
3660 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
3661 builder.SetInsertPoint(then_branch);
3662 builder.CreateStore(cmp_xchg, expected);
3663 return cmp;
3664 });
3665}
alan-bakercc2bafb2020-11-02 08:30:18 -05003666
alan-baker2cecaa72020-11-05 14:05:20 -05003667bool ReplaceOpenCLBuiltinPass::replaceCountZeroes(Function &F, bool leading) {
alan-bakercc2bafb2020-11-02 08:30:18 -05003668 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3669 return false;
3670
3671 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
alan-baker5f2e88e2020-12-07 15:24:04 -05003672 if (bitwidth > 64)
alan-bakercc2bafb2020-11-02 08:30:18 -05003673 return false;
3674
alan-baker5f2e88e2020-12-07 15:24:04 -05003675 return replaceCallsWithValue(F, [&F, leading](CallInst *Call) {
3676 Function *intrinsic = Intrinsic::getDeclaration(
3677 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz,
3678 Call->getType());
3679 const auto c_false = ConstantInt::getFalse(Call->getContext());
3680 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
3681 {Call->getArgOperand(0), c_false}, "", Call);
alan-bakercc2bafb2020-11-02 08:30:18 -05003682 });
3683}
alan-baker6b9d1ee2020-11-03 23:11:32 -05003684
3685bool ReplaceOpenCLBuiltinPass::replaceMadSat(Function &F, bool is_signed) {
3686 return replaceCallsWithValue(F, [&F, is_signed, this](CallInst *Call) {
3687 const auto ty = Call->getType();
3688 const auto a = Call->getArgOperand(0);
3689 const auto b = Call->getArgOperand(1);
3690 const auto c = Call->getArgOperand(2);
3691 IRBuilder<> builder(Call);
3692 if (is_signed) {
3693 unsigned bitwidth = Call->getType()->getScalarSizeInBits();
3694 if (bitwidth < 32) {
3695 // mul = sext(a) * sext(b)
3696 // add = mul + sext(c)
3697 // res = clamp(add, MIN, MAX)
3698 unsigned extended_width = bitwidth << 1;
Romaric Jodin73ef1be2022-01-25 17:21:22 +01003699 if (clspv::Option::HackClampWidth() && extended_width < 32) {
3700 extended_width = 32;
3701 }
alan-baker6b9d1ee2020-11-03 23:11:32 -05003702 Type *extended_ty = IntegerType::get(F.getContext(), extended_width);
3703 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3704 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
3705 }
3706 auto a_sext = builder.CreateSExt(a, extended_ty);
3707 auto b_sext = builder.CreateSExt(b, extended_ty);
3708 auto c_sext = builder.CreateSExt(c, extended_ty);
3709 // Extended the size so no overflows occur.
3710 auto mul = builder.CreateMul(a_sext, b_sext, "", true, true);
3711 auto add = builder.CreateAdd(mul, c_sext, "", true, true);
3712 auto func_ty = FunctionType::get(
3713 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
3714 // Don't use function type because we need signed parameters.
3715 std::string clamp_name = Builtins::GetMangledFunctionName("clamp");
3716 // The clamp values are the signed min and max of the original bitwidth
3717 // sign extended to the extended bitwidth.
3718 Constant *min = ConstantInt::get(
3719 Call->getContext(),
3720 APInt::getSignedMinValue(bitwidth).sext(extended_width));
3721 Constant *max = ConstantInt::get(
3722 Call->getContext(),
3723 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
3724 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3725 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3726 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3727 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
3728 if (extended_width == 32)
3729 clamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
3730 else
3731 clamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
3732 } else {
3733 if (extended_width == 32)
3734 clamp_name += "iii";
3735 else
3736 clamp_name += "sss";
3737 }
3738 auto callee = F.getParent()->getOrInsertFunction(clamp_name, func_ty);
3739 auto clamp = builder.CreateCall(callee, {add, min, max});
3740 return builder.CreateTrunc(clamp, ty);
3741 } else {
alan-baker6b9d1ee2020-11-03 23:11:32 -05003742 // Compute
3743 // {hi, lo} = smul_extended(a, b)
3744 // add = lo + c
Romaric Jodinc507f312022-04-08 19:09:45 +02003745 auto mul_ext = InsertOpMulExtended(Call, a, b, true);
3746
alan-baker6b9d1ee2020-11-03 23:11:32 -05003747 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3748 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3749 auto add = builder.CreateAdd(mul_lo, c);
3750
3751 // Constants for use in the calculation.
3752 Constant *min = ConstantInt::get(Call->getContext(),
3753 APInt::getSignedMinValue(bitwidth));
3754 Constant *max = ConstantInt::get(Call->getContext(),
3755 APInt::getSignedMaxValue(bitwidth));
3756 Constant *max_plus_1 = ConstantInt::get(
3757 Call->getContext(),
3758 APInt::getSignedMaxValue(bitwidth) + APInt(bitwidth, 1));
3759 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3760 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3761 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3762 max_plus_1 =
3763 ConstantVector::getSplat(vec_ty->getElementCount(), max_plus_1);
3764 }
3765
3766 auto a_xor_b = builder.CreateXor(a, b);
3767 auto same_sign =
3768 builder.CreateICmpSGT(a_xor_b, Constant::getAllOnesValue(ty));
3769 auto different_sign = builder.CreateNot(same_sign);
3770 auto hi_eq_0 = builder.CreateICmpEQ(mul_hi, Constant::getNullValue(ty));
3771 auto hi_ne_0 = builder.CreateNot(hi_eq_0);
3772 auto lo_ge_max = builder.CreateICmpUGE(mul_lo, max);
3773 auto c_gt_0 = builder.CreateICmpSGT(c, Constant::getNullValue(ty));
3774 auto c_lt_0 = builder.CreateICmpSLT(c, Constant::getNullValue(ty));
3775 auto add_gt_max = builder.CreateICmpUGT(add, max);
3776 auto hi_eq_m1 =
3777 builder.CreateICmpEQ(mul_hi, Constant::getAllOnesValue(ty));
3778 auto hi_ne_m1 = builder.CreateNot(hi_eq_m1);
3779 auto lo_le_max_plus_1 = builder.CreateICmpULE(mul_lo, max_plus_1);
3780 auto max_sub_lo = builder.CreateSub(max, mul_lo);
3781 auto c_lt_max_sub_lo = builder.CreateICmpULT(c, max_sub_lo);
3782
3783 // Equivalent to:
3784 // if (((x < 0) == (y < 0)) && mul_hi != 0)
3785 // return MAX
3786 // if (mul_hi == 0 && mul_lo >= MAX && (z > 0 || add > MAX))
3787 // return MAX
3788 // if (((x < 0) != (y < 0)) && mul_hi != -1)
3789 // return MIN
3790 // if (hi == -1 && mul_lo <= (MAX + 1) && (z < 0 || z < (MAX - mul_lo))
3791 // return MIN
3792 // return add
3793 auto max_clamp_1 = builder.CreateAnd(same_sign, hi_ne_0);
3794 auto max_clamp_2 = builder.CreateOr(c_gt_0, add_gt_max);
3795 auto tmp = builder.CreateAnd(hi_eq_0, lo_ge_max);
3796 max_clamp_2 = builder.CreateAnd(tmp, max_clamp_2);
3797 auto max_clamp = builder.CreateOr(max_clamp_1, max_clamp_2);
3798 auto min_clamp_1 = builder.CreateAnd(different_sign, hi_ne_m1);
3799 auto min_clamp_2 = builder.CreateOr(c_lt_0, c_lt_max_sub_lo);
3800 tmp = builder.CreateAnd(hi_eq_m1, lo_le_max_plus_1);
3801 min_clamp_2 = builder.CreateAnd(tmp, min_clamp_2);
3802 auto min_clamp = builder.CreateOr(min_clamp_1, min_clamp_2);
3803 auto sel = builder.CreateSelect(min_clamp, min, add);
3804 return builder.CreateSelect(max_clamp, max, sel);
3805 }
3806 } else {
3807 // {lo, hi} = mul_extended(a, b)
3808 // {add, carry} = add_carry(lo, c)
3809 // cmp = (mul_hi | carry) == 0
3810 // mad_sat = cmp ? add : MAX
3811 auto struct_ty = GetPairStruct(ty);
Romaric Jodinc507f312022-04-08 19:09:45 +02003812 auto mul_ext = InsertOpMulExtended(Call, a, b, false);
alan-baker6b9d1ee2020-11-03 23:11:32 -05003813 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3814 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3815 auto add_carry =
3816 InsertSPIRVOp(Call, spv::OpIAddCarry, {Attribute::ReadNone},
3817 struct_ty, {mul_lo, c});
3818 auto add = builder.CreateExtractValue(add_carry, {0});
3819 auto carry = builder.CreateExtractValue(add_carry, {1});
3820 auto or_value = builder.CreateOr(mul_hi, carry);
3821 auto cmp = builder.CreateICmpEQ(or_value, Constant::getNullValue(ty));
3822 return builder.CreateSelect(cmp, add, Constant::getAllOnesValue(ty));
3823 }
3824 });
3825}
alan-baker15106572020-11-06 15:08:10 -05003826
3827bool ReplaceOpenCLBuiltinPass::replaceOrdered(Function &F, bool is_ordered) {
3828 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3829 return false;
3830
3831 if (F.getFunctionType()->getNumParams() != 2)
3832 return false;
3833
3834 if (F.getFunctionType()->getParamType(0) !=
3835 F.getFunctionType()->getParamType(1)) {
3836 return false;
3837 }
3838
3839 switch (F.getFunctionType()->getParamType(0)->getScalarType()->getTypeID()) {
3840 case Type::FloatTyID:
3841 case Type::HalfTyID:
3842 case Type::DoubleTyID:
3843 break;
3844 default:
3845 return false;
3846 }
3847
3848 // Scalar versions all return an int, while vector versions return a vector
3849 // of an equally sized integer types (e.g. short, int or long).
3850 if (isa<VectorType>(F.getReturnType())) {
3851 if (F.getReturnType()->getScalarSizeInBits() !=
3852 F.getFunctionType()->getParamType(0)->getScalarSizeInBits()) {
3853 return false;
3854 }
3855 } else {
3856 if (F.getReturnType()->getScalarSizeInBits() != 32)
3857 return false;
3858 }
3859
3860 return replaceCallsWithValue(F, [is_ordered](CallInst *Call) {
3861 // Replace with a floating point [un]ordered comparison followed by an
3862 // extension.
3863 auto x = Call->getArgOperand(0);
3864 auto y = Call->getArgOperand(1);
3865 IRBuilder<> builder(Call);
3866 Value *tmp = nullptr;
3867 if (is_ordered) {
3868 // This leads to a slight inefficiency in the SPIR-V that is easy for
3869 // drivers to optimize where the SPIR-V for the comparison and the
3870 // extension could be fused to drop the inversion of the OpIsNan.
3871 tmp = builder.CreateFCmpORD(x, y);
3872 } else {
3873 tmp = builder.CreateFCmpUNO(x, y);
3874 }
3875 // OpenCL CTS requires that vector versions use sign extension, but scalar
3876 // versions use zero extension.
3877 if (isa<VectorType>(Call->getType()))
3878 return builder.CreateSExt(tmp, Call->getType());
3879 return builder.CreateZExt(tmp, Call->getType());
3880 });
3881}
alan-baker497920b2020-11-09 16:41:36 -05003882
3883bool ReplaceOpenCLBuiltinPass::replaceIsNormal(Function &F) {
alan-baker67d639b2022-05-09 11:23:31 -04003884 return replaceCallsWithValue(F, [](CallInst *Call) {
alan-baker497920b2020-11-09 16:41:36 -05003885 auto ty = Call->getType();
3886 auto x = Call->getArgOperand(0);
3887 unsigned width = x->getType()->getScalarSizeInBits();
3888 Type *int_ty = IntegerType::get(Call->getContext(), width);
3889 uint64_t abs_mask = 0x7fffffff;
3890 uint64_t exp_mask = 0x7f800000;
3891 uint64_t min_mask = 0x00800000;
3892 if (width == 16) {
3893 abs_mask = 0x7fff;
3894 exp_mask = 0x7c00;
3895 min_mask = 0x0400;
3896 } else if (width == 64) {
3897 abs_mask = 0x7fffffffffffffff;
3898 exp_mask = 0x7ff0000000000000;
3899 min_mask = 0x0010000000000000;
3900 }
3901 Constant *abs_const = ConstantInt::get(int_ty, APInt(width, abs_mask));
3902 Constant *exp_const = ConstantInt::get(int_ty, APInt(width, exp_mask));
3903 Constant *min_const = ConstantInt::get(int_ty, APInt(width, min_mask));
3904 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3905 int_ty = VectorType::get(int_ty, vec_ty->getElementCount());
3906 abs_const =
3907 ConstantVector::getSplat(vec_ty->getElementCount(), abs_const);
3908 exp_const =
3909 ConstantVector::getSplat(vec_ty->getElementCount(), exp_const);
3910 min_const =
3911 ConstantVector::getSplat(vec_ty->getElementCount(), min_const);
3912 }
3913 // Drop the sign bit and then check that the number is between
3914 // (exclusive) the min and max exponent values for the bit width.
3915 IRBuilder<> builder(Call);
3916 auto bitcast = builder.CreateBitCast(x, int_ty);
3917 auto abs = builder.CreateAnd(bitcast, abs_const);
3918 auto lt = builder.CreateICmpULT(abs, exp_const);
3919 auto ge = builder.CreateICmpUGE(abs, min_const);
3920 auto tmp = builder.CreateAnd(lt, ge);
3921 // OpenCL CTS requires that vector versions use sign extension, but scalar
3922 // versions use zero extension.
3923 if (isa<VectorType>(ty))
3924 return builder.CreateSExt(tmp, ty);
3925 return builder.CreateZExt(tmp, ty);
3926 });
3927}
alan-bakere0406e72020-11-10 12:32:04 -05003928
3929bool ReplaceOpenCLBuiltinPass::replaceFDim(Function &F) {
3930 return replaceCallsWithValue(F, [](CallInst *Call) {
3931 const auto x = Call->getArgOperand(0);
3932 const auto y = Call->getArgOperand(1);
3933 IRBuilder<> builder(Call);
3934 auto sub = builder.CreateFSub(x, y);
3935 auto cmp = builder.CreateFCmpUGT(x, y);
3936 return builder.CreateSelect(cmp, sub,
3937 Constant::getNullValue(Call->getType()));
3938 });
3939}
alan-baker3e0de472020-12-08 15:57:17 -05003940
3941bool ReplaceOpenCLBuiltinPass::replaceRound(Function &F) {
3942 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3943 const auto x = Call->getArgOperand(0);
3944 const double c_halfway = 0.5;
3945 auto halfway = ConstantFP::get(Call->getType(), c_halfway);
3946
3947 const auto clspv_fract_name =
3948 Builtins::GetMangledFunctionName("clspv.fract", F.getFunctionType());
3949 Function *clspv_fract_fn = F.getParent()->getFunction(clspv_fract_name);
3950 if (!clspv_fract_fn) {
3951 // Make the clspv_fract function.
3952 clspv_fract_fn = cast<Function>(
3953 F.getParent()
3954 ->getOrInsertFunction(clspv_fract_name, F.getFunctionType())
3955 .getCallee());
3956 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3957 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3958 }
3959
3960 auto ceil = Intrinsic::getDeclaration(F.getParent(), Intrinsic::ceil,
3961 Call->getType());
3962 auto floor = Intrinsic::getDeclaration(F.getParent(), Intrinsic::floor,
3963 Call->getType());
3964 auto fabs = Intrinsic::getDeclaration(F.getParent(), Intrinsic::fabs,
3965 Call->getType());
3966 auto copysign = Intrinsic::getDeclaration(
3967 F.getParent(), Intrinsic::copysign, {Call->getType(), Call->getType()});
3968
3969 IRBuilder<> builder(Call);
3970
3971 auto fabs_call = builder.CreateCall(F.getFunctionType(), fabs, {x});
3972 auto ceil_call = builder.CreateCall(F.getFunctionType(), ceil, {fabs_call});
3973 auto floor_call =
3974 builder.CreateCall(F.getFunctionType(), floor, {fabs_call});
3975 auto fract_call =
3976 builder.CreateCall(F.getFunctionType(), clspv_fract_fn, {fabs_call});
3977 auto cmp = builder.CreateFCmpOGE(fract_call, halfway);
3978 auto sel = builder.CreateSelect(cmp, ceil_call, floor_call);
3979 return builder.CreateCall(copysign->getFunctionType(), copysign, {sel, x});
3980 });
3981}
3982
3983bool ReplaceOpenCLBuiltinPass::replaceTrigPi(Function &F,
3984 Builtins::BuiltinType type) {
3985 return replaceCallsWithValue(F, [&F, type](CallInst *Call) -> Value * {
3986 const auto x = Call->getArgOperand(0);
3987 const double k_pi = 0x1.921fb54442d18p+1;
3988 Constant *pi = ConstantFP::get(x->getType(), k_pi);
3989
3990 IRBuilder<> builder(Call);
3991 auto mul = builder.CreateFMul(x, pi);
3992 switch (type) {
3993 case Builtins::kSinpi: {
3994 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3995 x->getType());
3996 return builder.CreateCall(func->getFunctionType(), func, {mul});
3997 }
3998 case Builtins::kCospi: {
3999 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
4000 x->getType());
4001 return builder.CreateCall(func->getFunctionType(), func, {mul});
4002 }
4003 case Builtins::kTanpi: {
4004 auto sin = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
4005 x->getType());
4006 auto sin_call = builder.CreateCall(sin->getFunctionType(), sin, {mul});
4007 auto cos = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
4008 x->getType());
4009 auto cos_call = builder.CreateCall(cos->getFunctionType(), cos, {mul});
4010 return builder.CreateFDiv(sin_call, cos_call);
4011 }
4012 default:
4013 llvm_unreachable("unexpected builtin");
4014 break;
4015 }
4016 return nullptr;
4017 });
4018}
alan-baker8b968112020-12-15 15:53:29 -05004019
4020bool ReplaceOpenCLBuiltinPass::replaceSincos(Function &F) {
4021 return replaceCallsWithValue(F, [&F](CallInst *Call) {
4022 auto sin_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
4023 Call->getType());
4024 auto cos_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
4025 Call->getType());
4026
4027 IRBuilder<> builder(Call);
4028 auto sin = builder.CreateCall(sin_func->getFunctionType(), sin_func,
4029 {Call->getArgOperand(0)});
4030 auto cos = builder.CreateCall(cos_func->getFunctionType(), cos_func,
4031 {Call->getArgOperand(0)});
4032 builder.CreateStore(cos, Call->getArgOperand(1));
4033 return sin;
4034 });
4035}
4036
4037bool ReplaceOpenCLBuiltinPass::replaceExpm1(Function &F) {
4038 return replaceCallsWithValue(F, [&F](CallInst *Call) {
4039 auto exp_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::exp,
4040 Call->getType());
4041
4042 IRBuilder<> builder(Call);
4043 auto exp = builder.CreateCall(exp_func->getFunctionType(), exp_func,
4044 {Call->getArgOperand(0)});
4045 return builder.CreateFSub(exp, ConstantFP::get(Call->getType(), 1.0));
4046 });
4047}
4048
4049bool ReplaceOpenCLBuiltinPass::replacePown(Function &F) {
4050 return replaceCallsWithValue(F, [&F](CallInst *Call) {
4051 auto pow_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::pow,
4052 Call->getType());
4053
4054 IRBuilder<> builder(Call);
4055 auto conv = builder.CreateSIToFP(Call->getArgOperand(1), Call->getType());
4056 return builder.CreateCall(pow_func->getFunctionType(), pow_func,
4057 {Call->getArgOperand(0), conv});
4058 });
4059}