blob: 419abe9fa3085c2019c182b9be15559e9d151027 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
alan-baker57ce1c22022-04-26 19:10:44 -040039#include "ReplaceOpenCLBuiltinPass.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040040#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
David Neto22f144c2017-06-12 14:26:21 -040046namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000047
David Neto22f144c2017-06-12 14:26:21 -040048uint32_t clz(uint32_t v) {
49 uint32_t r;
50 uint32_t shift;
51
52 r = (v > 0xFFFF) << 4;
53 v >>= r;
54 shift = (v > 0xFF) << 3;
55 v >>= shift;
56 r |= shift;
57 shift = (v > 0xF) << 2;
58 v >>= shift;
59 r |= shift;
60 shift = (v > 0x3) << 1;
61 v >>= shift;
62 r |= shift;
63 r |= (v >> 1);
64
65 return r;
66}
67
Kévin Petitfdfa92e2019-09-25 14:20:58 +010068Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
69 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040070 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040071 IntTy = FixedVectorType::get(IntTy,
72 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010073 }
74 return IntTy;
75}
76
alan-baker4986eff2020-10-29 13:38:00 -040077Value *MemoryOrderSemantics(Value *order, bool is_global,
78 Instruction *InsertBefore,
alan-baker36309f92021-02-05 12:28:03 -050079 spv::MemorySemanticsMask base_semantics,
80 bool include_storage = true) {
alan-baker4986eff2020-10-29 13:38:00 -040081 enum AtomicMemoryOrder : uint32_t {
82 kMemoryOrderRelaxed = 0,
83 kMemoryOrderAcquire = 2,
84 kMemoryOrderRelease = 3,
85 kMemoryOrderAcqRel = 4,
86 kMemoryOrderSeqCst = 5
87 };
88
89 IRBuilder<> builder(InsertBefore);
90
91 // Constants for OpenCL C 2.0 memory_order.
92 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
93 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
94 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
95 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
96
97 // Constants for SPIR-V ordering memory semantics.
98 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
99 const auto AcquireSemantics =
100 builder.getInt32(spv::MemorySemanticsAcquireMask);
101 const auto ReleaseSemantics =
102 builder.getInt32(spv::MemorySemanticsReleaseMask);
103 const auto AcqRelSemantics =
104 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
105
106 // Constants for SPIR-V storage class semantics.
107 const auto UniformSemantics =
108 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
109 const auto WorkgroupSemantics =
110 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
111
112 // Instead of sequentially consistent, use acquire, release or acquire
113 // release semantics.
114 Value *base_order = nullptr;
115 switch (base_semantics) {
116 case spv::MemorySemanticsAcquireMask:
117 base_order = AcquireSemantics;
118 break;
119 case spv::MemorySemanticsReleaseMask:
120 base_order = ReleaseSemantics;
121 break;
122 default:
123 base_order = AcqRelSemantics;
124 break;
125 }
126
127 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
alan-baker36309f92021-02-05 12:28:03 -0500128 if (order == nullptr) {
129 if (include_storage)
130 return builder.CreateOr({storage, base_order});
131 else
132 return base_order;
133 }
alan-baker4986eff2020-10-29 13:38:00 -0400134
135 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
136 auto is_acquire = builder.CreateICmpEQ(order, acquire);
137 auto is_release = builder.CreateICmpEQ(order, release);
138 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
139 auto semantics =
140 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
141 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
142 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
143 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
alan-baker36309f92021-02-05 12:28:03 -0500144 if (include_storage)
145 return builder.CreateOr({storage, semantics});
146 else
147 return semantics;
alan-baker4986eff2020-10-29 13:38:00 -0400148}
149
150Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
151 enum AtomicMemoryScope : uint32_t {
152 kMemoryScopeWorkItem = 0,
153 kMemoryScopeWorkGroup = 1,
154 kMemoryScopeDevice = 2,
155 kMemoryScopeAllSVMDevices = 3, // not supported
156 kMemoryScopeSubGroup = 4
157 };
158
159 IRBuilder<> builder(InsertBefore);
160
161 // Constants for OpenCL C 2.0 memory_scope.
162 const auto work_item =
163 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
164 const auto work_group =
165 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
166 const auto sub_group =
167 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
168 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
169
170 // Constants for SPIR-V memory scopes.
171 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
172 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
173 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
174 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
175
176 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
177 if (scope == nullptr)
178 return base_scope;
179
180 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
181 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
182 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
183 auto is_device = builder.CreateICmpEQ(scope, device);
184
185 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
186 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
187 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
188 scope = builder.CreateSelect(is_device, DeviceScope, scope);
189
190 return scope;
191}
192
SJW2c317da2020-03-23 07:39:13 -0500193bool replaceCallsWithValue(Function &F,
194 std::function<Value *(CallInst *)> Replacer) {
195
196 bool Changed = false;
197
198 SmallVector<Instruction *, 4> ToRemoves;
199
200 // Walk the users of the function.
201 for (auto &U : F.uses()) {
202 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
203
204 auto NewValue = Replacer(CI);
205
206 if (NewValue != nullptr) {
207 CI->replaceAllUsesWith(NewValue);
208
209 // Lastly, remember to remove the user.
210 ToRemoves.push_back(CI);
211 }
212 }
213 }
214
215 Changed = !ToRemoves.empty();
216
217 // And cleanup the calls we don't use anymore.
218 for (auto V : ToRemoves) {
219 V->eraseFromParent();
220 }
221
222 return Changed;
223}
224
Kévin Petit91bc72e2019-04-08 15:17:46 +0100225} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400226
alan-baker57ce1c22022-04-26 19:10:44 -0400227PreservedAnalyses ReplaceOpenCLBuiltinPass::run(Module &M,
228 ModuleAnalysisManager &MPM) {
229 PreservedAnalyses PA;
SJW2c317da2020-03-23 07:39:13 -0500230 std::list<Function *> func_list;
231 for (auto &F : M.getFunctionList()) {
232 // process only function declarations
233 if (F.isDeclaration() && runOnFunction(F)) {
234 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000235 }
236 }
SJW2c317da2020-03-23 07:39:13 -0500237 if (func_list.size() != 0) {
238 // recursively convert functions, but first remove dead
239 for (auto *F : func_list) {
240 if (F->use_empty()) {
241 F->eraseFromParent();
242 }
243 }
alan-baker57ce1c22022-04-26 19:10:44 -0400244 PA = run(M, MPM);
245 return PA;
SJW2c317da2020-03-23 07:39:13 -0500246 }
alan-baker57ce1c22022-04-26 19:10:44 -0400247 return PA;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000248}
249
SJW2c317da2020-03-23 07:39:13 -0500250bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
251 auto &FI = Builtins::Lookup(&F);
252 switch (FI.getType()) {
253 case Builtins::kAbs:
254 if (!FI.getParameter(0).is_signed) {
255 return replaceAbs(F);
256 }
257 break;
258 case Builtins::kAbsDiff:
259 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400260
261 case Builtins::kAddSat:
alan-baker3f1bf492020-11-05 09:07:36 -0500262 return replaceAddSubSat(F, FI.getParameter(0).is_signed, true);
alan-bakera52b7312020-10-26 08:58:51 -0400263
alan-bakercc2bafb2020-11-02 08:30:18 -0500264 case Builtins::kClz:
alan-baker2cecaa72020-11-05 14:05:20 -0500265 return replaceCountZeroes(F, true);
266
267 case Builtins::kCtz:
268 return replaceCountZeroes(F, false);
alan-bakercc2bafb2020-11-02 08:30:18 -0500269
alan-bakerb6da5132020-10-29 15:59:06 -0400270 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000271 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400272 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000273 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400274
SJW2c317da2020-03-23 07:39:13 -0500275 case Builtins::kCopysign:
276 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100277
SJW2c317da2020-03-23 07:39:13 -0500278 case Builtins::kNativeRecip:
Romaric Jodinef707982022-07-22 16:25:19 +0200279 return replaceNativeRecip(F);
SJW2c317da2020-03-23 07:39:13 -0500280
281 case Builtins::kDot:
282 return replaceDot(F);
283
284 case Builtins::kExp10:
285 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500286 case Builtins::kNativeExp10:
287 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500288
alan-baker8b968112020-12-15 15:53:29 -0500289 case Builtins::kExpm1:
290 return replaceExpm1(F);
291
SJW2c317da2020-03-23 07:39:13 -0500292 case Builtins::kLog10:
293 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500294 case Builtins::kNativeLog10:
295 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500296
gnl21636e7992020-09-09 16:08:16 +0100297 case Builtins::kLog1p:
298 return replaceLog1p(F);
299
alan-bakere0406e72020-11-10 12:32:04 -0500300 case Builtins::kFdim:
301 return replaceFDim(F);
302
SJW2c317da2020-03-23 07:39:13 -0500303 case Builtins::kFmod:
304 return replaceFmod(F);
305
alan-baker8b968112020-12-15 15:53:29 -0500306 case Builtins::kPown:
307 return replacePown(F);
308
alan-baker3e0de472020-12-08 15:57:17 -0500309 case Builtins::kRound:
310 return replaceRound(F);
311
312 case Builtins::kCospi:
313 case Builtins::kSinpi:
314 case Builtins::kTanpi:
315 return replaceTrigPi(F, FI.getType());
316
alan-baker8b968112020-12-15 15:53:29 -0500317 case Builtins::kSincos:
318 return replaceSincos(F);
319
SJW2c317da2020-03-23 07:39:13 -0500320 case Builtins::kBarrier:
321 case Builtins::kWorkGroupBarrier:
322 return replaceBarrier(F);
323
alan-baker12d2c182020-07-20 08:22:42 -0400324 case Builtins::kSubGroupBarrier:
325 return replaceBarrier(F, true);
326
alan-baker36309f92021-02-05 12:28:03 -0500327 case Builtins::kAtomicWorkItemFence:
328 return replaceMemFence(F, spv::MemorySemanticsMaskNone);
SJW2c317da2020-03-23 07:39:13 -0500329 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400330 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500331 case Builtins::kReadMemFence:
332 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
333 case Builtins::kWriteMemFence:
334 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
335
336 // Relational
337 case Builtins::kIsequal:
alan-baker3e217772020-11-07 17:29:40 -0500338 return replaceRelational(F, CmpInst::FCMP_OEQ);
SJW2c317da2020-03-23 07:39:13 -0500339 case Builtins::kIsgreater:
alan-baker3e217772020-11-07 17:29:40 -0500340 return replaceRelational(F, CmpInst::FCMP_OGT);
SJW2c317da2020-03-23 07:39:13 -0500341 case Builtins::kIsgreaterequal:
alan-baker3e217772020-11-07 17:29:40 -0500342 return replaceRelational(F, CmpInst::FCMP_OGE);
SJW2c317da2020-03-23 07:39:13 -0500343 case Builtins::kIsless:
alan-baker3e217772020-11-07 17:29:40 -0500344 return replaceRelational(F, CmpInst::FCMP_OLT);
SJW2c317da2020-03-23 07:39:13 -0500345 case Builtins::kIslessequal:
alan-baker3e217772020-11-07 17:29:40 -0500346 return replaceRelational(F, CmpInst::FCMP_OLE);
SJW2c317da2020-03-23 07:39:13 -0500347 case Builtins::kIsnotequal:
alan-baker3e217772020-11-07 17:29:40 -0500348 return replaceRelational(F, CmpInst::FCMP_UNE);
349 case Builtins::kIslessgreater:
350 return replaceRelational(F, CmpInst::FCMP_ONE);
SJW2c317da2020-03-23 07:39:13 -0500351
alan-baker15106572020-11-06 15:08:10 -0500352 case Builtins::kIsordered:
353 return replaceOrdered(F, true);
354
355 case Builtins::kIsunordered:
356 return replaceOrdered(F, false);
357
SJW2c317da2020-03-23 07:39:13 -0500358 case Builtins::kIsinf: {
359 bool is_vec = FI.getParameter(0).vector_size != 0;
360 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
361 }
362 case Builtins::kIsnan: {
363 bool is_vec = FI.getParameter(0).vector_size != 0;
364 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
365 }
366
367 case Builtins::kIsfinite:
368 return replaceIsFinite(F);
369
370 case Builtins::kAll: {
371 bool is_vec = FI.getParameter(0).vector_size != 0;
372 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
373 }
374 case Builtins::kAny: {
375 bool is_vec = FI.getParameter(0).vector_size != 0;
376 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
377 }
378
alan-baker497920b2020-11-09 16:41:36 -0500379 case Builtins::kIsnormal:
380 return replaceIsNormal(F);
381
SJW2c317da2020-03-23 07:39:13 -0500382 case Builtins::kUpsample:
383 return replaceUpsample(F);
384
385 case Builtins::kRotate:
386 return replaceRotate(F);
387
388 case Builtins::kConvert:
389 return replaceConvert(F, FI.getParameter(0).is_signed,
390 FI.getReturnType().is_signed);
391
alan-baker4986eff2020-10-29 13:38:00 -0400392 // OpenCL 2.0 explicit atomics have different default scopes and semantics
393 // than legacy atomic functions.
394 case Builtins::kAtomicLoad:
395 case Builtins::kAtomicLoadExplicit:
396 return replaceAtomicLoad(F);
397 case Builtins::kAtomicStore:
398 case Builtins::kAtomicStoreExplicit:
399 return replaceExplicitAtomics(F, spv::OpAtomicStore,
400 spv::MemorySemanticsReleaseMask);
401 case Builtins::kAtomicExchange:
402 case Builtins::kAtomicExchangeExplicit:
403 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
404 case Builtins::kAtomicFetchAdd:
405 case Builtins::kAtomicFetchAddExplicit:
406 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
407 case Builtins::kAtomicFetchSub:
408 case Builtins::kAtomicFetchSubExplicit:
409 return replaceExplicitAtomics(F, spv::OpAtomicISub);
410 case Builtins::kAtomicFetchOr:
411 case Builtins::kAtomicFetchOrExplicit:
412 return replaceExplicitAtomics(F, spv::OpAtomicOr);
413 case Builtins::kAtomicFetchXor:
414 case Builtins::kAtomicFetchXorExplicit:
415 return replaceExplicitAtomics(F, spv::OpAtomicXor);
416 case Builtins::kAtomicFetchAnd:
417 case Builtins::kAtomicFetchAndExplicit:
418 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
419 case Builtins::kAtomicFetchMin:
420 case Builtins::kAtomicFetchMinExplicit:
421 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
422 ? spv::OpAtomicSMin
423 : spv::OpAtomicUMin);
424 case Builtins::kAtomicFetchMax:
425 case Builtins::kAtomicFetchMaxExplicit:
426 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
427 ? spv::OpAtomicSMax
428 : spv::OpAtomicUMax);
429 // Weak compare exchange is generated as strong compare exchange.
430 case Builtins::kAtomicCompareExchangeWeak:
431 case Builtins::kAtomicCompareExchangeWeakExplicit:
432 case Builtins::kAtomicCompareExchangeStrong:
433 case Builtins::kAtomicCompareExchangeStrongExplicit:
434 return replaceAtomicCompareExchange(F);
435
436 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500437 case Builtins::kAtomicInc:
438 return replaceAtomics(F, spv::OpAtomicIIncrement);
439 case Builtins::kAtomicDec:
440 return replaceAtomics(F, spv::OpAtomicIDecrement);
441 case Builtins::kAtomicCmpxchg:
442 return replaceAtomics(F, spv::OpAtomicCompareExchange);
443 case Builtins::kAtomicAdd:
444 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
445 case Builtins::kAtomicSub:
446 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
447 case Builtins::kAtomicXchg:
448 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
449 case Builtins::kAtomicMin:
450 return replaceAtomics(F, FI.getParameter(0).is_signed
451 ? llvm::AtomicRMWInst::Min
452 : llvm::AtomicRMWInst::UMin);
453 case Builtins::kAtomicMax:
454 return replaceAtomics(F, FI.getParameter(0).is_signed
455 ? llvm::AtomicRMWInst::Max
456 : llvm::AtomicRMWInst::UMax);
457 case Builtins::kAtomicAnd:
458 return replaceAtomics(F, llvm::AtomicRMWInst::And);
459 case Builtins::kAtomicOr:
460 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
461 case Builtins::kAtomicXor:
462 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
463
464 case Builtins::kCross:
465 if (FI.getParameter(0).vector_size == 4) {
466 return replaceCross(F);
467 }
468 break;
469
470 case Builtins::kFract:
471 if (FI.getParameterCount()) {
472 return replaceFract(F, FI.getParameter(0).vector_size);
473 }
474 break;
475
476 case Builtins::kMadHi:
477 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
478 case Builtins::kMulHi:
479 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
480
alan-baker6b9d1ee2020-11-03 23:11:32 -0500481 case Builtins::kMadSat:
482 return replaceMadSat(F, FI.getParameter(0).is_signed);
483
SJW2c317da2020-03-23 07:39:13 -0500484 case Builtins::kMad:
485 case Builtins::kMad24:
486 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
487 true);
488 case Builtins::kMul24:
489 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
490 false);
491
492 case Builtins::kSelect:
493 return replaceSelect(F);
494
495 case Builtins::kBitselect:
496 return replaceBitSelect(F);
497
498 case Builtins::kVload:
499 return replaceVload(F);
500
501 case Builtins::kVloadaHalf:
Romaric Jodin71fdb322022-05-03 17:01:10 +0200502 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size,
503 true);
SJW2c317da2020-03-23 07:39:13 -0500504 case Builtins::kVloadHalf:
Romaric Jodin71fdb322022-05-03 17:01:10 +0200505 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size,
506 false);
SJW2c317da2020-03-23 07:39:13 -0500507
508 case Builtins::kVstore:
509 return replaceVstore(F);
510
SJW2c317da2020-03-23 07:39:13 -0500511 case Builtins::kVstoreaHalf:
Romaric Jodin71fdb322022-05-03 17:01:10 +0200512 return replaceVstoreHalf(F, FI.getParameter(0).vector_size, true);
513 case Builtins::kVstoreHalf:
514 return replaceVstoreHalf(F, FI.getParameter(0).vector_size, false);
SJW2c317da2020-03-23 07:39:13 -0500515
516 case Builtins::kSmoothstep: {
517 int vec_size = FI.getLastParameter().vector_size;
518 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500519 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500520 }
521 break;
522 }
523 case Builtins::kStep: {
524 int vec_size = FI.getLastParameter().vector_size;
525 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500526 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500527 }
528 break;
529 }
530
531 case Builtins::kSignbit:
532 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
533
alan-baker3f1bf492020-11-05 09:07:36 -0500534 case Builtins::kSubSat:
535 return replaceAddSubSat(F, FI.getParameter(0).is_signed, false);
536
SJW2c317da2020-03-23 07:39:13 -0500537 case Builtins::kReadImageh:
538 return replaceHalfReadImage(F);
539 case Builtins::kReadImagef:
540 case Builtins::kReadImagei:
541 case Builtins::kReadImageui: {
542 if (FI.getParameter(1).isSampler() &&
543 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
544 return replaceSampledReadImageWithIntCoords(F);
545 }
546 break;
547 }
548
549 case Builtins::kWriteImageh:
550 return replaceHalfWriteImage(F);
551
Kévin Petit1cb45112020-04-27 18:55:48 +0100552 case Builtins::kPrefetch:
553 return replacePrefetch(F);
554
rjodinchr791203f2021-10-07 20:42:41 +0200555 // Asynchronous copies
556 case Builtins::kAsyncWorkGroupCopy:
alan-baker67d639b2022-05-09 11:23:31 -0400557 return replaceAsyncWorkGroupCopy(
558 F, FI.getParameter(0).DataType(F.getParent()->getContext()));
rjodinchr791203f2021-10-07 20:42:41 +0200559 case Builtins::kAsyncWorkGroupStridedCopy:
alan-baker67d639b2022-05-09 11:23:31 -0400560 return replaceAsyncWorkGroupStridedCopy(
561 F, FI.getParameter(0).DataType(F.getParent()->getContext()));
rjodinchr791203f2021-10-07 20:42:41 +0200562 case Builtins::kWaitGroupEvents:
563 return replaceWaitGroupEvents(F);
564
SJW2c317da2020-03-23 07:39:13 -0500565 default:
566 break;
567 }
568
569 return false;
570}
571
alan-baker6b9d1ee2020-11-03 23:11:32 -0500572Type *ReplaceOpenCLBuiltinPass::GetPairStruct(Type *type) {
573 auto iter = PairStructMap.find(type);
574 if (iter != PairStructMap.end())
575 return iter->second;
576
577 auto new_struct = StructType::get(type->getContext(), {type, type});
578 PairStructMap[type] = new_struct;
579 return new_struct;
580}
581
Romaric Jodinc507f312022-04-08 19:09:45 +0200582Value *ReplaceOpenCLBuiltinPass::InsertOpMulExtended(Instruction *InsertPoint,
583 Value *a, Value *b,
alan-baker57ce1c22022-04-26 19:10:44 -0400584 bool IsSigned,
585 bool Int64) {
Romaric Jodinc507f312022-04-08 19:09:45 +0200586
587 Type *Ty = a->getType();
588 Type *RetTy = GetPairStruct(a->getType());
589 assert(Ty == b->getType());
590
591 if (!Option::HackMulExtended()) {
592 spv::Op opcode = IsSigned ? spv::OpSMulExtended : spv::OpUMulExtended;
593
594 return clspv::InsertSPIRVOp(InsertPoint, opcode, {Attribute::ReadNone},
595 RetTy, {a, b});
596 }
597
598 unsigned int ScalarSizeInBits = Ty->getScalarSizeInBits();
599 bool IsVector = Ty->isVectorTy();
600
601 IRBuilder<> Builder(InsertPoint);
602
603 if (ScalarSizeInBits < 32 || (ScalarSizeInBits == 32 && Int64)) {
604 /*
605 * {mul_lo, mul_hi} = OpMulExtended(a, b, IsSigned) {
606 * S = SizeInBits(a)
607 * a_ext = ext2S(a, IsSigned)
608 * b_ext = ext2S(b, IsSigned)
609 * mul = a_ext * b_ext
610 * mul_lo = truncS(mul)
611 * mul_hi = truncS(mul >> S)
612 * return {mul_lo, mul_hi}
613 * }
614 */
615 Type *TyTimes2 =
616 Ty->getIntNTy(InsertPoint->getContext(), ScalarSizeInBits * 2);
617 if (IsVector) {
618 TyTimes2 = VectorType::get(TyTimes2, dyn_cast<VectorType>(Ty));
619 }
620 Value *aExtended, *bExtended;
621 if (IsSigned) {
622 aExtended = Builder.CreateSExt(a, TyTimes2);
623 bExtended = Builder.CreateSExt(b, TyTimes2);
624 } else {
625 aExtended = Builder.CreateZExt(a, TyTimes2);
626 bExtended = Builder.CreateZExt(b, TyTimes2);
627 }
628 auto mul = Builder.CreateMul(aExtended, bExtended);
629 auto mul_lo = Builder.CreateTrunc(mul, Ty);
630 auto mul_hi =
631 Builder.CreateTrunc(Builder.CreateLShr(mul, ScalarSizeInBits), Ty);
632
633 return Builder.CreateInsertValue(
634 Builder.CreateInsertValue(UndefValue::get(RetTy), mul_lo, {0}), mul_hi,
635 {1});
636 } else if (ScalarSizeInBits == 64 || (ScalarSizeInBits == 32 && !Int64)) {
637 /*
638 * {mul_lo, mul_hi} = OpMulExtended(a, b, IsSigned) {
639 * S = SizeInBits(a)
640 * hS = S / 2
641 * if (IsSigned) {
642 * res_neg = (a > 0) ^ (b > 0) = (a ^ b) < 0
643 * a = abs(a)
644 * b = abs(b)
645 * }
646 * a0 = trunchS(a)
647 * a1 = trunchS(a >> hS)
648 * b0 = trunchS(b)
649 * b1 = trunchS(b >> hS)
650 * {a0b0_0, a0b0_1} = zextS(OpUMulExtended(a0, b0))
651 * {a1b0_0, a1b0_1} = zextS(OpUMulExtended(a1, b0))
652 * {a0b1_0, a0b1_1} = zextS(OpUMulExtended(a0, b1))
653 * {a1b1_0, a1b1_1} = zextS(OpUMulExtended(a1, b1))
654 *
655 * mul_lo_hi = a0b0_1 + a1b0_0 + a0b1_0
656 * carry_mul_lo_hi = mul_lo_hi >> hS
657 * mul_hi_lo = a1b1_0 + a1b0_1 + a0b1_1 + carry_mul_lo_hi
658 * mul_lo = a0b0_0 + mul_lo_hi << hS
659 * mul_hi = mul_hi_lo + a1b1_1 << hS
660 *
661 * if (IsSigned) {
662 * mul_lo_xor = mul_lo ^ -1
663 * {mul_lo_inv, carry} = OpIAddCarry(mul_lo_xor, 1)
664 * mul_hi_inv = mul_hi ^ -1 + carry
665 * mul_lo = res_neg ? mul_lo_inv : mul_lo
666 * mul_hi = res_neg ? mul_hi_inv : mul_hi
667 * }
668 * return {mul_lo, mul_hi}
669 * }
670 */
671 Type *TyDiv2 =
672 Ty->getIntNTy(InsertPoint->getContext(), ScalarSizeInBits / 2);
673 if (IsVector) {
674 TyDiv2 = VectorType::get(TyDiv2, dyn_cast<VectorType>(Ty));
675 }
676
677 Value *res_neg;
678 if (IsSigned) {
679 // We want to work with unsigned value.
680 // Convert everything to unsigned and remember the signed of the end
681 // result.
682 auto a_b_xor = Builder.CreateXor(a, b);
683 res_neg = Builder.CreateICmpSLT(a_b_xor, ConstantInt::get(Ty, 0, true));
684
685 auto F = InsertPoint->getFunction();
686 auto abs = Intrinsic::getDeclaration(F->getParent(), Intrinsic::abs, Ty);
687 a = Builder.CreateCall(abs, {a, Builder.getInt1(false)});
688 b = Builder.CreateCall(abs, {b, Builder.getInt1(false)});
689 }
690
691 auto a0 = Builder.CreateTrunc(a, TyDiv2);
692 auto a1 = Builder.CreateTrunc(Builder.CreateLShr(a, ScalarSizeInBits / 2),
693 TyDiv2);
694 auto b0 = Builder.CreateTrunc(b, TyDiv2);
695 auto b1 = Builder.CreateTrunc(Builder.CreateLShr(b, ScalarSizeInBits / 2),
696 TyDiv2);
697
698 auto a0b0 = InsertOpMulExtended(InsertPoint, a0, b0, false, true);
699 auto a1b0 = InsertOpMulExtended(InsertPoint, a1, b0, false, true);
700 auto a0b1 = InsertOpMulExtended(InsertPoint, a0, b1, false, true);
701 auto a1b1 = InsertOpMulExtended(InsertPoint, a1, b1, false, true);
702 auto a0b0_0 = Builder.CreateZExt(Builder.CreateExtractValue(a0b0, {0}), Ty);
703 auto a0b0_1 = Builder.CreateZExt(Builder.CreateExtractValue(a0b0, {1}), Ty);
704 auto a1b0_0 = Builder.CreateZExt(Builder.CreateExtractValue(a1b0, {0}), Ty);
705 auto a1b0_1 = Builder.CreateZExt(Builder.CreateExtractValue(a1b0, {1}), Ty);
706 auto a0b1_0 = Builder.CreateZExt(Builder.CreateExtractValue(a0b1, {0}), Ty);
707 auto a0b1_1 = Builder.CreateZExt(Builder.CreateExtractValue(a0b1, {1}), Ty);
708 auto a1b1_0 = Builder.CreateZExt(Builder.CreateExtractValue(a1b1, {0}), Ty);
709 auto a1b1_1 = Builder.CreateZExt(Builder.CreateExtractValue(a1b1, {1}), Ty);
710
711 auto mul_lo_hi =
712 Builder.CreateAdd(Builder.CreateAdd(a0b0_1, a1b0_0), a0b1_0);
713 auto carry_mul_lo_hi = Builder.CreateLShr(mul_lo_hi, ScalarSizeInBits / 2);
714 auto mul_hi_lo = Builder.CreateAdd(
715 Builder.CreateAdd(Builder.CreateAdd(a1b1_0, a1b0_1), a0b1_1),
716 carry_mul_lo_hi);
717 auto mul_lo = Builder.CreateAdd(
718 a0b0_0, Builder.CreateShl(mul_lo_hi, ScalarSizeInBits / 2));
719 auto mul_hi = Builder.CreateAdd(
720 mul_hi_lo, Builder.CreateShl(a1b1_1, ScalarSizeInBits / 2));
721
722 if (IsSigned) {
723 // Apply the sign that we got from the previous if statement setting
724 // res_neg.
725 auto mul_lo_xor =
726 Builder.CreateXor(mul_lo, Constant::getAllOnesValue(Ty));
727 auto mul_lo_xor_add =
728 InsertSPIRVOp(InsertPoint, spv::OpIAddCarry, {Attribute::ReadNone},
729 RetTy, {mul_lo_xor, ConstantInt::get(Ty, 1)});
730 auto mul_lo_inv = Builder.CreateExtractValue(mul_lo_xor_add, {0});
731 auto carry = Builder.CreateExtractValue(mul_lo_xor_add, {1});
732 auto mul_hi_inv = Builder.CreateAdd(
733 carry, Builder.CreateXor(mul_hi, Constant::getAllOnesValue(Ty)));
734 mul_lo = Builder.CreateSelect(res_neg, mul_lo_inv, mul_lo);
735 mul_hi = Builder.CreateSelect(res_neg, mul_hi_inv, mul_hi);
736 }
737
738 return Builder.CreateInsertValue(
739 Builder.CreateInsertValue(UndefValue::get(RetTy), mul_lo, {0}), mul_hi,
740 {1});
741 } else {
742 llvm_unreachable("Unexpected type for InsertOpMulExtended");
743 }
744}
745
rjodinchr791203f2021-10-07 20:42:41 +0200746bool ReplaceOpenCLBuiltinPass::replaceWaitGroupEvents(Function &F) {
747 /* Simple implementation for wait_group_events to avoid dealing with the event
748 * list:
749 *
750 * void wait_group_events(int num_events, event_t *event_list) {
751 * barrier(CLK_LOCAL_MEM_FENCE);
752 * }
753 *
754 */
755
756 enum {
757 CLK_LOCAL_MEM_FENCE = 0x01,
758 CLK_GLOBAL_MEM_FENCE = 0x02,
759 CLK_IMAGE_MEM_FENCE = 0x04
760 };
761
762 return replaceCallsWithValue(F, [](CallInst *CI) {
763 IRBuilder<> Builder(CI);
764
765 const auto ConstantScopeWorkgroup = Builder.getInt32(spv::ScopeWorkgroup);
766 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
767 Instruction::Shl, Builder.getInt32(CLK_LOCAL_MEM_FENCE),
768 Builder.getInt32(clz(spv::MemorySemanticsWorkgroupMemoryMask) -
769 clz(CLK_LOCAL_MEM_FENCE)),
770 "", CI);
771 auto MemorySemantics = BinaryOperator::Create(
772 Instruction::Or, MemorySemanticsWorkgroup,
773 ConstantInt::get(Builder.getInt32Ty(),
774 spv::MemorySemanticsAcquireReleaseMask),
775 "", CI);
776
777 return clspv::InsertSPIRVOp(
778 CI, spv::OpControlBarrier,
779 {Attribute::NoDuplicate, Attribute::Convergent}, Builder.getVoidTy(),
780 {ConstantScopeWorkgroup, ConstantScopeWorkgroup, MemorySemantics});
781 });
782}
783
784GlobalVariable *ReplaceOpenCLBuiltinPass::getOrCreateGlobalVariable(
785 Module &M, std::string VariableName,
786 AddressSpace::Type VariableAddressSpace) {
787 GlobalVariable *GV = M.getGlobalVariable(VariableName);
788 if (GV == nullptr) {
789 IntegerType *IT = IntegerType::get(M.getContext(), 32);
790 VectorType *VT = FixedVectorType::get(IT, 3);
791
792 GV = new GlobalVariable(M, VT, false, GlobalValue::ExternalLinkage, nullptr,
793 VariableName, nullptr,
794 GlobalValue::ThreadLocalMode::NotThreadLocal,
795 VariableAddressSpace);
796 GV->setInitializer(Constant::getNullValue(VT));
797 }
798 return GV;
799}
800
801Value *ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopies(
alan-baker67d639b2022-05-09 11:23:31 -0400802 Module &M, CallInst *CI, Value *Dst, Value *Src, Type *GenType,
803 Value *NumGentypes, Value *Stride, Value *Event) {
rjodinchr791203f2021-10-07 20:42:41 +0200804 /*
805 * event_t *async_work_group_strided_copy(T *dst, T *src, size_t num_gentypes,
806 * size_t stride, event_t event) {
807 * size_t start_id = ((get_local_id(2) * get_local_size(1))
808 * + get_local_id(1)) * get_local_size(0)
809 * + get_local_id(0);
810 * size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
811 * for (size_t it = start_id; it < num_gentypes; it += incr) {
812 * dst[it] = src[it * stride];
813 * }
814 * return event;
815 * }
816 */
817
818 /* BB:
819 * before
820 * async_work_group_strided_copy
821 * after
822 *
823 * ================================
824 *
825 * BB:
826 * before
827 * start_id = f(get_local_ids, get_local_sizes)
828 * incr = g(get_local_sizes)
829 * br CmpBB
830 *
831 * CmpBB:
832 * it = PHI(start_id, it)
833 * cmp = it < NumGentypes
834 * condBr cmp, LoopBB, ExitBB
835 *
836 * LoopBB:
837 * dstI = dst[it]
838 * srcI = src[it * stride]
839 * OpCopyMemory dstI, srcI
840 * it += incr
841 * br CmpBB
842 *
843 * ExitBB:
844 * after
845 */
846
847 IRBuilder<> Builder(CI);
848
849 auto Cst0 = Builder.getInt32(0);
850 auto Cst1 = Builder.getInt32(1);
851 auto Cst2 = Builder.getInt32(2);
852
alan-baker67d639b2022-05-09 11:23:31 -0400853 auto *IT = IntegerType::get(M.getContext(), 32);
854 auto *VT = FixedVectorType::get(IT, 3);
855
rjodinchr791203f2021-10-07 20:42:41 +0200856 // get_local_id({0, 1, 2});
857 GlobalVariable *GVId =
858 getOrCreateGlobalVariable(M, clspv::LocalInvocationIdVariableName(),
859 clspv::LocalInvocationIdAddressSpace());
alan-baker67d639b2022-05-09 11:23:31 -0400860 Value *GEP0 = Builder.CreateGEP(VT, GVId, {Cst0, Cst0});
861 Value *LocalId0 = Builder.CreateLoad(IT, GEP0);
862 Value *GEP1 = Builder.CreateGEP(VT, GVId, {Cst0, Cst1});
863 Value *LocalId1 = Builder.CreateLoad(IT, GEP1);
864 Value *GEP2 = Builder.CreateGEP(VT, GVId, {Cst0, Cst2});
865 Value *LocalId2 = Builder.CreateLoad(IT, GEP2);
rjodinchr791203f2021-10-07 20:42:41 +0200866
867 // get_local_size({0, 1, 2});
868 GlobalVariable *GVSize =
869 getOrCreateGlobalVariable(M, clspv::WorkgroupSizeVariableName(),
870 clspv::WorkgroupSizeAddressSpace());
alan-baker67d639b2022-05-09 11:23:31 -0400871 auto LocalSize = Builder.CreateLoad(VT, GVSize);
rjodinchr791203f2021-10-07 20:42:41 +0200872 auto LocalSize0 = Builder.CreateExtractElement(LocalSize, Cst0);
873 auto LocalSize1 = Builder.CreateExtractElement(LocalSize, Cst1);
874 auto LocalSize2 = Builder.CreateExtractElement(LocalSize, Cst2);
875
876 // size_t start_id = ((get_local_id(2) * get_local_size(1))
877 // + get_local_id(1)) * get_local_size(0)
878 // + get_local_id(0);
879 auto tmp0 = Builder.CreateMul(LocalId2, LocalSize1);
880 auto tmp1 = Builder.CreateAdd(tmp0, LocalId1);
881 auto tmp2 = Builder.CreateMul(tmp1, LocalSize0);
882 auto StartId = Builder.CreateAdd(tmp2, LocalId0);
883
884 // size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
885 auto tmp3 = Builder.CreateMul(LocalSize0, LocalSize1);
886 auto Incr = Builder.CreateMul(tmp3, LocalSize2);
887
888 // Create BasicBlocks
889 auto BB = CI->getParent();
890 auto CmpBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
891 auto LoopBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
892 auto ExitBB = SplitBlock(BB, CI);
893
894 // BB
895 auto BrCmpBB = BranchInst::Create(CmpBB);
896 ReplaceInstWithInst(BB->getTerminator(), BrCmpBB);
897
898 // CmpBB
899 Builder.SetInsertPoint(CmpBB);
900 auto PHIIterator = Builder.CreatePHI(Builder.getInt32Ty(), 2);
901 auto Cmp = Builder.CreateCmp(CmpInst::ICMP_ULT, PHIIterator, NumGentypes);
902 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
903
904 // LoopBB
905 Builder.SetInsertPoint(LoopBB);
906
907 // default values for non-strided copies
908 Value *SrcIterator = PHIIterator;
909 Value *DstIterator = PHIIterator;
910 if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
911 clspv::AddressSpace::Global)) {
912 // async_work_group_strided_copy local to global case
913 DstIterator = Builder.CreateMul(PHIIterator, Stride);
914 } else if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
915 clspv::AddressSpace::Local)) {
916 // async_work_group_strided_copy global to local case
917 SrcIterator = Builder.CreateMul(PHIIterator, Stride);
918 }
alan-baker67d639b2022-05-09 11:23:31 -0400919 auto DstI = Builder.CreateGEP(GenType, Dst, DstIterator);
920 auto SrcI = Builder.CreateGEP(GenType, Src, SrcIterator);
rjodinchr791203f2021-10-07 20:42:41 +0200921 auto NewIterator = Builder.CreateAdd(PHIIterator, Incr);
922 auto Br = Builder.CreateBr(CmpBB);
923 clspv::InsertSPIRVOp(Br, spv::OpCopyMemory, {}, Builder.getVoidTy(),
924 {DstI, SrcI});
925
926 // Set PHIIterator for CmpBB now that we have NewIterator
927 PHIIterator->addIncoming(StartId, BB);
928 PHIIterator->addIncoming(NewIterator, LoopBB);
929
930 return Event;
931}
932
alan-baker67d639b2022-05-09 11:23:31 -0400933bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopy(Function &F, Type *ty) {
934 return replaceCallsWithValue(F, [&F, ty, this](CallInst *CI) {
rjodinchr791203f2021-10-07 20:42:41 +0200935 Module &M = *F.getParent();
936
937 auto Dst = CI->getOperand(0);
938 auto Src = CI->getOperand(1);
939 auto NumGentypes = CI->getOperand(2);
940 auto Event = CI->getOperand(3);
941
alan-baker67d639b2022-05-09 11:23:31 -0400942 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, ty, NumGentypes,
943 nullptr, Event);
rjodinchr791203f2021-10-07 20:42:41 +0200944 });
945}
946
alan-baker67d639b2022-05-09 11:23:31 -0400947bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupStridedCopy(Function &F, Type *ty) {
948 return replaceCallsWithValue(F, [&F, ty, this](CallInst *CI) {
rjodinchr791203f2021-10-07 20:42:41 +0200949 Module &M = *F.getParent();
950
951 auto Dst = CI->getOperand(0);
952 auto Src = CI->getOperand(1);
953 auto NumGentypes = CI->getOperand(2);
954 auto Stride = CI->getOperand(3);
955 auto Event = CI->getOperand(4);
956
alan-baker67d639b2022-05-09 11:23:31 -0400957 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, ty, NumGentypes, Stride,
rjodinchr791203f2021-10-07 20:42:41 +0200958 Event);
959 });
960}
961
SJW2c317da2020-03-23 07:39:13 -0500962bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
963 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400964 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100965}
966
SJW2c317da2020-03-23 07:39:13 -0500967bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
968 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100969 auto XValue = CI->getOperand(0);
970 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100971
Kévin Petite8edce32019-04-10 14:23:32 +0100972 IRBuilder<> Builder(CI);
973 auto XmY = Builder.CreateSub(XValue, YValue);
974 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100975
SJW2c317da2020-03-23 07:39:13 -0500976 Value *Cmp = nullptr;
977 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100978 Cmp = Builder.CreateICmpSGT(YValue, XValue);
979 } else {
980 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100981 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100982
Kévin Petite8edce32019-04-10 14:23:32 +0100983 return Builder.CreateSelect(Cmp, YmX, XmY);
984 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100985}
986
SJW2c317da2020-03-23 07:39:13 -0500987bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
alan-baker5f2e88e2020-12-07 15:24:04 -0500988 return replaceCallsWithValue(F, [&F](CallInst *Call) {
989 const auto x = Call->getArgOperand(0);
990 const auto y = Call->getArgOperand(1);
991 auto intrinsic = Intrinsic::getDeclaration(
992 F.getParent(), Intrinsic::copysign, Call->getType());
993 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {x, y}, "",
994 Call);
Kévin Petite8edce32019-04-10 14:23:32 +0100995 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100996}
997
Romaric Jodinef707982022-07-22 16:25:19 +0200998bool ReplaceOpenCLBuiltinPass::replaceNativeRecip(Function &F) {
999 Module &M = *F.getParent();
1000 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001001 // Recip has one arg.
1002 auto Arg = CI->getOperand(0);
1003 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
David Neto22f144c2017-06-12 14:26:21 -04001004
Romaric Jodinef707982022-07-22 16:25:19 +02001005 Type *Ty = CI->getType();
1006 SmallVector<Type *, 2> NativeDivideArgsTypes = {Ty, Ty};
1007 const auto NativeDivideType =
1008 FunctionType::get(Ty, NativeDivideArgsTypes, false);
1009 auto NativeDivideName =
1010 Builtins::GetMangledFunctionName("native_divide", NativeDivideType);
1011 auto NativeDivide =
1012 M.getOrInsertFunction(NativeDivideName, NativeDivideType);
1013 return CallInst::Create(NativeDivide, {Cst1, Arg}, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001014 });
David Neto22f144c2017-06-12 14:26:21 -04001015}
1016
SJW2c317da2020-03-23 07:39:13 -05001017bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
1018 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +01001019 auto Op0 = CI->getOperand(0);
1020 auto Op1 = CI->getOperand(1);
1021
SJW2c317da2020-03-23 07:39:13 -05001022 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +01001023 if (Op0->getType()->isVectorTy()) {
1024 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
1025 CI->getType(), {Op0, Op1});
1026 } else {
1027 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
1028 }
1029
1030 return V;
1031 });
1032}
1033
SJW2c317da2020-03-23 07:39:13 -05001034bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -05001035 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001036 // convert to natural
1037 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001038 std::string NewFName = basename.substr(0, slen);
1039 NewFName =
1040 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001041
SJW2c317da2020-03-23 07:39:13 -05001042 Module &M = *F.getParent();
1043 return replaceCallsWithValue(F, [&](CallInst *CI) {
1044 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1045
1046 auto Arg = CI->getOperand(0);
1047
1048 // Constant of the natural log of 10 (ln(10)).
1049 const double Ln10 =
1050 2.302585092994045684017991454684364207601101488628772976033;
1051
1052 auto Mul = BinaryOperator::Create(
1053 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
1054
1055 return CallInst::Create(NewF, Mul, "", CI);
1056 });
David Neto22f144c2017-06-12 14:26:21 -04001057}
1058
SJW2c317da2020-03-23 07:39:13 -05001059bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +01001060 // OpenCL fmod(x,y) is x - y * trunc(x/y)
1061 // The sign for a non-zero result is taken from x.
1062 // (Try an example.)
1063 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -05001064 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +01001065 auto Op0 = CI->getOperand(0);
1066 auto Op1 = CI->getOperand(1);
1067 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
1068 });
1069}
1070
SJW2c317da2020-03-23 07:39:13 -05001071bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -05001072 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001073 // convert to natural
1074 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001075 std::string NewFName = basename.substr(0, slen);
1076 NewFName =
1077 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001078
SJW2c317da2020-03-23 07:39:13 -05001079 Module &M = *F.getParent();
1080 return replaceCallsWithValue(F, [&](CallInst *CI) {
1081 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1082
1083 auto Arg = CI->getOperand(0);
1084
1085 // Constant of the reciprocal of the natural log of 10 (ln(10)).
1086 const double Ln10 =
1087 0.434294481903251827651128918916605082294397005803666566114;
1088
1089 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
1090
1091 return BinaryOperator::Create(Instruction::FMul,
1092 ConstantFP::get(Arg->getType(), Ln10), NewCI,
1093 "", CI);
1094 });
David Neto22f144c2017-06-12 14:26:21 -04001095}
1096
gnl21636e7992020-09-09 16:08:16 +01001097bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
1098 // convert to natural
alan-baker8b968112020-12-15 15:53:29 -05001099 return replaceCallsWithValue(F, [&F](CallInst *CI) {
gnl21636e7992020-09-09 16:08:16 +01001100 auto Arg = CI->getOperand(0);
1101
1102 auto ArgP1 = BinaryOperator::Create(
1103 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
1104
alan-baker8b968112020-12-15 15:53:29 -05001105 auto log =
1106 Intrinsic::getDeclaration(F.getParent(), Intrinsic::log, CI->getType());
1107 return CallInst::Create(log, ArgP1, "", CI);
gnl21636e7992020-09-09 16:08:16 +01001108 });
1109}
1110
alan-baker12d2c182020-07-20 08:22:42 -04001111bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -04001112
alan-bakerf6bc8252020-09-23 14:58:55 -04001113 enum {
1114 CLK_LOCAL_MEM_FENCE = 0x01,
1115 CLK_GLOBAL_MEM_FENCE = 0x02,
1116 CLK_IMAGE_MEM_FENCE = 0x04
1117 };
David Neto22f144c2017-06-12 14:26:21 -04001118
alan-baker12d2c182020-07-20 08:22:42 -04001119 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +01001120 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001121
Kévin Petitc4643922019-06-17 19:32:05 +01001122 // We need to map the OpenCL constants to the SPIR-V equivalents.
1123 const auto LocalMemFence =
1124 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1125 const auto GlobalMemFence =
1126 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001127 const auto ImageMemFence =
1128 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -04001129 const auto ConstantAcquireRelease = ConstantInt::get(
1130 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +01001131 const auto ConstantScopeDevice =
1132 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
1133 const auto ConstantScopeWorkgroup =
1134 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -04001135 const auto ConstantScopeSubgroup =
1136 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -04001137
Kévin Petitc4643922019-06-17 19:32:05 +01001138 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1139 const auto LocalMemFenceMask =
1140 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1141 const auto WorkgroupShiftAmount =
1142 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1143 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1144 Instruction::Shl, LocalMemFenceMask,
1145 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001146
Kévin Petitc4643922019-06-17 19:32:05 +01001147 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1148 const auto GlobalMemFenceMask =
1149 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1150 const auto UniformShiftAmount =
1151 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1152 const auto MemorySemanticsUniform = BinaryOperator::Create(
1153 Instruction::Shl, GlobalMemFenceMask,
1154 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001155
alan-bakerf6bc8252020-09-23 14:58:55 -04001156 // OpenCL 2.0
1157 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1158 const auto ImageMemFenceMask =
1159 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1160 const auto ImageShiftAmount =
1161 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1162 const auto MemorySemanticsImage = BinaryOperator::Create(
1163 Instruction::Shl, ImageMemFenceMask,
1164 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1165
Kévin Petitc4643922019-06-17 19:32:05 +01001166 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -04001167 // MemorySemanticsSequentiallyConsistentMask.
1168 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +01001169 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -04001170 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -04001171 auto MemorySemantics2 = BinaryOperator::Create(
1172 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
1173 auto MemorySemantics = BinaryOperator::Create(
1174 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001175
alan-baker12d2c182020-07-20 08:22:42 -04001176 // If the memory scope is not specified explicitly, it is either Subgroup
1177 // or Workgroup depending on the type of barrier.
1178 Value *MemoryScope =
1179 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
1180 if (CI->data_operands_size() > 1) {
1181 enum {
1182 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
1183 CL_MEMORY_SCOPE_DEVICE = 0x2,
1184 CL_MEMORY_SCOPE_SUBGROUP = 0x4
1185 };
1186 // The call was given an explicit memory scope.
1187 const auto MemoryScopeSubgroup =
1188 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
1189 const auto MemoryScopeDevice =
1190 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -04001191
alan-baker12d2c182020-07-20 08:22:42 -04001192 auto Cmp =
1193 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1194 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
1195 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
1196 ConstantScopeWorkgroup, "", CI);
1197 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1198 MemoryScopeDevice, CI->getOperand(1), "", CI);
1199 MemoryScope =
1200 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
1201 }
1202
1203 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
1204 // the type of barrier;
1205 const auto ExecutionScope =
1206 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -04001207
Kévin Petitc4643922019-06-17 19:32:05 +01001208 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -04001209 {Attribute::NoDuplicate, Attribute::Convergent},
1210 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +01001211 {ExecutionScope, MemoryScope, MemorySemantics});
1212 });
David Neto22f144c2017-06-12 14:26:21 -04001213}
1214
alan-baker36309f92021-02-05 12:28:03 -05001215bool ReplaceOpenCLBuiltinPass::replaceMemFence(
1216 Function &F, spv::MemorySemanticsMask semantics) {
David Neto22f144c2017-06-12 14:26:21 -04001217
SJW2c317da2020-03-23 07:39:13 -05001218 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -04001219 enum {
1220 CLK_LOCAL_MEM_FENCE = 0x01,
1221 CLK_GLOBAL_MEM_FENCE = 0x02,
1222 CLK_IMAGE_MEM_FENCE = 0x04,
1223 };
David Neto22f144c2017-06-12 14:26:21 -04001224
SJW2c317da2020-03-23 07:39:13 -05001225 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001226
SJW2c317da2020-03-23 07:39:13 -05001227 // We need to map the OpenCL constants to the SPIR-V equivalents.
1228 const auto LocalMemFence =
1229 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1230 const auto GlobalMemFence =
1231 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001232 const auto ImageMemFence =
1233 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -05001234 const auto ConstantMemorySemantics =
1235 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -04001236 const auto ConstantScopeWorkgroup =
1237 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -04001238
SJW2c317da2020-03-23 07:39:13 -05001239 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1240 const auto LocalMemFenceMask =
1241 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1242 const auto WorkgroupShiftAmount =
1243 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1244 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1245 Instruction::Shl, LocalMemFenceMask,
1246 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001247
SJW2c317da2020-03-23 07:39:13 -05001248 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1249 const auto GlobalMemFenceMask =
1250 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1251 const auto UniformShiftAmount =
1252 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1253 const auto MemorySemanticsUniform = BinaryOperator::Create(
1254 Instruction::Shl, GlobalMemFenceMask,
1255 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001256
alan-bakerf6bc8252020-09-23 14:58:55 -04001257 // OpenCL 2.0
1258 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1259 const auto ImageMemFenceMask =
1260 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1261 const auto ImageShiftAmount =
1262 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1263 const auto MemorySemanticsImage = BinaryOperator::Create(
1264 Instruction::Shl, ImageMemFenceMask,
1265 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1266
alan-baker36309f92021-02-05 12:28:03 -05001267 Value *MemOrder = ConstantMemorySemantics;
1268 Value *MemScope = ConstantScopeWorkgroup;
1269 IRBuilder<> builder(CI);
alan-baker5641f5c2021-10-15 09:16:04 -04001270 if (CI->arg_size() > 1) {
alan-baker36309f92021-02-05 12:28:03 -05001271 MemOrder = MemoryOrderSemantics(CI->getArgOperand(1), false, CI,
1272 semantics, false);
1273 MemScope = MemoryScope(CI->getArgOperand(2), false, CI);
1274 }
1275 // Join the storage semantics and the order semantics.
alan-bakerf6bc8252020-09-23 14:58:55 -04001276 auto MemorySemantics1 =
alan-baker36309f92021-02-05 12:28:03 -05001277 builder.CreateOr({MemorySemanticsWorkgroup, MemorySemanticsUniform});
1278 auto MemorySemantics2 = builder.CreateOr({MemorySemanticsImage, MemOrder});
1279 auto MemorySemantics =
1280 builder.CreateOr({MemorySemantics1, MemorySemantics2});
David Neto22f144c2017-06-12 14:26:21 -04001281
alan-baker3d905692020-10-28 14:02:37 -04001282 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
1283 {Attribute::Convergent}, CI->getType(),
alan-baker36309f92021-02-05 12:28:03 -05001284 {MemScope, MemorySemantics});
SJW2c317da2020-03-23 07:39:13 -05001285 });
David Neto22f144c2017-06-12 14:26:21 -04001286}
1287
Kévin Petit1cb45112020-04-27 18:55:48 +01001288bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
1289 bool Changed = false;
1290
1291 SmallVector<Instruction *, 4> ToRemoves;
1292
1293 // Find all calls to the function
1294 for (auto &U : F.uses()) {
1295 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1296 ToRemoves.push_back(CI);
1297 }
1298 }
1299
1300 Changed = !ToRemoves.empty();
1301
1302 // Delete them
1303 for (auto V : ToRemoves) {
1304 V->eraseFromParent();
1305 }
1306
1307 return Changed;
1308}
1309
SJW2c317da2020-03-23 07:39:13 -05001310bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
alan-baker3e217772020-11-07 17:29:40 -05001311 CmpInst::Predicate P) {
SJW2c317da2020-03-23 07:39:13 -05001312 return replaceCallsWithValue(F, [&](CallInst *CI) {
1313 // The predicate to use in the CmpInst.
1314 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -04001315
SJW2c317da2020-03-23 07:39:13 -05001316 auto Arg1 = CI->getOperand(0);
1317 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001318
SJW2c317da2020-03-23 07:39:13 -05001319 const auto Cmp =
1320 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
alan-baker3e217772020-11-07 17:29:40 -05001321 if (isa<VectorType>(F.getReturnType()))
1322 return CastInst::Create(Instruction::SExt, Cmp, CI->getType(), "", CI);
1323 return CastInst::Create(Instruction::ZExt, Cmp, CI->getType(), "", CI);
SJW2c317da2020-03-23 07:39:13 -05001324 });
David Neto22f144c2017-06-12 14:26:21 -04001325}
1326
SJW2c317da2020-03-23 07:39:13 -05001327bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
1328 spv::Op SPIRVOp,
1329 int32_t C) {
1330 Module &M = *F.getParent();
1331 return replaceCallsWithValue(F, [&](CallInst *CI) {
1332 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -04001333
SJW2c317da2020-03-23 07:39:13 -05001334 // The value to return for true.
1335 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -04001336
SJW2c317da2020-03-23 07:39:13 -05001337 // The value to return for false.
1338 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001339
SJW2c317da2020-03-23 07:39:13 -05001340 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001341 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001342 CorrespondingBoolTy =
1343 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1344 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001345 }
David Neto22f144c2017-06-12 14:26:21 -04001346
SJW2c317da2020-03-23 07:39:13 -05001347 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1348 CorrespondingBoolTy, {CI->getOperand(0)});
1349
1350 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1351 });
David Neto22f144c2017-06-12 14:26:21 -04001352}
1353
SJW2c317da2020-03-23 07:39:13 -05001354bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1355 Module &M = *F.getParent();
1356 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001357 auto &C = M.getContext();
1358 auto Val = CI->getOperand(0);
1359 auto ValTy = Val->getType();
1360 auto RetTy = CI->getType();
1361
1362 // Get a suitable integer type to represent the number
1363 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1364
1365 // Create Mask
1366 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001367 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001368 switch (ScalarSize) {
1369 case 16:
1370 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1371 break;
1372 case 32:
1373 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1374 break;
1375 case 64:
1376 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1377 break;
1378 default:
1379 llvm_unreachable("Unsupported floating-point type");
1380 }
1381
1382 IRBuilder<> Builder(CI);
1383
1384 // Bitcast to int
1385 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1386
1387 // Mask and compare
1388 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1389 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1390
1391 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001392 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001393 if (ValTy->isVectorTy()) {
1394 RetTrue = ConstantInt::getSigned(RetTy, -1);
1395 } else {
1396 RetTrue = ConstantInt::get(RetTy, 1);
1397 }
1398 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1399 });
1400}
1401
SJW2c317da2020-03-23 07:39:13 -05001402bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1403 Module &M = *F.getParent();
1404 return replaceCallsWithValue(F, [&](CallInst *CI) {
1405 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001406
SJW2c317da2020-03-23 07:39:13 -05001407 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001408
SJW2c317da2020-03-23 07:39:13 -05001409 // If the argument is a 32-bit int, just use a shift
1410 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1411 V = BinaryOperator::Create(Instruction::LShr, Arg,
1412 ConstantInt::get(Arg->getType(), 31), "", CI);
1413 } else {
1414 // The value for zero to compare against.
1415 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001416
SJW2c317da2020-03-23 07:39:13 -05001417 // The value to return for true.
1418 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001419
SJW2c317da2020-03-23 07:39:13 -05001420 // The value to return for false.
1421 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001422
SJW2c317da2020-03-23 07:39:13 -05001423 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1424 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001425
SJW2c317da2020-03-23 07:39:13 -05001426 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001427
SJW2c317da2020-03-23 07:39:13 -05001428 // If we have a function to call, call it!
1429 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001430
SJW2c317da2020-03-23 07:39:13 -05001431 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001432
SJW2c317da2020-03-23 07:39:13 -05001433 const auto NewCI = clspv::InsertSPIRVOp(
1434 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1435 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001436
SJW2c317da2020-03-23 07:39:13 -05001437 } else {
1438 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001439 }
1440
SJW2c317da2020-03-23 07:39:13 -05001441 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001442 }
SJW2c317da2020-03-23 07:39:13 -05001443 return V;
1444 });
David Neto22f144c2017-06-12 14:26:21 -04001445}
1446
SJW2c317da2020-03-23 07:39:13 -05001447bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1448 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1449 // Get arguments
1450 auto HiValue = CI->getOperand(0);
1451 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001452
SJW2c317da2020-03-23 07:39:13 -05001453 // Don't touch overloads that aren't in OpenCL C
1454 auto HiType = HiValue->getType();
1455 auto LoType = LoValue->getType();
1456
1457 if (HiType != LoType) {
1458 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001459 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001460
SJW2c317da2020-03-23 07:39:13 -05001461 if (!HiType->isIntOrIntVectorTy()) {
1462 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001463 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001464
SJW2c317da2020-03-23 07:39:13 -05001465 if (HiType->getScalarSizeInBits() * 2 !=
1466 CI->getType()->getScalarSizeInBits()) {
1467 return nullptr;
1468 }
1469
1470 if ((HiType->getScalarSizeInBits() != 8) &&
1471 (HiType->getScalarSizeInBits() != 16) &&
1472 (HiType->getScalarSizeInBits() != 32)) {
1473 return nullptr;
1474 }
1475
James Pricecf53df42020-04-20 14:41:24 -04001476 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001477 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001478 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1479 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001480 return nullptr;
1481 }
1482 }
1483
1484 // Convert both operands to the result type
1485 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1486 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1487
1488 // Shift high operand
1489 auto ShiftAmount =
1490 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1491 auto HiShifted =
1492 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1493
1494 // OR both results
1495 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1496 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001497}
1498
SJW2c317da2020-03-23 07:39:13 -05001499bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1500 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1501 // Get arguments
1502 auto SrcValue = CI->getOperand(0);
1503 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001504
SJW2c317da2020-03-23 07:39:13 -05001505 // Don't touch overloads that aren't in OpenCL C
1506 auto SrcType = SrcValue->getType();
1507 auto RotType = RotAmount->getType();
1508
1509 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1510 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001511 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001512
SJW2c317da2020-03-23 07:39:13 -05001513 if (!SrcType->isIntOrIntVectorTy()) {
1514 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001515 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001516
SJW2c317da2020-03-23 07:39:13 -05001517 if ((SrcType->getScalarSizeInBits() != 8) &&
1518 (SrcType->getScalarSizeInBits() != 16) &&
1519 (SrcType->getScalarSizeInBits() != 32) &&
1520 (SrcType->getScalarSizeInBits() != 64)) {
1521 return nullptr;
1522 }
1523
James Pricecf53df42020-04-20 14:41:24 -04001524 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001525 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001526 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1527 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001528 return nullptr;
1529 }
1530 }
1531
alan-bakerfd22ae12020-10-29 15:59:22 -04001532 // Replace with LLVM's funnel shift left intrinsic because it is more
1533 // generic than rotate.
1534 Function *intrinsic =
1535 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1536 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1537 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001538 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001539}
1540
SJW2c317da2020-03-23 07:39:13 -05001541bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1542 bool DstIsSigned) {
1543 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1544 Value *V = nullptr;
1545 // Get arguments
1546 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001547
SJW2c317da2020-03-23 07:39:13 -05001548 // Don't touch overloads that aren't in OpenCL C
1549 auto SrcType = SrcValue->getType();
1550 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001551
SJW2c317da2020-03-23 07:39:13 -05001552 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1553 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1554 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001555 }
1556
James Pricecf53df42020-04-20 14:41:24 -04001557 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001558 unsigned SrcNumElements =
1559 SrcVecType->getElementCount().getKnownMinValue();
1560 unsigned DstNumElements =
1561 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001562 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001563 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001564 }
1565
James Pricecf53df42020-04-20 14:41:24 -04001566 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1567 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1568 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001569 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001570 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001571 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001572
SJW2c317da2020-03-23 07:39:13 -05001573 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1574 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1575
1576 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1577 bool DstIsInt = DstType->isIntOrIntVectorTy();
1578
1579 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1580 // Unnecessary cast operation.
1581 V = SrcValue;
1582 } else if (SrcIsFloat && DstIsFloat) {
1583 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1584 } else if (SrcIsFloat && DstIsInt) {
1585 if (DstIsSigned) {
1586 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1587 } else {
1588 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1589 }
1590 } else if (SrcIsInt && DstIsFloat) {
1591 if (SrcIsSigned) {
1592 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1593 } else {
1594 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1595 }
1596 } else if (SrcIsInt && DstIsInt) {
1597 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1598 } else {
1599 // Not something we're supposed to handle, just move on
1600 }
1601
1602 return V;
1603 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001604}
1605
SJW2c317da2020-03-23 07:39:13 -05001606bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1607 bool is_mad) {
1608 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1609 Value *V = nullptr;
1610 // Get arguments
1611 auto AValue = CI->getOperand(0);
1612 auto BValue = CI->getOperand(1);
1613 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001614
SJW2c317da2020-03-23 07:39:13 -05001615 // Don't touch overloads that aren't in OpenCL C
1616 auto AType = AValue->getType();
1617 auto BType = BValue->getType();
1618 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001619
SJW2c317da2020-03-23 07:39:13 -05001620 if ((AType != BType) || (CI->getType() != AType) ||
1621 (is_mad && (AType != CType))) {
1622 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001623 }
1624
SJW2c317da2020-03-23 07:39:13 -05001625 if (!AType->isIntOrIntVectorTy()) {
1626 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001627 }
Kévin Petit8a560882019-03-21 15:24:34 +00001628
SJW2c317da2020-03-23 07:39:13 -05001629 if ((AType->getScalarSizeInBits() != 8) &&
1630 (AType->getScalarSizeInBits() != 16) &&
1631 (AType->getScalarSizeInBits() != 32) &&
1632 (AType->getScalarSizeInBits() != 64)) {
1633 return V;
1634 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001635
James Pricecf53df42020-04-20 14:41:24 -04001636 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001637 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001638 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1639 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001640 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001641 }
1642 }
1643
Romaric Jodinc507f312022-04-08 19:09:45 +02001644 auto Call = InsertOpMulExtended(CI, AValue, BValue, is_signed);
SJW2c317da2020-03-23 07:39:13 -05001645
1646 // Get the high part of the result
1647 unsigned Idxs[] = {1};
1648 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1649
1650 // If we're handling a mad_hi, add the third argument to the result
1651 if (is_mad) {
1652 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001653 }
1654
SJW2c317da2020-03-23 07:39:13 -05001655 return V;
1656 });
Kévin Petit8a560882019-03-21 15:24:34 +00001657}
1658
SJW2c317da2020-03-23 07:39:13 -05001659bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1660 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1661 // Get arguments
1662 auto FalseValue = CI->getOperand(0);
1663 auto TrueValue = CI->getOperand(1);
1664 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001665
SJW2c317da2020-03-23 07:39:13 -05001666 // Don't touch overloads that aren't in OpenCL C
1667 auto FalseType = FalseValue->getType();
1668 auto TrueType = TrueValue->getType();
1669 auto PredicateType = PredicateValue->getType();
1670
1671 if (FalseType != TrueType) {
1672 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001673 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001674
SJW2c317da2020-03-23 07:39:13 -05001675 if (!PredicateType->isIntOrIntVectorTy()) {
1676 return nullptr;
1677 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001678
SJW2c317da2020-03-23 07:39:13 -05001679 if (!FalseType->isIntOrIntVectorTy() &&
1680 !FalseType->getScalarType()->isFloatingPointTy()) {
1681 return nullptr;
1682 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001683
SJW2c317da2020-03-23 07:39:13 -05001684 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1685 return nullptr;
1686 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001687
SJW2c317da2020-03-23 07:39:13 -05001688 if (FalseType->getScalarSizeInBits() !=
1689 PredicateType->getScalarSizeInBits()) {
1690 return nullptr;
1691 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001692
James Pricecf53df42020-04-20 14:41:24 -04001693 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001694 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1695 if (NumElements != cast<VectorType>(PredicateType)
1696 ->getElementCount()
1697 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001698 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001699 }
1700
James Pricecf53df42020-04-20 14:41:24 -04001701 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1702 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001703 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001704 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001705 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001706
SJW2c317da2020-03-23 07:39:13 -05001707 // Create constant
1708 const auto ZeroValue = Constant::getNullValue(PredicateType);
1709
1710 // Scalar and vector are to be treated differently
1711 CmpInst::Predicate Pred;
1712 if (PredicateType->isVectorTy()) {
1713 Pred = CmpInst::ICMP_SLT;
1714 } else {
1715 Pred = CmpInst::ICMP_NE;
1716 }
1717
1718 // Create comparison instruction
1719 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1720 ZeroValue, "", CI);
1721
1722 // Create select
1723 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1724 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001725}
1726
SJW2c317da2020-03-23 07:39:13 -05001727bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1728 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1729 Value *V = nullptr;
1730 if (CI->getNumOperands() != 4) {
1731 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001732 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001733
SJW2c317da2020-03-23 07:39:13 -05001734 // Get arguments
1735 auto FalseValue = CI->getOperand(0);
1736 auto TrueValue = CI->getOperand(1);
1737 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001738
SJW2c317da2020-03-23 07:39:13 -05001739 // Don't touch overloads that aren't in OpenCL C
1740 auto FalseType = FalseValue->getType();
1741 auto TrueType = TrueValue->getType();
1742 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001743
SJW2c317da2020-03-23 07:39:13 -05001744 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1745 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001746 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001747
James Pricecf53df42020-04-20 14:41:24 -04001748 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001749 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1750 !TrueType->getScalarType()->isIntegerTy()) {
1751 return V;
1752 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001753 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001754 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1755 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001756 return V;
1757 }
1758 }
1759
1760 // Remember the type of the operands
1761 auto OpType = TrueType;
1762
1763 // The actual bit selection will always be done on an integer type,
1764 // declare it here
1765 Type *BitType;
1766
1767 // If the operands are float, then bitcast them to int
1768 if (OpType->getScalarType()->isFloatingPointTy()) {
1769
1770 // First create the new type
1771 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1772
1773 // Then bitcast all operands
1774 PredicateValue =
1775 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1776 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1777 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1778
1779 } else {
1780 // The operands have an integer type, use it directly
1781 BitType = OpType;
1782 }
1783
1784 // All the operands are now always integers
1785 // implement as (c & b) | (~c & a)
1786
1787 // Create our negated predicate value
1788 auto AllOnes = Constant::getAllOnesValue(BitType);
1789 auto NotPredicateValue = BinaryOperator::Create(
1790 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1791
1792 // Then put everything together
1793 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1794 FalseValue, "", CI);
1795 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1796 TrueValue, "", CI);
1797
1798 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1799
1800 // If we were dealing with a floating point type, we must bitcast
1801 // the result back to that
1802 if (OpType->getScalarType()->isFloatingPointTy()) {
1803 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1804 }
1805
1806 return V;
1807 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001808}
1809
SJW61531372020-06-09 07:31:08 -05001810bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001811 // convert to vector versions
1812 Module &M = *F.getParent();
1813 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1814 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1815 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001816
SJW2c317da2020-03-23 07:39:13 -05001817 // First figure out which function we're dealing with
1818 if (is_smooth) {
1819 ArgsToSplat.push_back(CI->getOperand(1));
1820 VectorArg = CI->getOperand(2);
1821 } else {
1822 VectorArg = CI->getOperand(1);
1823 }
1824
1825 // Splat arguments that need to be
1826 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001827 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001828
1829 for (auto arg : ArgsToSplat) {
1830 Value *NewVectorArg = UndefValue::get(VecType);
Marco Antognini7e338402021-03-15 12:48:37 +00001831 for (size_t i = 0; i < VecType->getElementCount().getKnownMinValue();
1832 i++) {
SJW2c317da2020-03-23 07:39:13 -05001833 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1834 NewVectorArg =
1835 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1836 }
1837 SplatArgs.push_back(NewVectorArg);
1838 }
1839
1840 // Replace the call with the vector/vector flavour
1841 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1842 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1843
SJW61531372020-06-09 07:31:08 -05001844 std::string NewFName = Builtins::GetMangledFunctionName(
1845 is_smooth ? "smoothstep" : "step", NewFType);
1846
SJW2c317da2020-03-23 07:39:13 -05001847 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1848
1849 SmallVector<Value *, 3> NewArgs;
1850 for (auto arg : SplatArgs) {
1851 NewArgs.push_back(arg);
1852 }
1853 NewArgs.push_back(VectorArg);
1854
1855 return CallInst::Create(NewF, NewArgs, "", CI);
1856 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001857}
1858
SJW2c317da2020-03-23 07:39:13 -05001859bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001860 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1861 auto Arg = CI->getOperand(0);
1862 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001863
SJW2c317da2020-03-23 07:39:13 -05001864 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001865
SJW2c317da2020-03-23 07:39:13 -05001866 return BinaryOperator::Create(Op, Bitcast,
1867 ConstantInt::get(CI->getType(), 31), "", CI);
1868 });
David Neto22f144c2017-06-12 14:26:21 -04001869}
1870
SJW2c317da2020-03-23 07:39:13 -05001871bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1872 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001873 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1874 // The multiply instruction to use.
1875 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001876
SJW2c317da2020-03-23 07:39:13 -05001877 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001878
SJW2c317da2020-03-23 07:39:13 -05001879 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1880 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001881
SJW2c317da2020-03-23 07:39:13 -05001882 if (is_mad) {
1883 // The add instruction to use.
1884 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001885
SJW2c317da2020-03-23 07:39:13 -05001886 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001887 }
David Neto22f144c2017-06-12 14:26:21 -04001888
SJW2c317da2020-03-23 07:39:13 -05001889 return V;
1890 });
David Neto22f144c2017-06-12 14:26:21 -04001891}
1892
SJW2c317da2020-03-23 07:39:13 -05001893bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001894 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1895 Value *V = nullptr;
1896 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001897
SJW2c317da2020-03-23 07:39:13 -05001898 auto data_type = data->getType();
1899 if (!data_type->isVectorTy())
1900 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001901
James Pricecf53df42020-04-20 14:41:24 -04001902 auto vec_data_type = cast<VectorType>(data_type);
1903
alan-baker5a8c3be2020-09-09 13:44:26 -04001904 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001905 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1906 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001907
SJW2c317da2020-03-23 07:39:13 -05001908 auto offset = CI->getOperand(1);
1909 auto ptr = CI->getOperand(2);
alan-bakerf795f392019-06-11 18:24:34 -04001910
SJW2c317da2020-03-23 07:39:13 -05001911 // Avoid pointer casts. Instead generate the correct number of stores
1912 // and rely on drivers to coalesce appropriately.
1913 IRBuilder<> builder(CI);
1914 auto elems_const = builder.getInt32(elems);
1915 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001916 for (size_t i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001917 auto idx = builder.getInt32(i);
1918 auto add = builder.CreateAdd(adjust, idx);
alan-baker67d639b2022-05-09 11:23:31 -04001919 auto gep = builder.CreateGEP(vec_data_type->getScalarType(), ptr, add);
SJW2c317da2020-03-23 07:39:13 -05001920 auto extract = builder.CreateExtractElement(data, i);
1921 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001922 }
SJW2c317da2020-03-23 07:39:13 -05001923 return V;
1924 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001925}
1926
SJW2c317da2020-03-23 07:39:13 -05001927bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001928 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1929 Value *V = nullptr;
1930 auto ret_type = F.getReturnType();
1931 if (!ret_type->isVectorTy())
1932 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001933
James Pricecf53df42020-04-20 14:41:24 -04001934 auto vec_ret_type = cast<VectorType>(ret_type);
1935
alan-baker5a8c3be2020-09-09 13:44:26 -04001936 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001937 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1938 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001939
SJW2c317da2020-03-23 07:39:13 -05001940 auto offset = CI->getOperand(0);
1941 auto ptr = CI->getOperand(1);
Derek Chowcfd368b2017-10-19 20:58:45 -07001942
SJW2c317da2020-03-23 07:39:13 -05001943 // Avoid pointer casts. Instead generate the correct number of loads
1944 // and rely on drivers to coalesce appropriately.
1945 IRBuilder<> builder(CI);
1946 auto elems_const = builder.getInt32(elems);
1947 V = UndefValue::get(ret_type);
1948 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001949 for (unsigned i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001950 auto idx = builder.getInt32(i);
1951 auto add = builder.CreateAdd(adjust, idx);
alan-baker67d639b2022-05-09 11:23:31 -04001952 auto gep = builder.CreateGEP(vec_ret_type->getScalarType(), ptr, add);
1953 auto load = builder.CreateLoad(vec_ret_type->getScalarType(), gep);
SJW2c317da2020-03-23 07:39:13 -05001954 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001955 }
SJW2c317da2020-03-23 07:39:13 -05001956 return V;
1957 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001958}
1959
SJW2c317da2020-03-23 07:39:13 -05001960bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1961 const std::string &name,
Romaric Jodin71fdb322022-05-03 17:01:10 +02001962 int vec_size, bool aligned) {
SJW2c317da2020-03-23 07:39:13 -05001963 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1964 if (!vec_size) {
Romaric Jodin71fdb322022-05-03 17:01:10 +02001965 // deduce vec_size from last characters of name (e.g. vload_half4)
1966 std::string half = "half";
1967 vec_size = std::atoi(
1968 name.substr(name.find(half) + half.size(), std::string::npos).c_str());
David Neto22f144c2017-06-12 14:26:21 -04001969 }
SJW2c317da2020-03-23 07:39:13 -05001970 switch (vec_size) {
1971 case 2:
1972 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02001973 case 3:
1974 if (!is_clspv_version) {
1975 return aligned ? replaceVloadaHalf3(F) : replaceVloadHalf3(F);
1976 }
1977 break;
SJW2c317da2020-03-23 07:39:13 -05001978 case 4:
1979 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02001980 case 8:
1981 if (!is_clspv_version) {
1982 return replaceVloadHalf8(F);
1983 }
1984 break;
1985 case 16:
1986 if (!is_clspv_version) {
1987 return replaceVloadHalf16(F);
1988 }
1989 break;
SJW2c317da2020-03-23 07:39:13 -05001990 case 0:
1991 if (!is_clspv_version) {
1992 return replaceVloadHalf(F);
1993 }
SJW2c317da2020-03-23 07:39:13 -05001994 break;
1995 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02001996 llvm_unreachable("Unsupported vload_half vector size");
David Neto22f144c2017-06-12 14:26:21 -04001997}
1998
SJW2c317da2020-03-23 07:39:13 -05001999bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
2000 Module &M = *F.getParent();
2001 return replaceCallsWithValue(F, [&](CallInst *CI) {
2002 // The index argument from vload_half.
2003 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002004
SJW2c317da2020-03-23 07:39:13 -05002005 // The pointer argument from vload_half.
2006 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002007
SJW2c317da2020-03-23 07:39:13 -05002008 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002009 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05002010 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2011
2012 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002013 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05002014
2015 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2016
2017 Value *V = nullptr;
2018
alan-baker7efcaaa2020-05-06 19:33:27 -04002019 bool supports_16bit_storage = true;
2020 switch (Arg1->getType()->getPointerAddressSpace()) {
2021 case clspv::AddressSpace::Global:
2022 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2023 clspv::Option::StorageClass::kSSBO);
2024 break;
2025 case clspv::AddressSpace::Constant:
2026 if (clspv::Option::ConstantArgsInUniformBuffer())
2027 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2028 clspv::Option::StorageClass::kUBO);
2029 else
2030 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2031 clspv::Option::StorageClass::kSSBO);
2032 break;
2033 default:
2034 // Clspv will emit the Float16 capability if the half type is
2035 // encountered. That capability covers private and local addressspaces.
2036 break;
2037 }
2038
2039 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05002040 auto ShortTy = Type::getInt16Ty(M.getContext());
2041 auto ShortPointerTy =
2042 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
2043
2044 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002045 // TODO(#816): remove after final transition.
2046 Value *Cast = Arg1;
2047 if (Arg1->getType() != ShortPointerTy) {
2048 Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
2049 }
SJW2c317da2020-03-23 07:39:13 -05002050
2051 // Index into the correct address of the casted pointer.
2052 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2053
2054 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002055 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002056
2057 // ZExt the short -> int.
2058 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2059
2060 // Get our float2.
2061 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2062
2063 // Extract out the bottom element which is our float result.
2064 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
2065 } else {
2066 // Assume the pointer argument points to storage aligned to 32bits
2067 // or more.
2068 // TODO(dneto): Do more analysis to make sure this is true?
2069 //
2070 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2071 // with:
2072 //
2073 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2074 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2075 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2076 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2077 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2078 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2079 // x float> %converted, %index_is_odd32
2080
2081 auto IntPointerTy =
2082 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
2083
2084 // Cast the base pointer to int*.
2085 // In a valid call (according to assumptions), this should get
2086 // optimized away in the simplify GEP pass.
alan-bakerb8365b62022-07-18 21:59:45 -04002087 // TODO(#816): remove after final transition.
2088 Value *Cast = Arg1;
2089 if (Arg1->getType() != IntPointerTy) {
2090 Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2091 }
SJW2c317da2020-03-23 07:39:13 -05002092
2093 auto One = ConstantInt::get(IntTy, 1);
2094 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2095 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2096
2097 // Index into the correct address of the casted pointer.
2098 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2099
2100 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002101 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002102
2103 // Get our float2.
2104 auto Call = CallInst::Create(NewF, Load, "", CI);
2105
2106 // Extract out the float result, where the element number is
2107 // determined by whether the original index was even or odd.
2108 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2109 }
2110 return V;
2111 });
2112}
2113
2114bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
2115 Module &M = *F.getParent();
2116 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002117 // The index argument from vload_half.
2118 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002119
Kévin Petite8edce32019-04-10 14:23:32 +01002120 // The pointer argument from vload_half.
2121 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002122
Kévin Petite8edce32019-04-10 14:23:32 +01002123 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002124 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002125 auto NewPointerTy =
2126 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002127 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002128
Kévin Petite8edce32019-04-10 14:23:32 +01002129 // Cast the half* pointer to int*.
alan-bakerb8365b62022-07-18 21:59:45 -04002130 // TODO(#816): remove after final transition.
2131 Value *Cast = Arg1;
2132 if (Arg1->getType() != NewPointerTy) {
2133 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2134 }
David Neto22f144c2017-06-12 14:26:21 -04002135
Kévin Petite8edce32019-04-10 14:23:32 +01002136 // Index into the correct address of the casted pointer.
2137 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002138
Kévin Petite8edce32019-04-10 14:23:32 +01002139 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002140 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002141
Kévin Petite8edce32019-04-10 14:23:32 +01002142 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002143 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002144
Kévin Petite8edce32019-04-10 14:23:32 +01002145 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002146
Kévin Petite8edce32019-04-10 14:23:32 +01002147 // Get our float2.
2148 return CallInst::Create(NewF, Load, "", CI);
2149 });
David Neto22f144c2017-06-12 14:26:21 -04002150}
2151
Romaric Jodin71fdb322022-05-03 17:01:10 +02002152bool ReplaceOpenCLBuiltinPass::replaceVloadHalf3(Function &F) {
2153 Module &M = *F.getParent();
2154 return replaceCallsWithValue(F, [&](CallInst *CI) {
2155 // The index argument from vload_half.
2156 auto Arg0 = CI->getOperand(0);
2157
2158 // The pointer argument from vload_half.
2159 auto Arg1 = CI->getOperand(1);
2160
2161 auto IntTy = Type::getInt32Ty(M.getContext());
2162 auto ShortTy = Type::getInt16Ty(M.getContext());
2163 auto FloatTy = Type::getFloatTy(M.getContext());
2164 auto Float2Ty = FixedVectorType::get(FloatTy, 2);
2165 auto Float3Ty = FixedVectorType::get(FloatTy, 3);
2166 auto NewPointerTy =
2167 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
2168 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2169
2170 auto Int0 = ConstantInt::get(IntTy, 0);
2171 auto Int1 = ConstantInt::get(IntTy, 1);
2172 auto Int2 = ConstantInt::get(IntTy, 2);
2173
2174 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002175 // TODO(#816): remove after final transition.
2176 Value *Cast = Arg1;
2177 if (Arg1->getType() != NewPointerTy) {
2178 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2179 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002180
2181 // Load the first element
2182 auto Index0 = BinaryOperator::Create(
2183 Instruction::Add,
2184 BinaryOperator::Create(Instruction::Shl, Arg0, Int1, "", CI), Arg0, "",
2185 CI);
2186 auto GEP0 = GetElementPtrInst::Create(ShortTy, Cast, Index0, "", CI);
2187 auto Load0 = new LoadInst(ShortTy, GEP0, "", CI);
2188
2189 // Load the second element
2190 auto Index1 =
2191 BinaryOperator::Create(Instruction::Add, Index0, Int1, "", CI);
2192 auto GEP1 = GetElementPtrInst::Create(ShortTy, Cast, Index1, "", CI);
2193 auto Load1 = new LoadInst(ShortTy, GEP1, "", CI);
2194
2195 // Load the third element
2196 auto Index2 =
2197 BinaryOperator::Create(Instruction::Add, Index1, Int1, "", CI);
2198 auto GEP2 = GetElementPtrInst::Create(ShortTy, Cast, Index2, "", CI);
2199 auto Load2 = new LoadInst(ShortTy, GEP2, "", CI);
2200
2201 // Extend each short to int.
2202 auto X0 = CastInst::Create(Instruction::ZExt, Load0, IntTy, "", CI);
2203 auto X1 = CastInst::Create(Instruction::ZExt, Load1, IntTy, "", CI);
2204 auto X2 = CastInst::Create(Instruction::ZExt, Load2, IntTy, "", CI);
2205
2206 // Our intrinsic to unpack a float2 from an int.
2207 auto SPIRVIntrinsic = clspv::UnpackFunction();
2208
2209 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2210
2211 // Convert int to float2 and extract the uniq meaningful float
2212 auto Y0 = ExtractElementInst::Create(CallInst::Create(NewF, X0, "", CI),
2213 Int0, "", CI);
2214 auto Y1 = ExtractElementInst::Create(CallInst::Create(NewF, X1, "", CI),
2215 Int0, "", CI);
2216 auto Y2 = ExtractElementInst::Create(CallInst::Create(NewF, X2, "", CI),
2217 Int0, "", CI);
2218
2219 // Create the final float3 to be returned
2220 auto Combine =
2221 InsertElementInst::Create(UndefValue::get(Float3Ty), Y0, Int0, "", CI);
2222 Combine = InsertElementInst::Create(Combine, Y1, Int1, "", CI);
2223 Combine = InsertElementInst::Create(Combine, Y2, Int2, "", CI);
2224
2225 return Combine;
2226 });
2227}
2228
2229bool ReplaceOpenCLBuiltinPass::replaceVloadaHalf3(Function &F) {
2230 Module &M = *F.getParent();
2231 return replaceCallsWithValue(F, [&](CallInst *CI) {
2232 // The index argument from vload_half.
2233 auto Arg0 = CI->getOperand(0);
2234
2235 // The pointer argument from vload_half.
2236 auto Arg1 = CI->getOperand(1);
2237
2238 auto IntTy = Type::getInt32Ty(M.getContext());
2239 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2240 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
2241 auto NewPointerTy =
2242 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
2243 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2244
2245 // Cast the half* pointer to int2*.
alan-bakerb8365b62022-07-18 21:59:45 -04002246 // TODO(#816): remove after final transition.
2247 Value *Cast = Arg1;
2248 if (Arg1->getType() != NewPointerTy) {
2249 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2250 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002251
2252 // Index into the correct address of the casted pointer.
2253 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
2254
2255 // Load from the int2* we casted to.
2256 auto Load = new LoadInst(Int2Ty, Index, "", CI);
2257
2258 // Extract each element from the loaded int2.
2259 auto X =
2260 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2261 auto Y =
2262 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
2263
2264 // Our intrinsic to unpack a float2 from an int.
2265 auto SPIRVIntrinsic = clspv::UnpackFunction();
2266
2267 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2268
2269 // Get the lower (x & y) components of our final float4.
2270 auto Lo = CallInst::Create(NewF, X, "", CI);
2271
2272 // Get the higher (z & w) components of our final float4.
2273 auto Hi = CallInst::Create(NewF, Y, "", CI);
2274
2275 Constant *ShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2276 ConstantInt::get(IntTy, 1),
2277 ConstantInt::get(IntTy, 2)};
2278
2279 // Combine our two float2's into one float4.
2280 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2281 CI);
2282 });
2283}
2284
SJW2c317da2020-03-23 07:39:13 -05002285bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
2286 Module &M = *F.getParent();
2287 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002288 // The index argument from vload_half.
2289 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002290
Kévin Petite8edce32019-04-10 14:23:32 +01002291 // The pointer argument from vload_half.
2292 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002293
Kévin Petite8edce32019-04-10 14:23:32 +01002294 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002295 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2296 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002297 auto NewPointerTy =
2298 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002299 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002300
Kévin Petite8edce32019-04-10 14:23:32 +01002301 // Cast the half* pointer to int2*.
alan-bakerb8365b62022-07-18 21:59:45 -04002302 // TODO(#816): remove after final transition.
2303 Value *Cast = Arg1;
2304 if (Arg1->getType() != NewPointerTy) {
2305 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2306 }
David Neto22f144c2017-06-12 14:26:21 -04002307
Kévin Petite8edce32019-04-10 14:23:32 +01002308 // Index into the correct address of the casted pointer.
2309 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002310
Kévin Petite8edce32019-04-10 14:23:32 +01002311 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002312 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002313
Kévin Petite8edce32019-04-10 14:23:32 +01002314 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002315 auto X =
2316 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2317 auto Y =
2318 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002319
Kévin Petite8edce32019-04-10 14:23:32 +01002320 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002321 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002322
Kévin Petite8edce32019-04-10 14:23:32 +01002323 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002324
Kévin Petite8edce32019-04-10 14:23:32 +01002325 // Get the lower (x & y) components of our final float4.
2326 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002327
Kévin Petite8edce32019-04-10 14:23:32 +01002328 // Get the higher (z & w) components of our final float4.
2329 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002330
Kévin Petite8edce32019-04-10 14:23:32 +01002331 Constant *ShuffleMask[4] = {
2332 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2333 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002334
Kévin Petite8edce32019-04-10 14:23:32 +01002335 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002336 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2337 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002338 });
David Neto22f144c2017-06-12 14:26:21 -04002339}
2340
Romaric Jodin71fdb322022-05-03 17:01:10 +02002341bool ReplaceOpenCLBuiltinPass::replaceVloadHalf8(Function &F) {
2342 Module &M = *F.getParent();
2343 return replaceCallsWithValue(F, [&](CallInst *CI) {
2344 // The index argument from vload_half.
2345 auto Arg0 = CI->getOperand(0);
2346
2347 // The pointer argument from vload_half.
2348 auto Arg1 = CI->getOperand(1);
2349
2350 auto IntTy = Type::getInt32Ty(M.getContext());
2351 auto Int4Ty = FixedVectorType::get(IntTy, 4);
2352 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
2353 auto NewPointerTy =
2354 PointerType::get(Int4Ty, Arg1->getType()->getPointerAddressSpace());
2355 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2356
2357 // Cast the half* pointer to int4*.
alan-bakerb8365b62022-07-18 21:59:45 -04002358 // TODO(#816): remove after final transition.
2359 Value *Cast = Arg1;
2360 if (Arg1->getType() != NewPointerTy) {
2361 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2362 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002363
2364 // Index into the correct address of the casted pointer.
2365 auto Index = GetElementPtrInst::Create(Int4Ty, Cast, Arg0, "", CI);
2366
2367 // Load from the int4* we casted to.
2368 auto Load = new LoadInst(Int4Ty, Index, "", CI);
2369
2370 // Extract each element from the loaded int4.
2371 auto X1 =
2372 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2373 auto X2 =
2374 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
2375 auto X3 =
2376 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 2), "", CI);
2377 auto X4 =
2378 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 3), "", CI);
2379
2380 // Our intrinsic to unpack a float2 from an int.
2381 auto SPIRVIntrinsic = clspv::UnpackFunction();
2382
2383 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2384
2385 // Convert the 4 int into 4 float2
2386 auto Y1 = CallInst::Create(NewF, X1, "", CI);
2387 auto Y2 = CallInst::Create(NewF, X2, "", CI);
2388 auto Y3 = CallInst::Create(NewF, X3, "", CI);
2389 auto Y4 = CallInst::Create(NewF, X4, "", CI);
2390
2391 Constant *ShuffleMask4[4] = {
2392 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2393 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2394
2395 // Combine our two float2's into one float4.
2396 auto Z1 = new ShuffleVectorInst(Y1, Y2, ConstantVector::get(ShuffleMask4),
2397 "", CI);
2398 auto Z2 = new ShuffleVectorInst(Y3, Y4, ConstantVector::get(ShuffleMask4),
2399 "", CI);
2400
2401 Constant *ShuffleMask8[8] = {
2402 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2403 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3),
2404 ConstantInt::get(IntTy, 4), ConstantInt::get(IntTy, 5),
2405 ConstantInt::get(IntTy, 6), ConstantInt::get(IntTy, 7)};
2406
2407 // Combine our two float4's into one float8.
2408 return new ShuffleVectorInst(Z1, Z2, ConstantVector::get(ShuffleMask8), "",
2409 CI);
2410 });
2411}
2412
2413bool ReplaceOpenCLBuiltinPass::replaceVloadHalf16(Function &F) {
2414 Module &M = *F.getParent();
2415 return replaceCallsWithValue(F, [&](CallInst *CI) {
2416 // The index argument from vload_half.
2417 auto Arg0 = CI->getOperand(0);
2418
2419 // The pointer argument from vload_half.
2420 auto Arg1 = CI->getOperand(1);
2421
2422 auto IntTy = Type::getInt32Ty(M.getContext());
2423 auto Int4Ty = FixedVectorType::get(IntTy, 4);
2424 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
2425 auto NewPointerTy =
2426 PointerType::get(Int4Ty, Arg1->getType()->getPointerAddressSpace());
2427 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2428
2429 // Cast the half* pointer to int4*.
alan-bakerb8365b62022-07-18 21:59:45 -04002430 // TODO(#816): remove after final transition.
2431 Value *Cast = Arg1;
2432 if (Arg1->getType() != NewPointerTy) {
2433 Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2434 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002435
2436 // Index into the correct address of the casted pointer.
2437 auto Arg0x2 = BinaryOperator::Create(Instruction::Shl, Arg0, ConstantInt::get(IntTy, 1), "", CI);
2438 auto Index1 = GetElementPtrInst::Create(Int4Ty, Cast, Arg0x2, "", CI);
2439 auto Arg0x2p1 = BinaryOperator::Create(Instruction::Add, Arg0x2, ConstantInt::get(IntTy, 1), "", CI);
2440 auto Index2 = GetElementPtrInst::Create(Int4Ty, Cast, Arg0x2p1, "", CI);
2441
2442 // Load from the int4* we casted to.
2443 auto Load1 = new LoadInst(Int4Ty, Index1, "", CI);
2444 auto Load2 = new LoadInst(Int4Ty, Index2, "", CI);
2445
2446 // Extract each element from the two loaded int4.
2447 auto X1 =
2448 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 0), "", CI);
2449 auto X2 =
2450 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 1), "", CI);
2451 auto X3 =
2452 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 2), "", CI);
2453 auto X4 =
2454 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 3), "", CI);
2455 auto X5 =
2456 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 0), "", CI);
2457 auto X6 =
2458 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 1), "", CI);
2459 auto X7 =
2460 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 2), "", CI);
2461 auto X8 =
2462 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 3), "", CI);
2463
2464 // Our intrinsic to unpack a float2 from an int.
2465 auto SPIRVIntrinsic = clspv::UnpackFunction();
2466
2467 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2468
2469 // Convert the eight int into float2
2470 auto Y1 = CallInst::Create(NewF, X1, "", CI);
2471 auto Y2 = CallInst::Create(NewF, X2, "", CI);
2472 auto Y3 = CallInst::Create(NewF, X3, "", CI);
2473 auto Y4 = CallInst::Create(NewF, X4, "", CI);
2474 auto Y5 = CallInst::Create(NewF, X5, "", CI);
2475 auto Y6 = CallInst::Create(NewF, X6, "", CI);
2476 auto Y7 = CallInst::Create(NewF, X7, "", CI);
2477 auto Y8 = CallInst::Create(NewF, X8, "", CI);
2478
2479 Constant *ShuffleMask4[4] = {
2480 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2481 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2482
2483 // Combine our two float2's into one float4.
2484 auto Z1 = new ShuffleVectorInst(Y1, Y2, ConstantVector::get(ShuffleMask4),
2485 "", CI);
2486 auto Z2 = new ShuffleVectorInst(Y3, Y4, ConstantVector::get(ShuffleMask4),
2487 "", CI);
2488 auto Z3 = new ShuffleVectorInst(Y5, Y6, ConstantVector::get(ShuffleMask4),
2489 "", CI);
2490 auto Z4 = new ShuffleVectorInst(Y7, Y8, ConstantVector::get(ShuffleMask4),
2491 "", CI);
2492
2493 Constant *ShuffleMask8[8] = {
2494 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2495 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3),
2496 ConstantInt::get(IntTy, 4), ConstantInt::get(IntTy, 5),
2497 ConstantInt::get(IntTy, 6), ConstantInt::get(IntTy, 7)};
2498
2499 // Combine our two float4's into one float8.
2500 auto Z5 = new ShuffleVectorInst(Z1, Z2, ConstantVector::get(ShuffleMask8),
2501 "", CI);
2502 auto Z6 = new ShuffleVectorInst(Z3, Z4, ConstantVector::get(ShuffleMask8),
2503 "", CI);
2504 Constant *ShuffleMask16[16] = {
2505 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2506 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3),
2507 ConstantInt::get(IntTy, 4), ConstantInt::get(IntTy, 5),
2508 ConstantInt::get(IntTy, 6), ConstantInt::get(IntTy, 7),
2509 ConstantInt::get(IntTy, 8), ConstantInt::get(IntTy, 9),
2510 ConstantInt::get(IntTy, 10), ConstantInt::get(IntTy, 11),
2511 ConstantInt::get(IntTy, 12), ConstantInt::get(IntTy, 13),
2512 ConstantInt::get(IntTy, 14), ConstantInt::get(IntTy, 15)};
2513 // Combine our two float8's into one float16.
2514 return new ShuffleVectorInst(Z5, Z6, ConstantVector::get(ShuffleMask16), "",
2515 CI);
2516 });
2517}
2518
SJW2c317da2020-03-23 07:39:13 -05002519bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002520
2521 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2522 //
2523 // %u = load i32 %ptr
Romaric Jodin71fdb322022-05-03 17:01:10 +02002524 // %result = call <2 x float> Unpack2xHalf(u)
SJW2c317da2020-03-23 07:39:13 -05002525 Module &M = *F.getParent();
2526 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002527 auto Index = CI->getOperand(0);
2528 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002529
Kévin Petite8edce32019-04-10 14:23:32 +01002530 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002531 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002532 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002533
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002534 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002535 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002536
Kévin Petite8edce32019-04-10 14:23:32 +01002537 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002538 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002539
Kévin Petite8edce32019-04-10 14:23:32 +01002540 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002541
Kévin Petite8edce32019-04-10 14:23:32 +01002542 // Get our final float2.
2543 return CallInst::Create(NewF, Load, "", CI);
2544 });
David Neto6ad93232018-06-07 15:42:58 -07002545}
2546
SJW2c317da2020-03-23 07:39:13 -05002547bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002548
2549 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2550 //
2551 // %u2 = load <2 x i32> %ptr
2552 // %u2xy = extractelement %u2, 0
2553 // %u2zw = extractelement %u2, 1
2554 // %fxy = call <2 x float> Unpack2xHalf(uint)
2555 // %fzw = call <2 x float> Unpack2xHalf(uint)
Romaric Jodin71fdb322022-05-03 17:01:10 +02002556 // %result = shufflevector %fxy %fzw <4 x float> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05002557 Module &M = *F.getParent();
2558 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002559 auto Index = CI->getOperand(0);
2560 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002561
Kévin Petite8edce32019-04-10 14:23:32 +01002562 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002563 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2564 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002565 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002566
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002567 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002568 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002569
Kévin Petite8edce32019-04-10 14:23:32 +01002570 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002571 auto X =
2572 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2573 auto Y =
2574 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002575
Kévin Petite8edce32019-04-10 14:23:32 +01002576 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002577 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002578
Kévin Petite8edce32019-04-10 14:23:32 +01002579 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002580
Kévin Petite8edce32019-04-10 14:23:32 +01002581 // Get the lower (x & y) components of our final float4.
2582 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002583
Kévin Petite8edce32019-04-10 14:23:32 +01002584 // Get the higher (z & w) components of our final float4.
2585 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002586
Kévin Petite8edce32019-04-10 14:23:32 +01002587 Constant *ShuffleMask[4] = {
2588 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2589 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002590
Kévin Petite8edce32019-04-10 14:23:32 +01002591 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002592 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2593 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002594 });
David Neto6ad93232018-06-07 15:42:58 -07002595}
2596
Romaric Jodin71fdb322022-05-03 17:01:10 +02002597bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size, bool aligned) {
SJW2c317da2020-03-23 07:39:13 -05002598 switch (vec_size) {
2599 case 0:
2600 return replaceVstoreHalf(F);
2601 case 2:
2602 return replaceVstoreHalf2(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02002603 case 3:
2604 return aligned ? replaceVstoreaHalf3(F) : replaceVstoreHalf3(F);
SJW2c317da2020-03-23 07:39:13 -05002605 case 4:
2606 return replaceVstoreHalf4(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02002607 case 8:
2608 return replaceVstoreHalf8(F);
2609 case 16:
2610 return replaceVstoreHalf16(F);
SJW2c317da2020-03-23 07:39:13 -05002611 default:
2612 llvm_unreachable("Unsupported vstore_half vector size");
2613 break;
2614 }
2615 return false;
2616}
David Neto22f144c2017-06-12 14:26:21 -04002617
SJW2c317da2020-03-23 07:39:13 -05002618bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
2619 Module &M = *F.getParent();
2620 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002621 // The value to store.
2622 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002623
Kévin Petite8edce32019-04-10 14:23:32 +01002624 // The index argument from vstore_half.
2625 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002626
Kévin Petite8edce32019-04-10 14:23:32 +01002627 // The pointer argument from vstore_half.
2628 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002629
Kévin Petite8edce32019-04-10 14:23:32 +01002630 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002631 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002632 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2633 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002634
Kévin Petite8edce32019-04-10 14:23:32 +01002635 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002636 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002637
Kévin Petite8edce32019-04-10 14:23:32 +01002638 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002639
Kévin Petite8edce32019-04-10 14:23:32 +01002640 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002641 auto TempVec = InsertElementInst::Create(
2642 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002643
Kévin Petite8edce32019-04-10 14:23:32 +01002644 // Pack the float2 -> half2 (in an int).
2645 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002646
alan-baker7efcaaa2020-05-06 19:33:27 -04002647 bool supports_16bit_storage = true;
2648 switch (Arg2->getType()->getPointerAddressSpace()) {
2649 case clspv::AddressSpace::Global:
2650 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2651 clspv::Option::StorageClass::kSSBO);
2652 break;
2653 case clspv::AddressSpace::Constant:
2654 if (clspv::Option::ConstantArgsInUniformBuffer())
2655 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2656 clspv::Option::StorageClass::kUBO);
2657 else
2658 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2659 clspv::Option::StorageClass::kSSBO);
2660 break;
2661 default:
2662 // Clspv will emit the Float16 capability if the half type is
2663 // encountered. That capability covers private and local addressspaces.
2664 break;
2665 }
2666
SJW2c317da2020-03-23 07:39:13 -05002667 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002668 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002669 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002670 auto ShortPointerTy =
2671 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002672
Kévin Petite8edce32019-04-10 14:23:32 +01002673 // Truncate our i32 to an i16.
2674 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002675
Kévin Petite8edce32019-04-10 14:23:32 +01002676 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002677 // TODO(#816): remove after final transition.
2678 Value *Cast = Arg2;
2679 if (Arg2->getType() != ShortPointerTy) {
2680 Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
2681 }
David Neto22f144c2017-06-12 14:26:21 -04002682
Kévin Petite8edce32019-04-10 14:23:32 +01002683 // Index into the correct address of the casted pointer.
2684 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002685
Kévin Petite8edce32019-04-10 14:23:32 +01002686 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002687 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002688 } else {
2689 // We can only write to 32-bit aligned words.
2690 //
2691 // Assuming base is aligned to 32-bits, replace the equivalent of
2692 // vstore_half(value, index, base)
2693 // with:
2694 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2695 // uint32_t write_to_upper_half = index & 1u;
2696 // uint32_t shift = write_to_upper_half << 4;
2697 //
2698 // // Pack the float value as a half number in bottom 16 bits
2699 // // of an i32.
2700 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2701 //
2702 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2703 // ^ ((packed & 0xffff) << shift)
2704 // // We only need relaxed consistency, but OpenCL 1.2 only has
2705 // // sequentially consistent atomics.
2706 // // TODO(dneto): Use relaxed consistency.
2707 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002708 auto IntPointerTy =
2709 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002710
Kévin Petite8edce32019-04-10 14:23:32 +01002711 auto Four = ConstantInt::get(IntTy, 4);
2712 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002713
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002714 auto IndexIsOdd =
2715 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002716 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002717 auto IndexIntoI32 =
2718 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
alan-bakerb8365b62022-07-18 21:59:45 -04002719 // TODO(#816): remove after final transition.
2720 Value *BaseI32Ptr = Arg2;
2721 if (Arg2->getType() != IntPointerTy) {
2722 BaseI32Ptr =
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002723 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
alan-bakerb8365b62022-07-18 21:59:45 -04002724 }
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002725 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2726 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002727 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002728 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002729 auto MaskBitsToWrite =
2730 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2731 auto MaskedCurrent = BinaryOperator::CreateAnd(
2732 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002733
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002734 auto XLowerBits =
2735 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2736 auto NewBitsToWrite =
2737 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2738 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2739 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002740
Kévin Petite8edce32019-04-10 14:23:32 +01002741 // Generate the call to atomi_xor.
2742 SmallVector<Type *, 5> ParamTypes;
2743 // The pointer type.
2744 ParamTypes.push_back(IntPointerTy);
2745 // The Types for memory scope, semantics, and value.
2746 ParamTypes.push_back(IntTy);
2747 ParamTypes.push_back(IntTy);
2748 ParamTypes.push_back(IntTy);
2749 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2750 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002751
Kévin Petite8edce32019-04-10 14:23:32 +01002752 const auto ConstantScopeDevice =
2753 ConstantInt::get(IntTy, spv::ScopeDevice);
2754 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2755 // (SPIR-V Workgroup).
2756 const auto AddrSpaceSemanticsBits =
2757 IntPointerTy->getPointerAddressSpace() == 1
2758 ? spv::MemorySemanticsUniformMemoryMask
2759 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002760
Kévin Petite8edce32019-04-10 14:23:32 +01002761 // We're using relaxed consistency here.
2762 const auto ConstantMemorySemantics =
2763 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2764 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002765
Kévin Petite8edce32019-04-10 14:23:32 +01002766 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2767 ConstantMemorySemantics, ValueToXor};
2768 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002769
2770 // Return a Nop so the old Call is removed
2771 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2772 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002773 }
David Neto22f144c2017-06-12 14:26:21 -04002774
SJW2c317da2020-03-23 07:39:13 -05002775 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002776 });
David Neto22f144c2017-06-12 14:26:21 -04002777}
2778
SJW2c317da2020-03-23 07:39:13 -05002779bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2780 Module &M = *F.getParent();
2781 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002782 // The value to store.
2783 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002784
Kévin Petite8edce32019-04-10 14:23:32 +01002785 // The index argument from vstore_half.
2786 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002787
Kévin Petite8edce32019-04-10 14:23:32 +01002788 // The pointer argument from vstore_half.
2789 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002790
Kévin Petite8edce32019-04-10 14:23:32 +01002791 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002792 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002793 auto NewPointerTy =
2794 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002795 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002796
Kévin Petite8edce32019-04-10 14:23:32 +01002797 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002798 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002799
Kévin Petite8edce32019-04-10 14:23:32 +01002800 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002801
Kévin Petite8edce32019-04-10 14:23:32 +01002802 // Turn the packed x & y into the final packing.
2803 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002804
Kévin Petite8edce32019-04-10 14:23:32 +01002805 // Cast the half* pointer to int*.
alan-bakerb8365b62022-07-18 21:59:45 -04002806 // TODO(#816): remove after final transition.
2807 Value *Cast = Arg2;
2808 if (Arg2->getType() != NewPointerTy) {
2809 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2810 }
David Neto22f144c2017-06-12 14:26:21 -04002811
Kévin Petite8edce32019-04-10 14:23:32 +01002812 // Index into the correct address of the casted pointer.
2813 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002814
Kévin Petite8edce32019-04-10 14:23:32 +01002815 // Store to the int* we casted to.
2816 return new StoreInst(X, Index, CI);
2817 });
David Neto22f144c2017-06-12 14:26:21 -04002818}
2819
Romaric Jodin71fdb322022-05-03 17:01:10 +02002820bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf3(Function &F) {
2821 Module &M = *F.getParent();
2822 return replaceCallsWithValue(F, [&](CallInst *CI) {
2823 // The value to store.
2824 auto Arg0 = CI->getOperand(0);
2825
2826 // The index argument from vstore_half.
2827 auto Arg1 = CI->getOperand(1);
2828
2829 // The pointer argument from vstore_half.
2830 auto Arg2 = CI->getOperand(2);
2831
2832 auto IntTy = Type::getInt32Ty(M.getContext());
2833 auto ShortTy = Type::getInt16Ty(M.getContext());
2834 auto FloatTy = Type::getFloatTy(M.getContext());
2835 auto Float2Ty = FixedVectorType::get(FloatTy, 2);
2836 auto NewPointerTy =
2837 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
2838 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2839
2840 auto Int0 = ConstantInt::get(IntTy, 0);
2841 auto Int1 = ConstantInt::get(IntTy, 1);
2842 auto Int2 = ConstantInt::get(IntTy, 2);
2843
2844 auto X0 = InsertElementInst::Create(
2845 UndefValue::get(Float2Ty),
2846 ExtractElementInst::Create(Arg0, Int0, "", CI), Int0, "", CI);
2847 auto X1 = InsertElementInst::Create(
2848 UndefValue::get(Float2Ty),
2849 ExtractElementInst::Create(Arg0, Int1, "", CI), Int0, "", CI);
2850 auto X2 = InsertElementInst::Create(
2851 UndefValue::get(Float2Ty),
2852 ExtractElementInst::Create(Arg0, Int2, "", CI), Int0, "", CI);
2853
2854 // Our intrinsic to pack a float2 to an int.
2855 auto SPIRVIntrinsic = clspv::PackFunction();
2856
2857 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2858
2859 // Convert float2 into int and trunc to short to keep only the meaningful
2860 // part of it
2861 auto Y0 =
2862 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X0, "", CI),
2863 ShortTy, "", CI);
2864 auto Y1 =
2865 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X1, "", CI),
2866 ShortTy, "", CI);
2867 auto Y2 =
2868 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X2, "", CI),
2869 ShortTy, "", CI);
2870
2871 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002872 // TODO(#816): remove after final transition.
2873 Value *Cast = Arg2;
2874 if (Arg2->getType() != NewPointerTy) {
2875 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2876 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002877
2878 auto Index0 = BinaryOperator::Create(
2879 Instruction::Add,
2880 BinaryOperator::Create(Instruction::Shl, Arg1, Int1, "", CI), Arg1, "",
2881 CI);
2882 auto GEP0 = GetElementPtrInst::Create(ShortTy, Cast, Index0, "", CI);
2883 new StoreInst(Y0, GEP0, CI);
2884
2885 auto Index1 =
2886 BinaryOperator::Create(Instruction::Add, Index0, Int1, "", CI);
2887 auto GEP1 = GetElementPtrInst::Create(ShortTy, Cast, Index1, "", CI);
2888 new StoreInst(Y1, GEP1, CI);
2889
2890 auto Index2 =
2891 BinaryOperator::Create(Instruction::Add, Index1, Int1, "", CI);
2892 auto GEP2 = GetElementPtrInst::Create(ShortTy, Cast, Index2, "", CI);
2893 return new StoreInst(Y2, GEP2, CI);
2894 });
2895}
2896
2897bool ReplaceOpenCLBuiltinPass::replaceVstoreaHalf3(Function &F) {
2898 Module &M = *F.getParent();
2899 return replaceCallsWithValue(F, [&](CallInst *CI) {
2900 // The value to store.
2901 auto Arg0 = CI->getOperand(0);
2902
2903 // The index argument from vstore_half.
2904 auto Arg1 = CI->getOperand(1);
2905
2906 // The pointer argument from vstore_half.
2907 auto Arg2 = CI->getOperand(2);
2908
2909 auto IntTy = Type::getInt32Ty(M.getContext());
2910 auto ShortTy = Type::getInt16Ty(M.getContext());
2911 auto FloatTy = Type::getFloatTy(M.getContext());
2912 auto Float2Ty = FixedVectorType::get(FloatTy, 2);
2913 auto NewPointerTy =
2914 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
2915 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2916
2917 auto Int0 = ConstantInt::get(IntTy, 0);
2918 auto Int1 = ConstantInt::get(IntTy, 1);
2919 auto Int2 = ConstantInt::get(IntTy, 2);
2920
2921 auto X0 = InsertElementInst::Create(
2922 UndefValue::get(Float2Ty),
2923 ExtractElementInst::Create(Arg0, Int0, "", CI), Int0, "", CI);
2924 auto X1 = InsertElementInst::Create(
2925 UndefValue::get(Float2Ty),
2926 ExtractElementInst::Create(Arg0, Int1, "", CI), Int0, "", CI);
2927 auto X2 = InsertElementInst::Create(
2928 UndefValue::get(Float2Ty),
2929 ExtractElementInst::Create(Arg0, Int2, "", CI), Int0, "", CI);
2930
2931 // Our intrinsic to pack a float2 to an int.
2932 auto SPIRVIntrinsic = clspv::PackFunction();
2933
2934 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2935
2936 // Convert float2 into int and trunc to short to keep only the meaningful
2937 // part of it
2938 auto Y0 =
2939 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X0, "", CI),
2940 ShortTy, "", CI);
2941 auto Y1 =
2942 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X1, "", CI),
2943 ShortTy, "", CI);
2944 auto Y2 =
2945 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X2, "", CI),
2946 ShortTy, "", CI);
2947
2948 // Cast the half* pointer to short*.
alan-bakerb8365b62022-07-18 21:59:45 -04002949 // TODO(#816): remove after final transition.
2950 Value *Cast = Arg2;
2951 if (Arg2->getType() != NewPointerTy) {
2952 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2953 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002954
2955 auto Index0 = BinaryOperator::Create(Instruction::Shl, Arg1, Int2, "", CI);
2956 auto GEP0 = GetElementPtrInst::Create(ShortTy, Cast, Index0, "", CI);
2957 new StoreInst(Y0, GEP0, CI);
2958
2959 auto Index1 =
2960 BinaryOperator::Create(Instruction::Add, Index0, Int1, "", CI);
2961 auto GEP1 = GetElementPtrInst::Create(ShortTy, Cast, Index1, "", CI);
2962 new StoreInst(Y1, GEP1, CI);
2963
2964 auto Index2 =
2965 BinaryOperator::Create(Instruction::Add, Index1, Int1, "", CI);
2966 auto GEP2 = GetElementPtrInst::Create(ShortTy, Cast, Index2, "", CI);
2967 return new StoreInst(Y2, GEP2, CI);
2968 });
2969}
2970
SJW2c317da2020-03-23 07:39:13 -05002971bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2972 Module &M = *F.getParent();
2973 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002974 // The value to store.
2975 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002976
Kévin Petite8edce32019-04-10 14:23:32 +01002977 // The index argument from vstore_half.
2978 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002979
Kévin Petite8edce32019-04-10 14:23:32 +01002980 // The pointer argument from vstore_half.
2981 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002982
Kévin Petite8edce32019-04-10 14:23:32 +01002983 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002984 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2985 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002986 auto NewPointerTy =
2987 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002988 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002989
Kévin Petite8edce32019-04-10 14:23:32 +01002990 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2991 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002992
Kévin Petite8edce32019-04-10 14:23:32 +01002993 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002994 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2995 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002996
Kévin Petite8edce32019-04-10 14:23:32 +01002997 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2998 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002999
Kévin Petite8edce32019-04-10 14:23:32 +01003000 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003001 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3002 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003003
Kévin Petite8edce32019-04-10 14:23:32 +01003004 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05003005 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04003006
Kévin Petite8edce32019-04-10 14:23:32 +01003007 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003008
Kévin Petite8edce32019-04-10 14:23:32 +01003009 // Turn the packed x & y into the final component of our int2.
3010 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003011
Kévin Petite8edce32019-04-10 14:23:32 +01003012 // Turn the packed z & w into the final component of our int2.
3013 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003014
Kévin Petite8edce32019-04-10 14:23:32 +01003015 auto Combine = InsertElementInst::Create(
3016 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003017 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
3018 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003019
Kévin Petite8edce32019-04-10 14:23:32 +01003020 // Cast the half* pointer to int2*.
alan-bakerb8365b62022-07-18 21:59:45 -04003021 // TODO(#816): remove after final transition.
3022 Value *Cast = Arg2;
3023 if (Arg2->getType() != NewPointerTy) {
3024 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
3025 }
David Neto22f144c2017-06-12 14:26:21 -04003026
Kévin Petite8edce32019-04-10 14:23:32 +01003027 // Index into the correct address of the casted pointer.
3028 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003029
Kévin Petite8edce32019-04-10 14:23:32 +01003030 // Store to the int2* we casted to.
3031 return new StoreInst(Combine, Index, CI);
3032 });
David Neto22f144c2017-06-12 14:26:21 -04003033}
3034
Romaric Jodin71fdb322022-05-03 17:01:10 +02003035bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf8(Function &F) {
3036 Module &M = *F.getParent();
3037 return replaceCallsWithValue(F, [&](CallInst *CI) {
3038 // The value to store.
3039 auto Arg0 = CI->getOperand(0);
3040
3041 // The index argument from vstore_half.
3042 auto Arg1 = CI->getOperand(1);
3043
3044 // The pointer argument from vstore_half.
3045 auto Arg2 = CI->getOperand(2);
3046
3047 auto IntTy = Type::getInt32Ty(M.getContext());
3048 auto Int4Ty = FixedVectorType::get(IntTy, 4);
3049 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
3050 auto NewPointerTy =
3051 PointerType::get(Int4Ty, Arg2->getType()->getPointerAddressSpace());
3052 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
3053
3054 Constant *ShuffleMask01[2] = {ConstantInt::get(IntTy, 0),
3055 ConstantInt::get(IntTy, 1)};
3056 auto X01 =
3057 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3058 ConstantVector::get(ShuffleMask01), "", CI);
3059 Constant *ShuffleMask23[2] = {ConstantInt::get(IntTy, 2),
3060 ConstantInt::get(IntTy, 3)};
3061 auto X23 =
3062 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3063 ConstantVector::get(ShuffleMask23), "", CI);
3064 Constant *ShuffleMask45[2] = {ConstantInt::get(IntTy, 4),
3065 ConstantInt::get(IntTy, 5)};
3066 auto X45 =
3067 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3068 ConstantVector::get(ShuffleMask45), "", CI);
3069 Constant *ShuffleMask67[2] = {ConstantInt::get(IntTy, 6),
3070 ConstantInt::get(IntTy, 7)};
3071 auto X67 =
3072 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3073 ConstantVector::get(ShuffleMask67), "", CI);
3074
3075 // Our intrinsic to pack a float2 to an int.
3076 auto SPIRVIntrinsic = clspv::PackFunction();
3077
3078 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
3079
3080 auto Y01 = CallInst::Create(NewF, X01, "", CI);
3081 auto Y23 = CallInst::Create(NewF, X23, "", CI);
3082 auto Y45 = CallInst::Create(NewF, X45, "", CI);
3083 auto Y67 = CallInst::Create(NewF, X67, "", CI);
3084
3085 auto Combine = InsertElementInst::Create(
3086 UndefValue::get(Int4Ty), Y01, ConstantInt::get(IntTy, 0), "", CI);
3087 Combine = InsertElementInst::Create(Combine, Y23,
3088 ConstantInt::get(IntTy, 1), "", CI);
3089 Combine = InsertElementInst::Create(Combine, Y45,
3090 ConstantInt::get(IntTy, 2), "", CI);
3091 Combine = InsertElementInst::Create(Combine, Y67,
3092 ConstantInt::get(IntTy, 3), "", CI);
3093
3094 // Cast the half* pointer to int4*.
alan-bakerb8365b62022-07-18 21:59:45 -04003095 // TODO(#816): remove after final transition.
3096 Value *Cast = Arg2;
3097 if (Arg2->getType() != NewPointerTy) {
3098 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
3099 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02003100
3101 // Index into the correct address of the casted pointer.
3102 auto Index = GetElementPtrInst::Create(Int4Ty, Cast, Arg1, "", CI);
3103
3104 // Store to the int4* we casted to.
3105 return new StoreInst(Combine, Index, CI);
3106 });
3107}
3108
3109bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf16(Function &F) {
3110 Module &M = *F.getParent();
3111 return replaceCallsWithValue(F, [&](CallInst *CI) {
3112 // The value to store.
3113 auto Arg0 = CI->getOperand(0);
3114
3115 // The index argument from vstore_half.
3116 auto Arg1 = CI->getOperand(1);
3117
3118 // The pointer argument from vstore_half.
3119 auto Arg2 = CI->getOperand(2);
3120
3121 auto IntTy = Type::getInt32Ty(M.getContext());
3122 auto Int4Ty = FixedVectorType::get(IntTy, 4);
3123 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
3124 auto NewPointerTy =
3125 PointerType::get(Int4Ty, Arg2->getType()->getPointerAddressSpace());
3126 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
3127
3128 Constant *ShuffleMask0[2] = {ConstantInt::get(IntTy, 0),
3129 ConstantInt::get(IntTy, 1)};
3130 auto X0 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3131 ConstantVector::get(ShuffleMask0), "", CI);
3132 Constant *ShuffleMask1[2] = {ConstantInt::get(IntTy, 2),
3133 ConstantInt::get(IntTy, 3)};
3134 auto X1 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3135 ConstantVector::get(ShuffleMask1), "", CI);
3136 Constant *ShuffleMask2[2] = {ConstantInt::get(IntTy, 4),
3137 ConstantInt::get(IntTy, 5)};
3138 auto X2 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3139 ConstantVector::get(ShuffleMask2), "", CI);
3140 Constant *ShuffleMask3[2] = {ConstantInt::get(IntTy, 6),
3141 ConstantInt::get(IntTy, 7)};
3142 auto X3 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3143 ConstantVector::get(ShuffleMask3), "", CI);
3144 Constant *ShuffleMask4[2] = {ConstantInt::get(IntTy, 8),
3145 ConstantInt::get(IntTy, 9)};
3146 auto X4 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3147 ConstantVector::get(ShuffleMask4), "", CI);
3148 Constant *ShuffleMask5[2] = {ConstantInt::get(IntTy, 10),
3149 ConstantInt::get(IntTy, 11)};
3150 auto X5 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3151 ConstantVector::get(ShuffleMask5), "", CI);
3152 Constant *ShuffleMask6[2] = {ConstantInt::get(IntTy, 12),
3153 ConstantInt::get(IntTy, 13)};
3154 auto X6 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3155 ConstantVector::get(ShuffleMask6), "", CI);
3156 Constant *ShuffleMask7[2] = {ConstantInt::get(IntTy, 14),
3157 ConstantInt::get(IntTy, 15)};
3158 auto X7 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3159 ConstantVector::get(ShuffleMask7), "", CI);
3160
3161 // Our intrinsic to pack a float2 to an int.
3162 auto SPIRVIntrinsic = clspv::PackFunction();
3163
3164 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
3165
3166 auto Y0 = CallInst::Create(NewF, X0, "", CI);
3167 auto Y1 = CallInst::Create(NewF, X1, "", CI);
3168 auto Y2 = CallInst::Create(NewF, X2, "", CI);
3169 auto Y3 = CallInst::Create(NewF, X3, "", CI);
3170 auto Y4 = CallInst::Create(NewF, X4, "", CI);
3171 auto Y5 = CallInst::Create(NewF, X5, "", CI);
3172 auto Y6 = CallInst::Create(NewF, X6, "", CI);
3173 auto Y7 = CallInst::Create(NewF, X7, "", CI);
3174
3175 auto Combine1 = InsertElementInst::Create(
3176 UndefValue::get(Int4Ty), Y0, ConstantInt::get(IntTy, 0), "", CI);
3177 Combine1 = InsertElementInst::Create(Combine1, Y1,
3178 ConstantInt::get(IntTy, 1), "", CI);
3179 Combine1 = InsertElementInst::Create(Combine1, Y2,
3180 ConstantInt::get(IntTy, 2), "", CI);
3181 Combine1 = InsertElementInst::Create(Combine1, Y3,
3182 ConstantInt::get(IntTy, 3), "", CI);
3183
3184 auto Combine2 = InsertElementInst::Create(
3185 UndefValue::get(Int4Ty), Y4, ConstantInt::get(IntTy, 0), "", CI);
3186 Combine2 = InsertElementInst::Create(Combine2, Y5,
3187 ConstantInt::get(IntTy, 1), "", CI);
3188 Combine2 = InsertElementInst::Create(Combine2, Y6,
3189 ConstantInt::get(IntTy, 2), "", CI);
3190 Combine2 = InsertElementInst::Create(Combine2, Y7,
3191 ConstantInt::get(IntTy, 3), "", CI);
3192
3193 // Cast the half* pointer to int4*.
alan-bakerb8365b62022-07-18 21:59:45 -04003194 // TODO(#816): remove after final transition.
3195 Value *Cast = Arg2;
3196 if (Arg2->getType() != NewPointerTy) {
3197 Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
3198 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02003199
3200 // Index into the correct address of the casted pointer.
3201 auto Arg1x2 = BinaryOperator::Create(Instruction::Shl, Arg1,
3202 ConstantInt::get(IntTy, 1), "", CI);
3203 auto Index1 = GetElementPtrInst::Create(Int4Ty, Cast, Arg1x2, "", CI);
3204
3205 // Store to the int4* we casted to.
3206 new StoreInst(Combine1, Index1, CI);
3207
3208 // Index into the correct address of the casted pointer.
3209 auto Arg1Plus1 = BinaryOperator::Create(Instruction::Add, Arg1x2,
3210 ConstantInt::get(IntTy, 1), "", CI);
3211 auto Index2 = GetElementPtrInst::Create(Int4Ty, Cast, Arg1Plus1, "", CI);
3212
3213 // Store to the int4* we casted to.
3214 return new StoreInst(Combine2, Index2, CI);
3215 });
3216}
3217
SJW2c317da2020-03-23 07:39:13 -05003218bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
3219 // convert half to float
3220 Module &M = *F.getParent();
3221 return replaceCallsWithValue(F, [&](CallInst *CI) {
3222 SmallVector<Type *, 3> types;
3223 SmallVector<Value *, 3> args;
alan-baker5641f5c2021-10-15 09:16:04 -04003224 for (size_t i = 0; i < CI->arg_size(); ++i) {
SJW2c317da2020-03-23 07:39:13 -05003225 types.push_back(CI->getArgOperand(i)->getType());
3226 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05003227 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05003228
alan-baker5a8c3be2020-09-09 13:44:26 -04003229 auto NewFType =
3230 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
3231 cast<VectorType>(CI->getType())
3232 ->getElementCount()
3233 .getKnownMinValue()),
3234 types, false);
SJW2c317da2020-03-23 07:39:13 -05003235
SJW61531372020-06-09 07:31:08 -05003236 std::string NewFName =
3237 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05003238
3239 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
3240
3241 auto NewCI = CallInst::Create(NewF, args, "", CI);
3242
3243 // Convert to the half type.
3244 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
3245 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05003246}
3247
SJW2c317da2020-03-23 07:39:13 -05003248bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
3249 // convert half to float
3250 Module &M = *F.getParent();
3251 return replaceCallsWithValue(F, [&](CallInst *CI) {
3252 SmallVector<Type *, 3> types(3);
3253 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003254
SJW2c317da2020-03-23 07:39:13 -05003255 // Image
3256 types[0] = CI->getArgOperand(0)->getType();
3257 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003258
SJW2c317da2020-03-23 07:39:13 -05003259 // Coord
3260 types[1] = CI->getArgOperand(1)->getType();
3261 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003262
SJW2c317da2020-03-23 07:39:13 -05003263 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04003264 types[2] =
3265 FixedVectorType::get(Type::getFloatTy(M.getContext()),
3266 cast<VectorType>(CI->getArgOperand(2)->getType())
3267 ->getElementCount()
3268 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05003269
SJW2c317da2020-03-23 07:39:13 -05003270 auto NewFType =
3271 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003272
SJW61531372020-06-09 07:31:08 -05003273 std::string NewFName =
3274 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003275
SJW2c317da2020-03-23 07:39:13 -05003276 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003277
SJW2c317da2020-03-23 07:39:13 -05003278 // Convert data to the float type.
3279 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
3280 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05003281
SJW2c317da2020-03-23 07:39:13 -05003282 return CallInst::Create(NewF, args, "", CI);
3283 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05003284}
3285
SJW2c317da2020-03-23 07:39:13 -05003286bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
3287 Function &F) {
3288 // convert read_image with int coords to float coords
3289 Module &M = *F.getParent();
3290 return replaceCallsWithValue(F, [&](CallInst *CI) {
3291 // The image.
3292 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04003293
SJW2c317da2020-03-23 07:39:13 -05003294 // The sampler.
3295 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04003296
SJW2c317da2020-03-23 07:39:13 -05003297 // The coordinate (integer type that we can't handle).
3298 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04003299
alan-baker6792f982022-06-23 14:55:40 -04003300 auto *image_ty =
3301 cast<StructType>(InferType(Arg0, M.getContext(), &InferredTypeCache));
3302 uint32_t dim = clspv::ImageNumDimensions(image_ty);
3303 uint32_t components = dim + (clspv::IsArrayImageType(image_ty) ? 1 : 0);
SJW2c317da2020-03-23 07:39:13 -05003304 Type *float_ty = nullptr;
3305 if (components == 1) {
3306 float_ty = Type::getFloatTy(M.getContext());
3307 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04003308 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
3309 cast<VectorType>(Arg2->getType())
3310 ->getElementCount()
3311 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04003312 }
David Neto22f144c2017-06-12 14:26:21 -04003313
SJW2c317da2020-03-23 07:39:13 -05003314 auto NewFType = FunctionType::get(
3315 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
3316
3317 std::string NewFName = F.getName().str();
3318 NewFName[NewFName.length() - 1] = 'f';
3319
3320 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
3321
3322 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
3323
3324 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
3325 });
David Neto22f144c2017-06-12 14:26:21 -04003326}
3327
SJW2c317da2020-03-23 07:39:13 -05003328bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
3329 return replaceCallsWithValue(F, [&](CallInst *CI) {
3330 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04003331
SJW2c317da2020-03-23 07:39:13 -05003332 // We need to map the OpenCL constants to the SPIR-V equivalents.
3333 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
3334 const auto ConstantMemorySemantics = ConstantInt::get(
3335 IntTy, spv::MemorySemanticsUniformMemoryMask |
3336 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04003337
SJW2c317da2020-03-23 07:39:13 -05003338 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04003339
SJW2c317da2020-03-23 07:39:13 -05003340 // The pointer.
3341 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04003342
SJW2c317da2020-03-23 07:39:13 -05003343 // The memory scope.
3344 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04003345
SJW2c317da2020-03-23 07:39:13 -05003346 // The memory semantics.
3347 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04003348
alan-baker5641f5c2021-10-15 09:16:04 -04003349 if (2 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05003350 // The unequal memory semantics.
3351 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04003352
SJW2c317da2020-03-23 07:39:13 -05003353 // The value.
3354 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04003355
SJW2c317da2020-03-23 07:39:13 -05003356 // The comparator.
3357 Params.push_back(CI->getArgOperand(1));
alan-baker5641f5c2021-10-15 09:16:04 -04003358 } else if (1 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05003359 // The value.
3360 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04003361 }
David Neto22f144c2017-06-12 14:26:21 -04003362
SJW2c317da2020-03-23 07:39:13 -05003363 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
3364 });
David Neto22f144c2017-06-12 14:26:21 -04003365}
3366
SJW2c317da2020-03-23 07:39:13 -05003367bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
3368 llvm::AtomicRMWInst::BinOp Op) {
3369 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04003370 auto align = F.getParent()->getDataLayout().getABITypeAlign(
3371 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05003372 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04003373 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05003374 SyncScope::System, CI);
3375 });
3376}
David Neto22f144c2017-06-12 14:26:21 -04003377
SJW2c317da2020-03-23 07:39:13 -05003378bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
3379 Module &M = *F.getParent();
3380 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04003381 auto IntTy = Type::getInt32Ty(M.getContext());
3382 auto FloatTy = Type::getFloatTy(M.getContext());
3383
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003384 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
3385 ConstantInt::get(IntTy, 1),
3386 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04003387
3388 Constant *UpShuffleMask[4] = {
3389 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3390 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3391
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003392 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
3393 UndefValue::get(FloatTy),
3394 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04003395
Kévin Petite8edce32019-04-10 14:23:32 +01003396 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003397 auto Arg0 =
3398 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
3399 ConstantVector::get(DownShuffleMask), "", CI);
3400 auto Arg1 =
3401 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
3402 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003403 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04003404
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003405 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05003406 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003407
SJW61531372020-06-09 07:31:08 -05003408 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003409
Kévin Petite8edce32019-04-10 14:23:32 +01003410 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003411
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003412 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
3413 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003414 });
David Neto22f144c2017-06-12 14:26:21 -04003415}
David Neto62653202017-10-16 19:05:18 -04003416
SJW2c317da2020-03-23 07:39:13 -05003417bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04003418 // OpenCL's float result = fract(float x, float* ptr)
3419 //
3420 // In the LLVM domain:
3421 //
3422 // %floor_result = call spir_func float @floor(float %x)
3423 // store float %floor_result, float * %ptr
3424 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3425 // %result = call spir_func float
3426 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3427 //
3428 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3429 // and clspv.fract occur in the SPIR-V generator pass:
3430 //
3431 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3432 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3433 // ...
3434 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3435 // OpStore %ptr %floor_result
3436 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3437 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01003438 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04003439
David Neto62653202017-10-16 19:05:18 -04003440 using std::string;
3441
3442 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3443 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04003444
SJW2c317da2020-03-23 07:39:13 -05003445 Module &M = *F.getParent();
3446 return replaceCallsWithValue(F, [&](CallInst *CI) {
SJW2c317da2020-03-23 07:39:13 -05003447 // This is either float or a float vector. All the float-like
3448 // types are this type.
3449 auto result_ty = F.getReturnType();
3450
SJW61531372020-06-09 07:31:08 -05003451 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05003452 Function *fmin_fn = M.getFunction(fmin_name);
3453 if (!fmin_fn) {
3454 // Make the fmin function.
3455 FunctionType *fn_ty =
3456 FunctionType::get(result_ty, {result_ty, result_ty}, false);
3457 fmin_fn =
3458 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
3459 fmin_fn->addFnAttr(Attribute::ReadNone);
3460 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3461 }
3462
SJW61531372020-06-09 07:31:08 -05003463 std::string floor_name =
3464 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05003465 Function *floor_fn = M.getFunction(floor_name);
3466 if (!floor_fn) {
3467 // Make the floor function.
3468 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
3469 floor_fn =
3470 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
3471 floor_fn->addFnAttr(Attribute::ReadNone);
3472 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3473 }
3474
SJW61531372020-06-09 07:31:08 -05003475 std::string clspv_fract_name =
3476 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05003477 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
3478 if (!clspv_fract_fn) {
3479 // Make the clspv_fract function.
3480 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
3481 clspv_fract_fn = cast<Function>(
3482 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
3483 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3484 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3485 }
3486
3487 // Number of significant significand bits, whether represented or not.
3488 unsigned num_significand_bits;
3489 switch (result_ty->getScalarType()->getTypeID()) {
3490 case Type::HalfTyID:
3491 num_significand_bits = 11;
3492 break;
3493 case Type::FloatTyID:
3494 num_significand_bits = 24;
3495 break;
3496 case Type::DoubleTyID:
3497 num_significand_bits = 53;
3498 break;
3499 default:
3500 llvm_unreachable("Unhandled float type when processing fract builtin");
3501 break;
3502 }
3503 // Beware that the disassembler displays this value as
3504 // OpConstant %float 1
3505 // which is not quite right.
3506 const double kJustUnderOneScalar =
3507 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3508
3509 Constant *just_under_one =
3510 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3511 if (result_ty->isVectorTy()) {
3512 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04003513 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05003514 }
3515
3516 IRBuilder<> Builder(CI);
3517
3518 auto arg = CI->getArgOperand(0);
3519 auto ptr = CI->getArgOperand(1);
3520
3521 // Compute floor result and store it.
3522 auto floor = Builder.CreateCall(floor_fn, {arg});
3523 Builder.CreateStore(floor, ptr);
3524
3525 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
3526 auto fract_result =
3527 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
3528
3529 return fract_result;
3530 });
David Neto62653202017-10-16 19:05:18 -04003531}
alan-bakera52b7312020-10-26 08:58:51 -04003532
Kévin Petit8576f682020-11-02 14:51:32 +00003533bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04003534 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00003535 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04003536 // a_shr = a >> 1
3537 // b_shr = b >> 1
3538 // add1 = a_shr + b_shr
3539 // join = a |join_opcode| b
3540 // and = join & 1
3541 // add = add1 + and
3542 const auto a = Call->getArgOperand(0);
3543 const auto b = Call->getArgOperand(1);
3544 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00003545 Value *a_shift, *b_shift;
3546 if (is_signed) {
3547 a_shift = builder.CreateAShr(a, 1);
3548 b_shift = builder.CreateAShr(b, 1);
3549 } else {
3550 a_shift = builder.CreateLShr(a, 1);
3551 b_shift = builder.CreateLShr(b, 1);
3552 }
alan-bakerb6da5132020-10-29 15:59:06 -04003553 auto add = builder.CreateAdd(a_shift, b_shift);
3554 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
3555 auto constant_one = ConstantInt::get(a->getType(), 1);
3556 auto and_bit = builder.CreateAnd(join, constant_one);
3557 return builder.CreateAdd(add, and_bit);
3558 });
3559}
3560
alan-baker3f1bf492020-11-05 09:07:36 -05003561bool ReplaceOpenCLBuiltinPass::replaceAddSubSat(Function &F, bool is_signed,
3562 bool is_add) {
3563 return replaceCallsWithValue(F, [&F, this, is_signed,
3564 is_add](CallInst *Call) {
gnl21f8847012022-05-13 15:11:08 +01003565 auto intrinsic_type =
3566 is_signed ? (is_add ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
3567 : (is_add ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
alan-baker3f1bf492020-11-05 09:07:36 -05003568 auto a = Call->getArgOperand(0);
3569 auto b = Call->getArgOperand(1);
gnl21f8847012022-05-13 15:11:08 +01003570 auto intrinsic = Intrinsic::getDeclaration(F.getParent(), intrinsic_type,
3571 Call->getType());
3572 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {a, b}, "",
3573 Call);
alan-bakera52b7312020-10-26 08:58:51 -04003574 });
3575}
alan-baker4986eff2020-10-29 13:38:00 -04003576
3577bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
3578 return replaceCallsWithValue(F, [](CallInst *Call) {
3579 auto pointer = Call->getArgOperand(0);
3580 // Clang emits an address space cast to the generic address space. Skip the
3581 // cast and use the input directly.
3582 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3583 pointer = cast->getPointerOperand();
3584 }
alan-baker5641f5c2021-10-15 09:16:04 -04003585 Value *order_arg = Call->arg_size() > 1 ? Call->getArgOperand(1) : nullptr;
3586 Value *scope_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003587 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3588 clspv::AddressSpace::Global;
3589 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
3590 spv::MemorySemanticsAcquireMask);
3591 auto scope = MemoryScope(scope_arg, is_global, Call);
3592 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
3593 Call->getType(), {pointer, scope, order});
3594 });
3595}
3596
3597bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
3598 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
3599 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
3600 auto pointer = Call->getArgOperand(0);
3601 // Clang emits an address space cast to the generic address space. Skip the
3602 // cast and use the input directly.
3603 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3604 pointer = cast->getPointerOperand();
3605 }
3606 Value *value = Call->getArgOperand(1);
alan-baker5641f5c2021-10-15 09:16:04 -04003607 Value *order_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
3608 Value *scope_arg = Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003609 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3610 clspv::AddressSpace::Global;
3611 auto scope = MemoryScope(scope_arg, is_global, Call);
3612 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
3613 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
3614 {pointer, scope, order, value});
3615 });
3616}
3617
3618bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
3619 return replaceCallsWithValue(F, [](CallInst *Call) {
3620 auto pointer = Call->getArgOperand(0);
3621 // Clang emits an address space cast to the generic address space. Skip the
3622 // cast and use the input directly.
3623 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3624 pointer = cast->getPointerOperand();
3625 }
3626 auto expected = Call->getArgOperand(1);
3627 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
3628 expected = cast->getPointerOperand();
3629 }
3630 auto value = Call->getArgOperand(2);
3631 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3632 clspv::AddressSpace::Global;
3633 Value *success_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04003634 Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003635 Value *failure_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04003636 Call->arg_size() > 4 ? Call->getArgOperand(4) : nullptr;
3637 Value *scope_arg = Call->arg_size() > 5 ? Call->getArgOperand(5) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003638 auto scope = MemoryScope(scope_arg, is_global, Call);
3639 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
3640 spv::MemorySemanticsAcquireReleaseMask);
3641 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
3642 spv::MemorySemanticsAcquireMask);
3643
3644 // If the value pointed to by |expected| equals the value pointed to by
3645 // |pointer|, |value| is written into |pointer|, otherwise the value in
3646 // |pointer| is written into |expected|. In order to avoid extra stores,
3647 // the basic block with the original atomic is split and the store is
3648 // performed in the |then| block. The condition is the inversion of the
3649 // comparison result.
3650 IRBuilder<> builder(Call);
alan-baker67d639b2022-05-09 11:23:31 -04003651 auto load = builder.CreateLoad(value->getType(), expected);
alan-baker4986eff2020-10-29 13:38:00 -04003652 auto cmp_xchg = InsertSPIRVOp(
3653 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
3654 value->getType(), {pointer, scope, success, failure, value, load});
3655 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
3656 auto not_cmp = builder.CreateNot(cmp);
3657 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
3658 builder.SetInsertPoint(then_branch);
3659 builder.CreateStore(cmp_xchg, expected);
3660 return cmp;
3661 });
3662}
alan-bakercc2bafb2020-11-02 08:30:18 -05003663
alan-baker2cecaa72020-11-05 14:05:20 -05003664bool ReplaceOpenCLBuiltinPass::replaceCountZeroes(Function &F, bool leading) {
alan-bakercc2bafb2020-11-02 08:30:18 -05003665 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3666 return false;
3667
3668 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
alan-baker5f2e88e2020-12-07 15:24:04 -05003669 if (bitwidth > 64)
alan-bakercc2bafb2020-11-02 08:30:18 -05003670 return false;
3671
alan-baker5f2e88e2020-12-07 15:24:04 -05003672 return replaceCallsWithValue(F, [&F, leading](CallInst *Call) {
3673 Function *intrinsic = Intrinsic::getDeclaration(
3674 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz,
3675 Call->getType());
3676 const auto c_false = ConstantInt::getFalse(Call->getContext());
3677 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
3678 {Call->getArgOperand(0), c_false}, "", Call);
alan-bakercc2bafb2020-11-02 08:30:18 -05003679 });
3680}
alan-baker6b9d1ee2020-11-03 23:11:32 -05003681
3682bool ReplaceOpenCLBuiltinPass::replaceMadSat(Function &F, bool is_signed) {
3683 return replaceCallsWithValue(F, [&F, is_signed, this](CallInst *Call) {
3684 const auto ty = Call->getType();
3685 const auto a = Call->getArgOperand(0);
3686 const auto b = Call->getArgOperand(1);
3687 const auto c = Call->getArgOperand(2);
3688 IRBuilder<> builder(Call);
3689 if (is_signed) {
3690 unsigned bitwidth = Call->getType()->getScalarSizeInBits();
3691 if (bitwidth < 32) {
3692 // mul = sext(a) * sext(b)
3693 // add = mul + sext(c)
3694 // res = clamp(add, MIN, MAX)
3695 unsigned extended_width = bitwidth << 1;
Romaric Jodin73ef1be2022-01-25 17:21:22 +01003696 if (clspv::Option::HackClampWidth() && extended_width < 32) {
3697 extended_width = 32;
3698 }
alan-baker6b9d1ee2020-11-03 23:11:32 -05003699 Type *extended_ty = IntegerType::get(F.getContext(), extended_width);
3700 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3701 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
3702 }
3703 auto a_sext = builder.CreateSExt(a, extended_ty);
3704 auto b_sext = builder.CreateSExt(b, extended_ty);
3705 auto c_sext = builder.CreateSExt(c, extended_ty);
3706 // Extended the size so no overflows occur.
3707 auto mul = builder.CreateMul(a_sext, b_sext, "", true, true);
3708 auto add = builder.CreateAdd(mul, c_sext, "", true, true);
3709 auto func_ty = FunctionType::get(
3710 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
3711 // Don't use function type because we need signed parameters.
3712 std::string clamp_name = Builtins::GetMangledFunctionName("clamp");
3713 // The clamp values are the signed min and max of the original bitwidth
3714 // sign extended to the extended bitwidth.
3715 Constant *min = ConstantInt::get(
3716 Call->getContext(),
3717 APInt::getSignedMinValue(bitwidth).sext(extended_width));
3718 Constant *max = ConstantInt::get(
3719 Call->getContext(),
3720 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
3721 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3722 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3723 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3724 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
3725 if (extended_width == 32)
3726 clamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
3727 else
3728 clamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
3729 } else {
3730 if (extended_width == 32)
3731 clamp_name += "iii";
3732 else
3733 clamp_name += "sss";
3734 }
3735 auto callee = F.getParent()->getOrInsertFunction(clamp_name, func_ty);
3736 auto clamp = builder.CreateCall(callee, {add, min, max});
3737 return builder.CreateTrunc(clamp, ty);
3738 } else {
alan-baker6b9d1ee2020-11-03 23:11:32 -05003739 // Compute
3740 // {hi, lo} = smul_extended(a, b)
3741 // add = lo + c
Romaric Jodinc507f312022-04-08 19:09:45 +02003742 auto mul_ext = InsertOpMulExtended(Call, a, b, true);
3743
alan-baker6b9d1ee2020-11-03 23:11:32 -05003744 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3745 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3746 auto add = builder.CreateAdd(mul_lo, c);
3747
3748 // Constants for use in the calculation.
3749 Constant *min = ConstantInt::get(Call->getContext(),
3750 APInt::getSignedMinValue(bitwidth));
3751 Constant *max = ConstantInt::get(Call->getContext(),
3752 APInt::getSignedMaxValue(bitwidth));
3753 Constant *max_plus_1 = ConstantInt::get(
3754 Call->getContext(),
3755 APInt::getSignedMaxValue(bitwidth) + APInt(bitwidth, 1));
3756 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3757 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3758 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3759 max_plus_1 =
3760 ConstantVector::getSplat(vec_ty->getElementCount(), max_plus_1);
3761 }
3762
3763 auto a_xor_b = builder.CreateXor(a, b);
3764 auto same_sign =
3765 builder.CreateICmpSGT(a_xor_b, Constant::getAllOnesValue(ty));
3766 auto different_sign = builder.CreateNot(same_sign);
3767 auto hi_eq_0 = builder.CreateICmpEQ(mul_hi, Constant::getNullValue(ty));
3768 auto hi_ne_0 = builder.CreateNot(hi_eq_0);
3769 auto lo_ge_max = builder.CreateICmpUGE(mul_lo, max);
3770 auto c_gt_0 = builder.CreateICmpSGT(c, Constant::getNullValue(ty));
3771 auto c_lt_0 = builder.CreateICmpSLT(c, Constant::getNullValue(ty));
3772 auto add_gt_max = builder.CreateICmpUGT(add, max);
3773 auto hi_eq_m1 =
3774 builder.CreateICmpEQ(mul_hi, Constant::getAllOnesValue(ty));
3775 auto hi_ne_m1 = builder.CreateNot(hi_eq_m1);
3776 auto lo_le_max_plus_1 = builder.CreateICmpULE(mul_lo, max_plus_1);
3777 auto max_sub_lo = builder.CreateSub(max, mul_lo);
3778 auto c_lt_max_sub_lo = builder.CreateICmpULT(c, max_sub_lo);
3779
3780 // Equivalent to:
3781 // if (((x < 0) == (y < 0)) && mul_hi != 0)
3782 // return MAX
3783 // if (mul_hi == 0 && mul_lo >= MAX && (z > 0 || add > MAX))
3784 // return MAX
3785 // if (((x < 0) != (y < 0)) && mul_hi != -1)
3786 // return MIN
3787 // if (hi == -1 && mul_lo <= (MAX + 1) && (z < 0 || z < (MAX - mul_lo))
3788 // return MIN
3789 // return add
3790 auto max_clamp_1 = builder.CreateAnd(same_sign, hi_ne_0);
3791 auto max_clamp_2 = builder.CreateOr(c_gt_0, add_gt_max);
3792 auto tmp = builder.CreateAnd(hi_eq_0, lo_ge_max);
3793 max_clamp_2 = builder.CreateAnd(tmp, max_clamp_2);
3794 auto max_clamp = builder.CreateOr(max_clamp_1, max_clamp_2);
3795 auto min_clamp_1 = builder.CreateAnd(different_sign, hi_ne_m1);
3796 auto min_clamp_2 = builder.CreateOr(c_lt_0, c_lt_max_sub_lo);
3797 tmp = builder.CreateAnd(hi_eq_m1, lo_le_max_plus_1);
3798 min_clamp_2 = builder.CreateAnd(tmp, min_clamp_2);
3799 auto min_clamp = builder.CreateOr(min_clamp_1, min_clamp_2);
3800 auto sel = builder.CreateSelect(min_clamp, min, add);
3801 return builder.CreateSelect(max_clamp, max, sel);
3802 }
3803 } else {
3804 // {lo, hi} = mul_extended(a, b)
3805 // {add, carry} = add_carry(lo, c)
3806 // cmp = (mul_hi | carry) == 0
3807 // mad_sat = cmp ? add : MAX
3808 auto struct_ty = GetPairStruct(ty);
Romaric Jodinc507f312022-04-08 19:09:45 +02003809 auto mul_ext = InsertOpMulExtended(Call, a, b, false);
alan-baker6b9d1ee2020-11-03 23:11:32 -05003810 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3811 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3812 auto add_carry =
3813 InsertSPIRVOp(Call, spv::OpIAddCarry, {Attribute::ReadNone},
3814 struct_ty, {mul_lo, c});
3815 auto add = builder.CreateExtractValue(add_carry, {0});
3816 auto carry = builder.CreateExtractValue(add_carry, {1});
3817 auto or_value = builder.CreateOr(mul_hi, carry);
3818 auto cmp = builder.CreateICmpEQ(or_value, Constant::getNullValue(ty));
3819 return builder.CreateSelect(cmp, add, Constant::getAllOnesValue(ty));
3820 }
3821 });
3822}
alan-baker15106572020-11-06 15:08:10 -05003823
3824bool ReplaceOpenCLBuiltinPass::replaceOrdered(Function &F, bool is_ordered) {
3825 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3826 return false;
3827
3828 if (F.getFunctionType()->getNumParams() != 2)
3829 return false;
3830
3831 if (F.getFunctionType()->getParamType(0) !=
3832 F.getFunctionType()->getParamType(1)) {
3833 return false;
3834 }
3835
3836 switch (F.getFunctionType()->getParamType(0)->getScalarType()->getTypeID()) {
3837 case Type::FloatTyID:
3838 case Type::HalfTyID:
3839 case Type::DoubleTyID:
3840 break;
3841 default:
3842 return false;
3843 }
3844
3845 // Scalar versions all return an int, while vector versions return a vector
3846 // of an equally sized integer types (e.g. short, int or long).
3847 if (isa<VectorType>(F.getReturnType())) {
3848 if (F.getReturnType()->getScalarSizeInBits() !=
3849 F.getFunctionType()->getParamType(0)->getScalarSizeInBits()) {
3850 return false;
3851 }
3852 } else {
3853 if (F.getReturnType()->getScalarSizeInBits() != 32)
3854 return false;
3855 }
3856
3857 return replaceCallsWithValue(F, [is_ordered](CallInst *Call) {
3858 // Replace with a floating point [un]ordered comparison followed by an
3859 // extension.
3860 auto x = Call->getArgOperand(0);
3861 auto y = Call->getArgOperand(1);
3862 IRBuilder<> builder(Call);
3863 Value *tmp = nullptr;
3864 if (is_ordered) {
3865 // This leads to a slight inefficiency in the SPIR-V that is easy for
3866 // drivers to optimize where the SPIR-V for the comparison and the
3867 // extension could be fused to drop the inversion of the OpIsNan.
3868 tmp = builder.CreateFCmpORD(x, y);
3869 } else {
3870 tmp = builder.CreateFCmpUNO(x, y);
3871 }
3872 // OpenCL CTS requires that vector versions use sign extension, but scalar
3873 // versions use zero extension.
3874 if (isa<VectorType>(Call->getType()))
3875 return builder.CreateSExt(tmp, Call->getType());
3876 return builder.CreateZExt(tmp, Call->getType());
3877 });
3878}
alan-baker497920b2020-11-09 16:41:36 -05003879
3880bool ReplaceOpenCLBuiltinPass::replaceIsNormal(Function &F) {
alan-baker67d639b2022-05-09 11:23:31 -04003881 return replaceCallsWithValue(F, [](CallInst *Call) {
alan-baker497920b2020-11-09 16:41:36 -05003882 auto ty = Call->getType();
3883 auto x = Call->getArgOperand(0);
3884 unsigned width = x->getType()->getScalarSizeInBits();
3885 Type *int_ty = IntegerType::get(Call->getContext(), width);
3886 uint64_t abs_mask = 0x7fffffff;
3887 uint64_t exp_mask = 0x7f800000;
3888 uint64_t min_mask = 0x00800000;
3889 if (width == 16) {
3890 abs_mask = 0x7fff;
3891 exp_mask = 0x7c00;
3892 min_mask = 0x0400;
3893 } else if (width == 64) {
3894 abs_mask = 0x7fffffffffffffff;
3895 exp_mask = 0x7ff0000000000000;
3896 min_mask = 0x0010000000000000;
3897 }
3898 Constant *abs_const = ConstantInt::get(int_ty, APInt(width, abs_mask));
3899 Constant *exp_const = ConstantInt::get(int_ty, APInt(width, exp_mask));
3900 Constant *min_const = ConstantInt::get(int_ty, APInt(width, min_mask));
3901 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3902 int_ty = VectorType::get(int_ty, vec_ty->getElementCount());
3903 abs_const =
3904 ConstantVector::getSplat(vec_ty->getElementCount(), abs_const);
3905 exp_const =
3906 ConstantVector::getSplat(vec_ty->getElementCount(), exp_const);
3907 min_const =
3908 ConstantVector::getSplat(vec_ty->getElementCount(), min_const);
3909 }
3910 // Drop the sign bit and then check that the number is between
3911 // (exclusive) the min and max exponent values for the bit width.
3912 IRBuilder<> builder(Call);
3913 auto bitcast = builder.CreateBitCast(x, int_ty);
3914 auto abs = builder.CreateAnd(bitcast, abs_const);
3915 auto lt = builder.CreateICmpULT(abs, exp_const);
3916 auto ge = builder.CreateICmpUGE(abs, min_const);
3917 auto tmp = builder.CreateAnd(lt, ge);
3918 // OpenCL CTS requires that vector versions use sign extension, but scalar
3919 // versions use zero extension.
3920 if (isa<VectorType>(ty))
3921 return builder.CreateSExt(tmp, ty);
3922 return builder.CreateZExt(tmp, ty);
3923 });
3924}
alan-bakere0406e72020-11-10 12:32:04 -05003925
3926bool ReplaceOpenCLBuiltinPass::replaceFDim(Function &F) {
3927 return replaceCallsWithValue(F, [](CallInst *Call) {
3928 const auto x = Call->getArgOperand(0);
3929 const auto y = Call->getArgOperand(1);
3930 IRBuilder<> builder(Call);
3931 auto sub = builder.CreateFSub(x, y);
3932 auto cmp = builder.CreateFCmpUGT(x, y);
3933 return builder.CreateSelect(cmp, sub,
3934 Constant::getNullValue(Call->getType()));
3935 });
3936}
alan-baker3e0de472020-12-08 15:57:17 -05003937
3938bool ReplaceOpenCLBuiltinPass::replaceRound(Function &F) {
3939 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3940 const auto x = Call->getArgOperand(0);
3941 const double c_halfway = 0.5;
3942 auto halfway = ConstantFP::get(Call->getType(), c_halfway);
3943
3944 const auto clspv_fract_name =
3945 Builtins::GetMangledFunctionName("clspv.fract", F.getFunctionType());
3946 Function *clspv_fract_fn = F.getParent()->getFunction(clspv_fract_name);
3947 if (!clspv_fract_fn) {
3948 // Make the clspv_fract function.
3949 clspv_fract_fn = cast<Function>(
3950 F.getParent()
3951 ->getOrInsertFunction(clspv_fract_name, F.getFunctionType())
3952 .getCallee());
3953 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3954 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3955 }
3956
3957 auto ceil = Intrinsic::getDeclaration(F.getParent(), Intrinsic::ceil,
3958 Call->getType());
3959 auto floor = Intrinsic::getDeclaration(F.getParent(), Intrinsic::floor,
3960 Call->getType());
3961 auto fabs = Intrinsic::getDeclaration(F.getParent(), Intrinsic::fabs,
3962 Call->getType());
3963 auto copysign = Intrinsic::getDeclaration(
3964 F.getParent(), Intrinsic::copysign, {Call->getType(), Call->getType()});
3965
3966 IRBuilder<> builder(Call);
3967
3968 auto fabs_call = builder.CreateCall(F.getFunctionType(), fabs, {x});
3969 auto ceil_call = builder.CreateCall(F.getFunctionType(), ceil, {fabs_call});
3970 auto floor_call =
3971 builder.CreateCall(F.getFunctionType(), floor, {fabs_call});
3972 auto fract_call =
3973 builder.CreateCall(F.getFunctionType(), clspv_fract_fn, {fabs_call});
3974 auto cmp = builder.CreateFCmpOGE(fract_call, halfway);
3975 auto sel = builder.CreateSelect(cmp, ceil_call, floor_call);
3976 return builder.CreateCall(copysign->getFunctionType(), copysign, {sel, x});
3977 });
3978}
3979
3980bool ReplaceOpenCLBuiltinPass::replaceTrigPi(Function &F,
3981 Builtins::BuiltinType type) {
3982 return replaceCallsWithValue(F, [&F, type](CallInst *Call) -> Value * {
3983 const auto x = Call->getArgOperand(0);
3984 const double k_pi = 0x1.921fb54442d18p+1;
3985 Constant *pi = ConstantFP::get(x->getType(), k_pi);
3986
3987 IRBuilder<> builder(Call);
3988 auto mul = builder.CreateFMul(x, pi);
3989 switch (type) {
3990 case Builtins::kSinpi: {
3991 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3992 x->getType());
3993 return builder.CreateCall(func->getFunctionType(), func, {mul});
3994 }
3995 case Builtins::kCospi: {
3996 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3997 x->getType());
3998 return builder.CreateCall(func->getFunctionType(), func, {mul});
3999 }
4000 case Builtins::kTanpi: {
4001 auto sin = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
4002 x->getType());
4003 auto sin_call = builder.CreateCall(sin->getFunctionType(), sin, {mul});
4004 auto cos = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
4005 x->getType());
4006 auto cos_call = builder.CreateCall(cos->getFunctionType(), cos, {mul});
4007 return builder.CreateFDiv(sin_call, cos_call);
4008 }
4009 default:
4010 llvm_unreachable("unexpected builtin");
4011 break;
4012 }
4013 return nullptr;
4014 });
4015}
alan-baker8b968112020-12-15 15:53:29 -05004016
4017bool ReplaceOpenCLBuiltinPass::replaceSincos(Function &F) {
4018 return replaceCallsWithValue(F, [&F](CallInst *Call) {
4019 auto sin_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
4020 Call->getType());
4021 auto cos_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
4022 Call->getType());
4023
4024 IRBuilder<> builder(Call);
4025 auto sin = builder.CreateCall(sin_func->getFunctionType(), sin_func,
4026 {Call->getArgOperand(0)});
4027 auto cos = builder.CreateCall(cos_func->getFunctionType(), cos_func,
4028 {Call->getArgOperand(0)});
4029 builder.CreateStore(cos, Call->getArgOperand(1));
4030 return sin;
4031 });
4032}
4033
4034bool ReplaceOpenCLBuiltinPass::replaceExpm1(Function &F) {
4035 return replaceCallsWithValue(F, [&F](CallInst *Call) {
4036 auto exp_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::exp,
4037 Call->getType());
4038
4039 IRBuilder<> builder(Call);
4040 auto exp = builder.CreateCall(exp_func->getFunctionType(), exp_func,
4041 {Call->getArgOperand(0)});
4042 return builder.CreateFSub(exp, ConstantFP::get(Call->getType(), 1.0));
4043 });
4044}
4045
4046bool ReplaceOpenCLBuiltinPass::replacePown(Function &F) {
4047 return replaceCallsWithValue(F, [&F](CallInst *Call) {
4048 auto pow_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::pow,
4049 Call->getType());
4050
4051 IRBuilder<> builder(Call);
4052 auto conv = builder.CreateSIToFP(Call->getArgOperand(1), Call->getType());
4053 return builder.CreateCall(pow_func->getFunctionType(), pow_func,
4054 {Call->getArgOperand(0), conv});
4055 });
4056}