blob: 159b1cdc8c5274b5fb4f71f107329f6d5d73c9fc [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
alan-baker57ce1c22022-04-26 19:10:44 -040039#include "ReplaceOpenCLBuiltinPass.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040040#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
David Neto22f144c2017-06-12 14:26:21 -040046namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000047
David Neto22f144c2017-06-12 14:26:21 -040048uint32_t clz(uint32_t v) {
49 uint32_t r;
50 uint32_t shift;
51
52 r = (v > 0xFFFF) << 4;
53 v >>= r;
54 shift = (v > 0xFF) << 3;
55 v >>= shift;
56 r |= shift;
57 shift = (v > 0xF) << 2;
58 v >>= shift;
59 r |= shift;
60 shift = (v > 0x3) << 1;
61 v >>= shift;
62 r |= shift;
63 r |= (v >> 1);
64
65 return r;
66}
67
Kévin Petitfdfa92e2019-09-25 14:20:58 +010068Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
69 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040070 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040071 IntTy = FixedVectorType::get(IntTy,
72 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010073 }
74 return IntTy;
75}
76
alan-baker4986eff2020-10-29 13:38:00 -040077Value *MemoryOrderSemantics(Value *order, bool is_global,
78 Instruction *InsertBefore,
alan-baker36309f92021-02-05 12:28:03 -050079 spv::MemorySemanticsMask base_semantics,
80 bool include_storage = true) {
alan-baker4986eff2020-10-29 13:38:00 -040081 enum AtomicMemoryOrder : uint32_t {
82 kMemoryOrderRelaxed = 0,
83 kMemoryOrderAcquire = 2,
84 kMemoryOrderRelease = 3,
85 kMemoryOrderAcqRel = 4,
86 kMemoryOrderSeqCst = 5
87 };
88
89 IRBuilder<> builder(InsertBefore);
90
91 // Constants for OpenCL C 2.0 memory_order.
92 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
93 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
94 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
95 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
96
97 // Constants for SPIR-V ordering memory semantics.
98 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
99 const auto AcquireSemantics =
100 builder.getInt32(spv::MemorySemanticsAcquireMask);
101 const auto ReleaseSemantics =
102 builder.getInt32(spv::MemorySemanticsReleaseMask);
103 const auto AcqRelSemantics =
104 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
105
106 // Constants for SPIR-V storage class semantics.
107 const auto UniformSemantics =
108 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
109 const auto WorkgroupSemantics =
110 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
111
112 // Instead of sequentially consistent, use acquire, release or acquire
113 // release semantics.
114 Value *base_order = nullptr;
115 switch (base_semantics) {
116 case spv::MemorySemanticsAcquireMask:
117 base_order = AcquireSemantics;
118 break;
119 case spv::MemorySemanticsReleaseMask:
120 base_order = ReleaseSemantics;
121 break;
122 default:
123 base_order = AcqRelSemantics;
124 break;
125 }
126
127 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
alan-baker36309f92021-02-05 12:28:03 -0500128 if (order == nullptr) {
129 if (include_storage)
130 return builder.CreateOr({storage, base_order});
131 else
132 return base_order;
133 }
alan-baker4986eff2020-10-29 13:38:00 -0400134
135 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
136 auto is_acquire = builder.CreateICmpEQ(order, acquire);
137 auto is_release = builder.CreateICmpEQ(order, release);
138 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
139 auto semantics =
140 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
141 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
142 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
143 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
alan-baker36309f92021-02-05 12:28:03 -0500144 if (include_storage)
145 return builder.CreateOr({storage, semantics});
146 else
147 return semantics;
alan-baker4986eff2020-10-29 13:38:00 -0400148}
149
150Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
151 enum AtomicMemoryScope : uint32_t {
152 kMemoryScopeWorkItem = 0,
153 kMemoryScopeWorkGroup = 1,
154 kMemoryScopeDevice = 2,
155 kMemoryScopeAllSVMDevices = 3, // not supported
156 kMemoryScopeSubGroup = 4
157 };
158
159 IRBuilder<> builder(InsertBefore);
160
161 // Constants for OpenCL C 2.0 memory_scope.
162 const auto work_item =
163 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
164 const auto work_group =
165 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
166 const auto sub_group =
167 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
168 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
169
170 // Constants for SPIR-V memory scopes.
171 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
172 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
173 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
174 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
175
176 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
177 if (scope == nullptr)
178 return base_scope;
179
180 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
181 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
182 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
183 auto is_device = builder.CreateICmpEQ(scope, device);
184
185 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
186 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
187 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
188 scope = builder.CreateSelect(is_device, DeviceScope, scope);
189
190 return scope;
191}
192
SJW2c317da2020-03-23 07:39:13 -0500193bool replaceCallsWithValue(Function &F,
194 std::function<Value *(CallInst *)> Replacer) {
195
196 bool Changed = false;
197
198 SmallVector<Instruction *, 4> ToRemoves;
199
200 // Walk the users of the function.
201 for (auto &U : F.uses()) {
202 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
203
204 auto NewValue = Replacer(CI);
205
206 if (NewValue != nullptr) {
207 CI->replaceAllUsesWith(NewValue);
208
209 // Lastly, remember to remove the user.
210 ToRemoves.push_back(CI);
211 }
212 }
213 }
214
215 Changed = !ToRemoves.empty();
216
217 // And cleanup the calls we don't use anymore.
218 for (auto V : ToRemoves) {
219 V->eraseFromParent();
220 }
221
222 return Changed;
223}
224
Kévin Petit91bc72e2019-04-08 15:17:46 +0100225} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400226
alan-baker57ce1c22022-04-26 19:10:44 -0400227PreservedAnalyses ReplaceOpenCLBuiltinPass::run(Module &M,
228 ModuleAnalysisManager &MPM) {
229 PreservedAnalyses PA;
SJW2c317da2020-03-23 07:39:13 -0500230 std::list<Function *> func_list;
231 for (auto &F : M.getFunctionList()) {
232 // process only function declarations
233 if (F.isDeclaration() && runOnFunction(F)) {
234 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000235 }
236 }
SJW2c317da2020-03-23 07:39:13 -0500237 if (func_list.size() != 0) {
238 // recursively convert functions, but first remove dead
239 for (auto *F : func_list) {
240 if (F->use_empty()) {
241 F->eraseFromParent();
242 }
243 }
alan-baker57ce1c22022-04-26 19:10:44 -0400244 PA = run(M, MPM);
245 return PA;
SJW2c317da2020-03-23 07:39:13 -0500246 }
alan-baker57ce1c22022-04-26 19:10:44 -0400247 return PA;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000248}
249
SJW2c317da2020-03-23 07:39:13 -0500250bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
251 auto &FI = Builtins::Lookup(&F);
252 switch (FI.getType()) {
253 case Builtins::kAbs:
254 if (!FI.getParameter(0).is_signed) {
255 return replaceAbs(F);
256 }
257 break;
258 case Builtins::kAbsDiff:
259 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400260
261 case Builtins::kAddSat:
alan-baker3f1bf492020-11-05 09:07:36 -0500262 return replaceAddSubSat(F, FI.getParameter(0).is_signed, true);
alan-bakera52b7312020-10-26 08:58:51 -0400263
alan-bakercc2bafb2020-11-02 08:30:18 -0500264 case Builtins::kClz:
alan-baker2cecaa72020-11-05 14:05:20 -0500265 return replaceCountZeroes(F, true);
266
267 case Builtins::kCtz:
268 return replaceCountZeroes(F, false);
alan-bakercc2bafb2020-11-02 08:30:18 -0500269
alan-bakerb6da5132020-10-29 15:59:06 -0400270 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000271 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400272 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000273 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400274
SJW2c317da2020-03-23 07:39:13 -0500275 case Builtins::kCopysign:
276 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100277
SJW2c317da2020-03-23 07:39:13 -0500278 case Builtins::kHalfRecip:
279 case Builtins::kNativeRecip:
280 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100281
SJW2c317da2020-03-23 07:39:13 -0500282 case Builtins::kHalfDivide:
283 case Builtins::kNativeDivide:
284 return replaceDivide(F);
285
286 case Builtins::kDot:
287 return replaceDot(F);
288
289 case Builtins::kExp10:
290 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500291 case Builtins::kNativeExp10:
292 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500293
alan-baker8b968112020-12-15 15:53:29 -0500294 case Builtins::kExpm1:
295 return replaceExpm1(F);
296
SJW2c317da2020-03-23 07:39:13 -0500297 case Builtins::kLog10:
298 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500299 case Builtins::kNativeLog10:
300 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500301
gnl21636e7992020-09-09 16:08:16 +0100302 case Builtins::kLog1p:
303 return replaceLog1p(F);
304
alan-bakere0406e72020-11-10 12:32:04 -0500305 case Builtins::kFdim:
306 return replaceFDim(F);
307
SJW2c317da2020-03-23 07:39:13 -0500308 case Builtins::kFmod:
309 return replaceFmod(F);
310
alan-baker8b968112020-12-15 15:53:29 -0500311 case Builtins::kPown:
312 return replacePown(F);
313
alan-baker3e0de472020-12-08 15:57:17 -0500314 case Builtins::kRound:
315 return replaceRound(F);
316
317 case Builtins::kCospi:
318 case Builtins::kSinpi:
319 case Builtins::kTanpi:
320 return replaceTrigPi(F, FI.getType());
321
alan-baker8b968112020-12-15 15:53:29 -0500322 case Builtins::kSincos:
323 return replaceSincos(F);
324
SJW2c317da2020-03-23 07:39:13 -0500325 case Builtins::kBarrier:
326 case Builtins::kWorkGroupBarrier:
327 return replaceBarrier(F);
328
alan-baker12d2c182020-07-20 08:22:42 -0400329 case Builtins::kSubGroupBarrier:
330 return replaceBarrier(F, true);
331
alan-baker36309f92021-02-05 12:28:03 -0500332 case Builtins::kAtomicWorkItemFence:
333 return replaceMemFence(F, spv::MemorySemanticsMaskNone);
SJW2c317da2020-03-23 07:39:13 -0500334 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400335 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500336 case Builtins::kReadMemFence:
337 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
338 case Builtins::kWriteMemFence:
339 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
340
341 // Relational
342 case Builtins::kIsequal:
alan-baker3e217772020-11-07 17:29:40 -0500343 return replaceRelational(F, CmpInst::FCMP_OEQ);
SJW2c317da2020-03-23 07:39:13 -0500344 case Builtins::kIsgreater:
alan-baker3e217772020-11-07 17:29:40 -0500345 return replaceRelational(F, CmpInst::FCMP_OGT);
SJW2c317da2020-03-23 07:39:13 -0500346 case Builtins::kIsgreaterequal:
alan-baker3e217772020-11-07 17:29:40 -0500347 return replaceRelational(F, CmpInst::FCMP_OGE);
SJW2c317da2020-03-23 07:39:13 -0500348 case Builtins::kIsless:
alan-baker3e217772020-11-07 17:29:40 -0500349 return replaceRelational(F, CmpInst::FCMP_OLT);
SJW2c317da2020-03-23 07:39:13 -0500350 case Builtins::kIslessequal:
alan-baker3e217772020-11-07 17:29:40 -0500351 return replaceRelational(F, CmpInst::FCMP_OLE);
SJW2c317da2020-03-23 07:39:13 -0500352 case Builtins::kIsnotequal:
alan-baker3e217772020-11-07 17:29:40 -0500353 return replaceRelational(F, CmpInst::FCMP_UNE);
354 case Builtins::kIslessgreater:
355 return replaceRelational(F, CmpInst::FCMP_ONE);
SJW2c317da2020-03-23 07:39:13 -0500356
alan-baker15106572020-11-06 15:08:10 -0500357 case Builtins::kIsordered:
358 return replaceOrdered(F, true);
359
360 case Builtins::kIsunordered:
361 return replaceOrdered(F, false);
362
SJW2c317da2020-03-23 07:39:13 -0500363 case Builtins::kIsinf: {
364 bool is_vec = FI.getParameter(0).vector_size != 0;
365 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
366 }
367 case Builtins::kIsnan: {
368 bool is_vec = FI.getParameter(0).vector_size != 0;
369 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
370 }
371
372 case Builtins::kIsfinite:
373 return replaceIsFinite(F);
374
375 case Builtins::kAll: {
376 bool is_vec = FI.getParameter(0).vector_size != 0;
377 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
378 }
379 case Builtins::kAny: {
380 bool is_vec = FI.getParameter(0).vector_size != 0;
381 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
382 }
383
alan-baker497920b2020-11-09 16:41:36 -0500384 case Builtins::kIsnormal:
385 return replaceIsNormal(F);
386
SJW2c317da2020-03-23 07:39:13 -0500387 case Builtins::kUpsample:
388 return replaceUpsample(F);
389
390 case Builtins::kRotate:
391 return replaceRotate(F);
392
393 case Builtins::kConvert:
394 return replaceConvert(F, FI.getParameter(0).is_signed,
395 FI.getReturnType().is_signed);
396
alan-baker4986eff2020-10-29 13:38:00 -0400397 // OpenCL 2.0 explicit atomics have different default scopes and semantics
398 // than legacy atomic functions.
399 case Builtins::kAtomicLoad:
400 case Builtins::kAtomicLoadExplicit:
401 return replaceAtomicLoad(F);
402 case Builtins::kAtomicStore:
403 case Builtins::kAtomicStoreExplicit:
404 return replaceExplicitAtomics(F, spv::OpAtomicStore,
405 spv::MemorySemanticsReleaseMask);
406 case Builtins::kAtomicExchange:
407 case Builtins::kAtomicExchangeExplicit:
408 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
409 case Builtins::kAtomicFetchAdd:
410 case Builtins::kAtomicFetchAddExplicit:
411 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
412 case Builtins::kAtomicFetchSub:
413 case Builtins::kAtomicFetchSubExplicit:
414 return replaceExplicitAtomics(F, spv::OpAtomicISub);
415 case Builtins::kAtomicFetchOr:
416 case Builtins::kAtomicFetchOrExplicit:
417 return replaceExplicitAtomics(F, spv::OpAtomicOr);
418 case Builtins::kAtomicFetchXor:
419 case Builtins::kAtomicFetchXorExplicit:
420 return replaceExplicitAtomics(F, spv::OpAtomicXor);
421 case Builtins::kAtomicFetchAnd:
422 case Builtins::kAtomicFetchAndExplicit:
423 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
424 case Builtins::kAtomicFetchMin:
425 case Builtins::kAtomicFetchMinExplicit:
426 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
427 ? spv::OpAtomicSMin
428 : spv::OpAtomicUMin);
429 case Builtins::kAtomicFetchMax:
430 case Builtins::kAtomicFetchMaxExplicit:
431 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
432 ? spv::OpAtomicSMax
433 : spv::OpAtomicUMax);
434 // Weak compare exchange is generated as strong compare exchange.
435 case Builtins::kAtomicCompareExchangeWeak:
436 case Builtins::kAtomicCompareExchangeWeakExplicit:
437 case Builtins::kAtomicCompareExchangeStrong:
438 case Builtins::kAtomicCompareExchangeStrongExplicit:
439 return replaceAtomicCompareExchange(F);
440
441 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500442 case Builtins::kAtomicInc:
443 return replaceAtomics(F, spv::OpAtomicIIncrement);
444 case Builtins::kAtomicDec:
445 return replaceAtomics(F, spv::OpAtomicIDecrement);
446 case Builtins::kAtomicCmpxchg:
447 return replaceAtomics(F, spv::OpAtomicCompareExchange);
448 case Builtins::kAtomicAdd:
449 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
450 case Builtins::kAtomicSub:
451 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
452 case Builtins::kAtomicXchg:
453 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
454 case Builtins::kAtomicMin:
455 return replaceAtomics(F, FI.getParameter(0).is_signed
456 ? llvm::AtomicRMWInst::Min
457 : llvm::AtomicRMWInst::UMin);
458 case Builtins::kAtomicMax:
459 return replaceAtomics(F, FI.getParameter(0).is_signed
460 ? llvm::AtomicRMWInst::Max
461 : llvm::AtomicRMWInst::UMax);
462 case Builtins::kAtomicAnd:
463 return replaceAtomics(F, llvm::AtomicRMWInst::And);
464 case Builtins::kAtomicOr:
465 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
466 case Builtins::kAtomicXor:
467 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
468
469 case Builtins::kCross:
470 if (FI.getParameter(0).vector_size == 4) {
471 return replaceCross(F);
472 }
473 break;
474
475 case Builtins::kFract:
476 if (FI.getParameterCount()) {
477 return replaceFract(F, FI.getParameter(0).vector_size);
478 }
479 break;
480
481 case Builtins::kMadHi:
482 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
483 case Builtins::kMulHi:
484 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
485
alan-baker6b9d1ee2020-11-03 23:11:32 -0500486 case Builtins::kMadSat:
487 return replaceMadSat(F, FI.getParameter(0).is_signed);
488
SJW2c317da2020-03-23 07:39:13 -0500489 case Builtins::kMad:
490 case Builtins::kMad24:
491 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
492 true);
493 case Builtins::kMul24:
494 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
495 false);
496
497 case Builtins::kSelect:
498 return replaceSelect(F);
499
500 case Builtins::kBitselect:
501 return replaceBitSelect(F);
502
503 case Builtins::kVload:
504 return replaceVload(F);
505
506 case Builtins::kVloadaHalf:
Romaric Jodin71fdb322022-05-03 17:01:10 +0200507 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size,
508 true);
SJW2c317da2020-03-23 07:39:13 -0500509 case Builtins::kVloadHalf:
Romaric Jodin71fdb322022-05-03 17:01:10 +0200510 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size,
511 false);
SJW2c317da2020-03-23 07:39:13 -0500512
513 case Builtins::kVstore:
514 return replaceVstore(F);
515
SJW2c317da2020-03-23 07:39:13 -0500516 case Builtins::kVstoreaHalf:
Romaric Jodin71fdb322022-05-03 17:01:10 +0200517 return replaceVstoreHalf(F, FI.getParameter(0).vector_size, true);
518 case Builtins::kVstoreHalf:
519 return replaceVstoreHalf(F, FI.getParameter(0).vector_size, false);
SJW2c317da2020-03-23 07:39:13 -0500520
521 case Builtins::kSmoothstep: {
522 int vec_size = FI.getLastParameter().vector_size;
523 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500524 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500525 }
526 break;
527 }
528 case Builtins::kStep: {
529 int vec_size = FI.getLastParameter().vector_size;
530 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500531 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500532 }
533 break;
534 }
535
536 case Builtins::kSignbit:
537 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
538
alan-baker3f1bf492020-11-05 09:07:36 -0500539 case Builtins::kSubSat:
540 return replaceAddSubSat(F, FI.getParameter(0).is_signed, false);
541
SJW2c317da2020-03-23 07:39:13 -0500542 case Builtins::kReadImageh:
543 return replaceHalfReadImage(F);
544 case Builtins::kReadImagef:
545 case Builtins::kReadImagei:
546 case Builtins::kReadImageui: {
547 if (FI.getParameter(1).isSampler() &&
548 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
549 return replaceSampledReadImageWithIntCoords(F);
550 }
551 break;
552 }
553
554 case Builtins::kWriteImageh:
555 return replaceHalfWriteImage(F);
556
Kévin Petit1cb45112020-04-27 18:55:48 +0100557 case Builtins::kPrefetch:
558 return replacePrefetch(F);
559
rjodinchr791203f2021-10-07 20:42:41 +0200560 // Asynchronous copies
561 case Builtins::kAsyncWorkGroupCopy:
562 return replaceAsyncWorkGroupCopy(F);
563 case Builtins::kAsyncWorkGroupStridedCopy:
564 return replaceAsyncWorkGroupStridedCopy(F);
565 case Builtins::kWaitGroupEvents:
566 return replaceWaitGroupEvents(F);
567
SJW2c317da2020-03-23 07:39:13 -0500568 default:
569 break;
570 }
571
572 return false;
573}
574
alan-baker6b9d1ee2020-11-03 23:11:32 -0500575Type *ReplaceOpenCLBuiltinPass::GetPairStruct(Type *type) {
576 auto iter = PairStructMap.find(type);
577 if (iter != PairStructMap.end())
578 return iter->second;
579
580 auto new_struct = StructType::get(type->getContext(), {type, type});
581 PairStructMap[type] = new_struct;
582 return new_struct;
583}
584
Romaric Jodinc507f312022-04-08 19:09:45 +0200585Value *ReplaceOpenCLBuiltinPass::InsertOpMulExtended(Instruction *InsertPoint,
586 Value *a, Value *b,
alan-baker57ce1c22022-04-26 19:10:44 -0400587 bool IsSigned,
588 bool Int64) {
Romaric Jodinc507f312022-04-08 19:09:45 +0200589
590 Type *Ty = a->getType();
591 Type *RetTy = GetPairStruct(a->getType());
592 assert(Ty == b->getType());
593
594 if (!Option::HackMulExtended()) {
595 spv::Op opcode = IsSigned ? spv::OpSMulExtended : spv::OpUMulExtended;
596
597 return clspv::InsertSPIRVOp(InsertPoint, opcode, {Attribute::ReadNone},
598 RetTy, {a, b});
599 }
600
601 unsigned int ScalarSizeInBits = Ty->getScalarSizeInBits();
602 bool IsVector = Ty->isVectorTy();
603
604 IRBuilder<> Builder(InsertPoint);
605
606 if (ScalarSizeInBits < 32 || (ScalarSizeInBits == 32 && Int64)) {
607 /*
608 * {mul_lo, mul_hi} = OpMulExtended(a, b, IsSigned) {
609 * S = SizeInBits(a)
610 * a_ext = ext2S(a, IsSigned)
611 * b_ext = ext2S(b, IsSigned)
612 * mul = a_ext * b_ext
613 * mul_lo = truncS(mul)
614 * mul_hi = truncS(mul >> S)
615 * return {mul_lo, mul_hi}
616 * }
617 */
618 Type *TyTimes2 =
619 Ty->getIntNTy(InsertPoint->getContext(), ScalarSizeInBits * 2);
620 if (IsVector) {
621 TyTimes2 = VectorType::get(TyTimes2, dyn_cast<VectorType>(Ty));
622 }
623 Value *aExtended, *bExtended;
624 if (IsSigned) {
625 aExtended = Builder.CreateSExt(a, TyTimes2);
626 bExtended = Builder.CreateSExt(b, TyTimes2);
627 } else {
628 aExtended = Builder.CreateZExt(a, TyTimes2);
629 bExtended = Builder.CreateZExt(b, TyTimes2);
630 }
631 auto mul = Builder.CreateMul(aExtended, bExtended);
632 auto mul_lo = Builder.CreateTrunc(mul, Ty);
633 auto mul_hi =
634 Builder.CreateTrunc(Builder.CreateLShr(mul, ScalarSizeInBits), Ty);
635
636 return Builder.CreateInsertValue(
637 Builder.CreateInsertValue(UndefValue::get(RetTy), mul_lo, {0}), mul_hi,
638 {1});
639 } else if (ScalarSizeInBits == 64 || (ScalarSizeInBits == 32 && !Int64)) {
640 /*
641 * {mul_lo, mul_hi} = OpMulExtended(a, b, IsSigned) {
642 * S = SizeInBits(a)
643 * hS = S / 2
644 * if (IsSigned) {
645 * res_neg = (a > 0) ^ (b > 0) = (a ^ b) < 0
646 * a = abs(a)
647 * b = abs(b)
648 * }
649 * a0 = trunchS(a)
650 * a1 = trunchS(a >> hS)
651 * b0 = trunchS(b)
652 * b1 = trunchS(b >> hS)
653 * {a0b0_0, a0b0_1} = zextS(OpUMulExtended(a0, b0))
654 * {a1b0_0, a1b0_1} = zextS(OpUMulExtended(a1, b0))
655 * {a0b1_0, a0b1_1} = zextS(OpUMulExtended(a0, b1))
656 * {a1b1_0, a1b1_1} = zextS(OpUMulExtended(a1, b1))
657 *
658 * mul_lo_hi = a0b0_1 + a1b0_0 + a0b1_0
659 * carry_mul_lo_hi = mul_lo_hi >> hS
660 * mul_hi_lo = a1b1_0 + a1b0_1 + a0b1_1 + carry_mul_lo_hi
661 * mul_lo = a0b0_0 + mul_lo_hi << hS
662 * mul_hi = mul_hi_lo + a1b1_1 << hS
663 *
664 * if (IsSigned) {
665 * mul_lo_xor = mul_lo ^ -1
666 * {mul_lo_inv, carry} = OpIAddCarry(mul_lo_xor, 1)
667 * mul_hi_inv = mul_hi ^ -1 + carry
668 * mul_lo = res_neg ? mul_lo_inv : mul_lo
669 * mul_hi = res_neg ? mul_hi_inv : mul_hi
670 * }
671 * return {mul_lo, mul_hi}
672 * }
673 */
674 Type *TyDiv2 =
675 Ty->getIntNTy(InsertPoint->getContext(), ScalarSizeInBits / 2);
676 if (IsVector) {
677 TyDiv2 = VectorType::get(TyDiv2, dyn_cast<VectorType>(Ty));
678 }
679
680 Value *res_neg;
681 if (IsSigned) {
682 // We want to work with unsigned value.
683 // Convert everything to unsigned and remember the signed of the end
684 // result.
685 auto a_b_xor = Builder.CreateXor(a, b);
686 res_neg = Builder.CreateICmpSLT(a_b_xor, ConstantInt::get(Ty, 0, true));
687
688 auto F = InsertPoint->getFunction();
689 auto abs = Intrinsic::getDeclaration(F->getParent(), Intrinsic::abs, Ty);
690 a = Builder.CreateCall(abs, {a, Builder.getInt1(false)});
691 b = Builder.CreateCall(abs, {b, Builder.getInt1(false)});
692 }
693
694 auto a0 = Builder.CreateTrunc(a, TyDiv2);
695 auto a1 = Builder.CreateTrunc(Builder.CreateLShr(a, ScalarSizeInBits / 2),
696 TyDiv2);
697 auto b0 = Builder.CreateTrunc(b, TyDiv2);
698 auto b1 = Builder.CreateTrunc(Builder.CreateLShr(b, ScalarSizeInBits / 2),
699 TyDiv2);
700
701 auto a0b0 = InsertOpMulExtended(InsertPoint, a0, b0, false, true);
702 auto a1b0 = InsertOpMulExtended(InsertPoint, a1, b0, false, true);
703 auto a0b1 = InsertOpMulExtended(InsertPoint, a0, b1, false, true);
704 auto a1b1 = InsertOpMulExtended(InsertPoint, a1, b1, false, true);
705 auto a0b0_0 = Builder.CreateZExt(Builder.CreateExtractValue(a0b0, {0}), Ty);
706 auto a0b0_1 = Builder.CreateZExt(Builder.CreateExtractValue(a0b0, {1}), Ty);
707 auto a1b0_0 = Builder.CreateZExt(Builder.CreateExtractValue(a1b0, {0}), Ty);
708 auto a1b0_1 = Builder.CreateZExt(Builder.CreateExtractValue(a1b0, {1}), Ty);
709 auto a0b1_0 = Builder.CreateZExt(Builder.CreateExtractValue(a0b1, {0}), Ty);
710 auto a0b1_1 = Builder.CreateZExt(Builder.CreateExtractValue(a0b1, {1}), Ty);
711 auto a1b1_0 = Builder.CreateZExt(Builder.CreateExtractValue(a1b1, {0}), Ty);
712 auto a1b1_1 = Builder.CreateZExt(Builder.CreateExtractValue(a1b1, {1}), Ty);
713
714 auto mul_lo_hi =
715 Builder.CreateAdd(Builder.CreateAdd(a0b0_1, a1b0_0), a0b1_0);
716 auto carry_mul_lo_hi = Builder.CreateLShr(mul_lo_hi, ScalarSizeInBits / 2);
717 auto mul_hi_lo = Builder.CreateAdd(
718 Builder.CreateAdd(Builder.CreateAdd(a1b1_0, a1b0_1), a0b1_1),
719 carry_mul_lo_hi);
720 auto mul_lo = Builder.CreateAdd(
721 a0b0_0, Builder.CreateShl(mul_lo_hi, ScalarSizeInBits / 2));
722 auto mul_hi = Builder.CreateAdd(
723 mul_hi_lo, Builder.CreateShl(a1b1_1, ScalarSizeInBits / 2));
724
725 if (IsSigned) {
726 // Apply the sign that we got from the previous if statement setting
727 // res_neg.
728 auto mul_lo_xor =
729 Builder.CreateXor(mul_lo, Constant::getAllOnesValue(Ty));
730 auto mul_lo_xor_add =
731 InsertSPIRVOp(InsertPoint, spv::OpIAddCarry, {Attribute::ReadNone},
732 RetTy, {mul_lo_xor, ConstantInt::get(Ty, 1)});
733 auto mul_lo_inv = Builder.CreateExtractValue(mul_lo_xor_add, {0});
734 auto carry = Builder.CreateExtractValue(mul_lo_xor_add, {1});
735 auto mul_hi_inv = Builder.CreateAdd(
736 carry, Builder.CreateXor(mul_hi, Constant::getAllOnesValue(Ty)));
737 mul_lo = Builder.CreateSelect(res_neg, mul_lo_inv, mul_lo);
738 mul_hi = Builder.CreateSelect(res_neg, mul_hi_inv, mul_hi);
739 }
740
741 return Builder.CreateInsertValue(
742 Builder.CreateInsertValue(UndefValue::get(RetTy), mul_lo, {0}), mul_hi,
743 {1});
744 } else {
745 llvm_unreachable("Unexpected type for InsertOpMulExtended");
746 }
747}
748
rjodinchr791203f2021-10-07 20:42:41 +0200749bool ReplaceOpenCLBuiltinPass::replaceWaitGroupEvents(Function &F) {
750 /* Simple implementation for wait_group_events to avoid dealing with the event
751 * list:
752 *
753 * void wait_group_events(int num_events, event_t *event_list) {
754 * barrier(CLK_LOCAL_MEM_FENCE);
755 * }
756 *
757 */
758
759 enum {
760 CLK_LOCAL_MEM_FENCE = 0x01,
761 CLK_GLOBAL_MEM_FENCE = 0x02,
762 CLK_IMAGE_MEM_FENCE = 0x04
763 };
764
765 return replaceCallsWithValue(F, [](CallInst *CI) {
766 IRBuilder<> Builder(CI);
767
768 const auto ConstantScopeWorkgroup = Builder.getInt32(spv::ScopeWorkgroup);
769 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
770 Instruction::Shl, Builder.getInt32(CLK_LOCAL_MEM_FENCE),
771 Builder.getInt32(clz(spv::MemorySemanticsWorkgroupMemoryMask) -
772 clz(CLK_LOCAL_MEM_FENCE)),
773 "", CI);
774 auto MemorySemantics = BinaryOperator::Create(
775 Instruction::Or, MemorySemanticsWorkgroup,
776 ConstantInt::get(Builder.getInt32Ty(),
777 spv::MemorySemanticsAcquireReleaseMask),
778 "", CI);
779
780 return clspv::InsertSPIRVOp(
781 CI, spv::OpControlBarrier,
782 {Attribute::NoDuplicate, Attribute::Convergent}, Builder.getVoidTy(),
783 {ConstantScopeWorkgroup, ConstantScopeWorkgroup, MemorySemantics});
784 });
785}
786
787GlobalVariable *ReplaceOpenCLBuiltinPass::getOrCreateGlobalVariable(
788 Module &M, std::string VariableName,
789 AddressSpace::Type VariableAddressSpace) {
790 GlobalVariable *GV = M.getGlobalVariable(VariableName);
791 if (GV == nullptr) {
792 IntegerType *IT = IntegerType::get(M.getContext(), 32);
793 VectorType *VT = FixedVectorType::get(IT, 3);
794
795 GV = new GlobalVariable(M, VT, false, GlobalValue::ExternalLinkage, nullptr,
796 VariableName, nullptr,
797 GlobalValue::ThreadLocalMode::NotThreadLocal,
798 VariableAddressSpace);
799 GV->setInitializer(Constant::getNullValue(VT));
800 }
801 return GV;
802}
803
804Value *ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopies(
805 Module &M, CallInst *CI, Value *Dst, Value *Src, Value *NumGentypes,
806 Value *Stride, Value *Event) {
807 /*
808 * event_t *async_work_group_strided_copy(T *dst, T *src, size_t num_gentypes,
809 * size_t stride, event_t event) {
810 * size_t start_id = ((get_local_id(2) * get_local_size(1))
811 * + get_local_id(1)) * get_local_size(0)
812 * + get_local_id(0);
813 * size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
814 * for (size_t it = start_id; it < num_gentypes; it += incr) {
815 * dst[it] = src[it * stride];
816 * }
817 * return event;
818 * }
819 */
820
821 /* BB:
822 * before
823 * async_work_group_strided_copy
824 * after
825 *
826 * ================================
827 *
828 * BB:
829 * before
830 * start_id = f(get_local_ids, get_local_sizes)
831 * incr = g(get_local_sizes)
832 * br CmpBB
833 *
834 * CmpBB:
835 * it = PHI(start_id, it)
836 * cmp = it < NumGentypes
837 * condBr cmp, LoopBB, ExitBB
838 *
839 * LoopBB:
840 * dstI = dst[it]
841 * srcI = src[it * stride]
842 * OpCopyMemory dstI, srcI
843 * it += incr
844 * br CmpBB
845 *
846 * ExitBB:
847 * after
848 */
849
850 IRBuilder<> Builder(CI);
851
852 auto Cst0 = Builder.getInt32(0);
853 auto Cst1 = Builder.getInt32(1);
854 auto Cst2 = Builder.getInt32(2);
855
856 // get_local_id({0, 1, 2});
857 GlobalVariable *GVId =
858 getOrCreateGlobalVariable(M, clspv::LocalInvocationIdVariableName(),
859 clspv::LocalInvocationIdAddressSpace());
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +0100860 Type *GVIdElTy = GVId->getType()->getScalarType()->getPointerElementType();
861 Value *GEP0 = Builder.CreateGEP(GVIdElTy, GVId, {Cst0, Cst0});
862 Value *LocalId0 =
863 Builder.CreateLoad(GEP0->getType()->getPointerElementType(), GEP0);
864 Value *GEP1 = Builder.CreateGEP(GVIdElTy, GVId, {Cst0, Cst1});
865 Value *LocalId1 =
866 Builder.CreateLoad(GEP1->getType()->getPointerElementType(), GEP1);
867 Value *GEP2 = Builder.CreateGEP(GVIdElTy, GVId, {Cst0, Cst2});
868 Value *LocalId2 =
869 Builder.CreateLoad(GEP2->getType()->getPointerElementType(), GEP2);
rjodinchr791203f2021-10-07 20:42:41 +0200870
871 // get_local_size({0, 1, 2});
872 GlobalVariable *GVSize =
873 getOrCreateGlobalVariable(M, clspv::WorkgroupSizeVariableName(),
874 clspv::WorkgroupSizeAddressSpace());
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +0100875 auto LocalSize =
876 Builder.CreateLoad(GVSize->getType()->getPointerElementType(), GVSize);
rjodinchr791203f2021-10-07 20:42:41 +0200877 auto LocalSize0 = Builder.CreateExtractElement(LocalSize, Cst0);
878 auto LocalSize1 = Builder.CreateExtractElement(LocalSize, Cst1);
879 auto LocalSize2 = Builder.CreateExtractElement(LocalSize, Cst2);
880
881 // size_t start_id = ((get_local_id(2) * get_local_size(1))
882 // + get_local_id(1)) * get_local_size(0)
883 // + get_local_id(0);
884 auto tmp0 = Builder.CreateMul(LocalId2, LocalSize1);
885 auto tmp1 = Builder.CreateAdd(tmp0, LocalId1);
886 auto tmp2 = Builder.CreateMul(tmp1, LocalSize0);
887 auto StartId = Builder.CreateAdd(tmp2, LocalId0);
888
889 // size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
890 auto tmp3 = Builder.CreateMul(LocalSize0, LocalSize1);
891 auto Incr = Builder.CreateMul(tmp3, LocalSize2);
892
893 // Create BasicBlocks
894 auto BB = CI->getParent();
895 auto CmpBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
896 auto LoopBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
897 auto ExitBB = SplitBlock(BB, CI);
898
899 // BB
900 auto BrCmpBB = BranchInst::Create(CmpBB);
901 ReplaceInstWithInst(BB->getTerminator(), BrCmpBB);
902
903 // CmpBB
904 Builder.SetInsertPoint(CmpBB);
905 auto PHIIterator = Builder.CreatePHI(Builder.getInt32Ty(), 2);
906 auto Cmp = Builder.CreateCmp(CmpInst::ICMP_ULT, PHIIterator, NumGentypes);
907 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
908
909 // LoopBB
910 Builder.SetInsertPoint(LoopBB);
911
912 // default values for non-strided copies
913 Value *SrcIterator = PHIIterator;
914 Value *DstIterator = PHIIterator;
915 if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
916 clspv::AddressSpace::Global)) {
917 // async_work_group_strided_copy local to global case
918 DstIterator = Builder.CreateMul(PHIIterator, Stride);
919 } else if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
920 clspv::AddressSpace::Local)) {
921 // async_work_group_strided_copy global to local case
922 SrcIterator = Builder.CreateMul(PHIIterator, Stride);
923 }
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +0100924 auto DstI = Builder.CreateGEP(
925 Dst->getType()->getScalarType()->getPointerElementType(), Dst,
926 DstIterator);
927 auto SrcI = Builder.CreateGEP(
928 Src->getType()->getScalarType()->getPointerElementType(), Src,
929 SrcIterator);
rjodinchr791203f2021-10-07 20:42:41 +0200930 auto NewIterator = Builder.CreateAdd(PHIIterator, Incr);
931 auto Br = Builder.CreateBr(CmpBB);
932 clspv::InsertSPIRVOp(Br, spv::OpCopyMemory, {}, Builder.getVoidTy(),
933 {DstI, SrcI});
934
935 // Set PHIIterator for CmpBB now that we have NewIterator
936 PHIIterator->addIncoming(StartId, BB);
937 PHIIterator->addIncoming(NewIterator, LoopBB);
938
939 return Event;
940}
941
942bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopy(Function &F) {
943 return replaceCallsWithValue(F, [&F, this](CallInst *CI) {
944 Module &M = *F.getParent();
945
946 auto Dst = CI->getOperand(0);
947 auto Src = CI->getOperand(1);
948 auto NumGentypes = CI->getOperand(2);
949 auto Event = CI->getOperand(3);
950
951 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, NumGentypes, nullptr,
952 Event);
953 });
954}
955
956bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupStridedCopy(Function &F) {
957 return replaceCallsWithValue(F, [&F, this](CallInst *CI) {
958 Module &M = *F.getParent();
959
960 auto Dst = CI->getOperand(0);
961 auto Src = CI->getOperand(1);
962 auto NumGentypes = CI->getOperand(2);
963 auto Stride = CI->getOperand(3);
964 auto Event = CI->getOperand(4);
965
966 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, NumGentypes, Stride,
967 Event);
968 });
969}
970
SJW2c317da2020-03-23 07:39:13 -0500971bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
972 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400973 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100974}
975
SJW2c317da2020-03-23 07:39:13 -0500976bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
977 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100978 auto XValue = CI->getOperand(0);
979 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100980
Kévin Petite8edce32019-04-10 14:23:32 +0100981 IRBuilder<> Builder(CI);
982 auto XmY = Builder.CreateSub(XValue, YValue);
983 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100984
SJW2c317da2020-03-23 07:39:13 -0500985 Value *Cmp = nullptr;
986 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100987 Cmp = Builder.CreateICmpSGT(YValue, XValue);
988 } else {
989 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100990 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100991
Kévin Petite8edce32019-04-10 14:23:32 +0100992 return Builder.CreateSelect(Cmp, YmX, XmY);
993 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100994}
995
SJW2c317da2020-03-23 07:39:13 -0500996bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
alan-baker5f2e88e2020-12-07 15:24:04 -0500997 return replaceCallsWithValue(F, [&F](CallInst *Call) {
998 const auto x = Call->getArgOperand(0);
999 const auto y = Call->getArgOperand(1);
1000 auto intrinsic = Intrinsic::getDeclaration(
1001 F.getParent(), Intrinsic::copysign, Call->getType());
1002 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {x, y}, "",
1003 Call);
Kévin Petite8edce32019-04-10 14:23:32 +01001004 });
Kévin Petit8c1be282019-04-02 19:34:25 +01001005}
1006
SJW2c317da2020-03-23 07:39:13 -05001007bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
1008 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001009 // Recip has one arg.
1010 auto Arg = CI->getOperand(0);
1011 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
1012 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
1013 });
David Neto22f144c2017-06-12 14:26:21 -04001014}
1015
SJW2c317da2020-03-23 07:39:13 -05001016bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
1017 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001018 auto Op0 = CI->getOperand(0);
1019 auto Op1 = CI->getOperand(1);
1020 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
1021 });
David Neto22f144c2017-06-12 14:26:21 -04001022}
1023
SJW2c317da2020-03-23 07:39:13 -05001024bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
1025 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +01001026 auto Op0 = CI->getOperand(0);
1027 auto Op1 = CI->getOperand(1);
1028
SJW2c317da2020-03-23 07:39:13 -05001029 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +01001030 if (Op0->getType()->isVectorTy()) {
1031 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
1032 CI->getType(), {Op0, Op1});
1033 } else {
1034 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
1035 }
1036
1037 return V;
1038 });
1039}
1040
SJW2c317da2020-03-23 07:39:13 -05001041bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -05001042 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001043 // convert to natural
1044 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001045 std::string NewFName = basename.substr(0, slen);
1046 NewFName =
1047 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001048
SJW2c317da2020-03-23 07:39:13 -05001049 Module &M = *F.getParent();
1050 return replaceCallsWithValue(F, [&](CallInst *CI) {
1051 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1052
1053 auto Arg = CI->getOperand(0);
1054
1055 // Constant of the natural log of 10 (ln(10)).
1056 const double Ln10 =
1057 2.302585092994045684017991454684364207601101488628772976033;
1058
1059 auto Mul = BinaryOperator::Create(
1060 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
1061
1062 return CallInst::Create(NewF, Mul, "", CI);
1063 });
David Neto22f144c2017-06-12 14:26:21 -04001064}
1065
SJW2c317da2020-03-23 07:39:13 -05001066bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +01001067 // OpenCL fmod(x,y) is x - y * trunc(x/y)
1068 // The sign for a non-zero result is taken from x.
1069 // (Try an example.)
1070 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -05001071 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +01001072 auto Op0 = CI->getOperand(0);
1073 auto Op1 = CI->getOperand(1);
1074 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
1075 });
1076}
1077
SJW2c317da2020-03-23 07:39:13 -05001078bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -05001079 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001080 // convert to natural
1081 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001082 std::string NewFName = basename.substr(0, slen);
1083 NewFName =
1084 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001085
SJW2c317da2020-03-23 07:39:13 -05001086 Module &M = *F.getParent();
1087 return replaceCallsWithValue(F, [&](CallInst *CI) {
1088 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1089
1090 auto Arg = CI->getOperand(0);
1091
1092 // Constant of the reciprocal of the natural log of 10 (ln(10)).
1093 const double Ln10 =
1094 0.434294481903251827651128918916605082294397005803666566114;
1095
1096 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
1097
1098 return BinaryOperator::Create(Instruction::FMul,
1099 ConstantFP::get(Arg->getType(), Ln10), NewCI,
1100 "", CI);
1101 });
David Neto22f144c2017-06-12 14:26:21 -04001102}
1103
gnl21636e7992020-09-09 16:08:16 +01001104bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
1105 // convert to natural
alan-baker8b968112020-12-15 15:53:29 -05001106 return replaceCallsWithValue(F, [&F](CallInst *CI) {
gnl21636e7992020-09-09 16:08:16 +01001107 auto Arg = CI->getOperand(0);
1108
1109 auto ArgP1 = BinaryOperator::Create(
1110 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
1111
alan-baker8b968112020-12-15 15:53:29 -05001112 auto log =
1113 Intrinsic::getDeclaration(F.getParent(), Intrinsic::log, CI->getType());
1114 return CallInst::Create(log, ArgP1, "", CI);
gnl21636e7992020-09-09 16:08:16 +01001115 });
1116}
1117
alan-baker12d2c182020-07-20 08:22:42 -04001118bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -04001119
alan-bakerf6bc8252020-09-23 14:58:55 -04001120 enum {
1121 CLK_LOCAL_MEM_FENCE = 0x01,
1122 CLK_GLOBAL_MEM_FENCE = 0x02,
1123 CLK_IMAGE_MEM_FENCE = 0x04
1124 };
David Neto22f144c2017-06-12 14:26:21 -04001125
alan-baker12d2c182020-07-20 08:22:42 -04001126 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +01001127 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001128
Kévin Petitc4643922019-06-17 19:32:05 +01001129 // We need to map the OpenCL constants to the SPIR-V equivalents.
1130 const auto LocalMemFence =
1131 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1132 const auto GlobalMemFence =
1133 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001134 const auto ImageMemFence =
1135 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -04001136 const auto ConstantAcquireRelease = ConstantInt::get(
1137 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +01001138 const auto ConstantScopeDevice =
1139 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
1140 const auto ConstantScopeWorkgroup =
1141 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -04001142 const auto ConstantScopeSubgroup =
1143 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -04001144
Kévin Petitc4643922019-06-17 19:32:05 +01001145 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1146 const auto LocalMemFenceMask =
1147 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1148 const auto WorkgroupShiftAmount =
1149 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1150 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1151 Instruction::Shl, LocalMemFenceMask,
1152 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001153
Kévin Petitc4643922019-06-17 19:32:05 +01001154 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1155 const auto GlobalMemFenceMask =
1156 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1157 const auto UniformShiftAmount =
1158 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1159 const auto MemorySemanticsUniform = BinaryOperator::Create(
1160 Instruction::Shl, GlobalMemFenceMask,
1161 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001162
alan-bakerf6bc8252020-09-23 14:58:55 -04001163 // OpenCL 2.0
1164 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1165 const auto ImageMemFenceMask =
1166 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1167 const auto ImageShiftAmount =
1168 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1169 const auto MemorySemanticsImage = BinaryOperator::Create(
1170 Instruction::Shl, ImageMemFenceMask,
1171 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1172
Kévin Petitc4643922019-06-17 19:32:05 +01001173 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -04001174 // MemorySemanticsSequentiallyConsistentMask.
1175 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +01001176 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -04001177 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -04001178 auto MemorySemantics2 = BinaryOperator::Create(
1179 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
1180 auto MemorySemantics = BinaryOperator::Create(
1181 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001182
alan-baker12d2c182020-07-20 08:22:42 -04001183 // If the memory scope is not specified explicitly, it is either Subgroup
1184 // or Workgroup depending on the type of barrier.
1185 Value *MemoryScope =
1186 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
1187 if (CI->data_operands_size() > 1) {
1188 enum {
1189 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
1190 CL_MEMORY_SCOPE_DEVICE = 0x2,
1191 CL_MEMORY_SCOPE_SUBGROUP = 0x4
1192 };
1193 // The call was given an explicit memory scope.
1194 const auto MemoryScopeSubgroup =
1195 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
1196 const auto MemoryScopeDevice =
1197 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -04001198
alan-baker12d2c182020-07-20 08:22:42 -04001199 auto Cmp =
1200 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1201 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
1202 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
1203 ConstantScopeWorkgroup, "", CI);
1204 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1205 MemoryScopeDevice, CI->getOperand(1), "", CI);
1206 MemoryScope =
1207 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
1208 }
1209
1210 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
1211 // the type of barrier;
1212 const auto ExecutionScope =
1213 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -04001214
Kévin Petitc4643922019-06-17 19:32:05 +01001215 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -04001216 {Attribute::NoDuplicate, Attribute::Convergent},
1217 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +01001218 {ExecutionScope, MemoryScope, MemorySemantics});
1219 });
David Neto22f144c2017-06-12 14:26:21 -04001220}
1221
alan-baker36309f92021-02-05 12:28:03 -05001222bool ReplaceOpenCLBuiltinPass::replaceMemFence(
1223 Function &F, spv::MemorySemanticsMask semantics) {
David Neto22f144c2017-06-12 14:26:21 -04001224
SJW2c317da2020-03-23 07:39:13 -05001225 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -04001226 enum {
1227 CLK_LOCAL_MEM_FENCE = 0x01,
1228 CLK_GLOBAL_MEM_FENCE = 0x02,
1229 CLK_IMAGE_MEM_FENCE = 0x04,
1230 };
David Neto22f144c2017-06-12 14:26:21 -04001231
SJW2c317da2020-03-23 07:39:13 -05001232 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001233
SJW2c317da2020-03-23 07:39:13 -05001234 // We need to map the OpenCL constants to the SPIR-V equivalents.
1235 const auto LocalMemFence =
1236 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1237 const auto GlobalMemFence =
1238 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001239 const auto ImageMemFence =
1240 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -05001241 const auto ConstantMemorySemantics =
1242 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -04001243 const auto ConstantScopeWorkgroup =
1244 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -04001245
SJW2c317da2020-03-23 07:39:13 -05001246 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1247 const auto LocalMemFenceMask =
1248 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1249 const auto WorkgroupShiftAmount =
1250 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1251 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1252 Instruction::Shl, LocalMemFenceMask,
1253 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001254
SJW2c317da2020-03-23 07:39:13 -05001255 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1256 const auto GlobalMemFenceMask =
1257 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1258 const auto UniformShiftAmount =
1259 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1260 const auto MemorySemanticsUniform = BinaryOperator::Create(
1261 Instruction::Shl, GlobalMemFenceMask,
1262 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001263
alan-bakerf6bc8252020-09-23 14:58:55 -04001264 // OpenCL 2.0
1265 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1266 const auto ImageMemFenceMask =
1267 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1268 const auto ImageShiftAmount =
1269 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1270 const auto MemorySemanticsImage = BinaryOperator::Create(
1271 Instruction::Shl, ImageMemFenceMask,
1272 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1273
alan-baker36309f92021-02-05 12:28:03 -05001274 Value *MemOrder = ConstantMemorySemantics;
1275 Value *MemScope = ConstantScopeWorkgroup;
1276 IRBuilder<> builder(CI);
alan-baker5641f5c2021-10-15 09:16:04 -04001277 if (CI->arg_size() > 1) {
alan-baker36309f92021-02-05 12:28:03 -05001278 MemOrder = MemoryOrderSemantics(CI->getArgOperand(1), false, CI,
1279 semantics, false);
1280 MemScope = MemoryScope(CI->getArgOperand(2), false, CI);
1281 }
1282 // Join the storage semantics and the order semantics.
alan-bakerf6bc8252020-09-23 14:58:55 -04001283 auto MemorySemantics1 =
alan-baker36309f92021-02-05 12:28:03 -05001284 builder.CreateOr({MemorySemanticsWorkgroup, MemorySemanticsUniform});
1285 auto MemorySemantics2 = builder.CreateOr({MemorySemanticsImage, MemOrder});
1286 auto MemorySemantics =
1287 builder.CreateOr({MemorySemantics1, MemorySemantics2});
David Neto22f144c2017-06-12 14:26:21 -04001288
alan-baker3d905692020-10-28 14:02:37 -04001289 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
1290 {Attribute::Convergent}, CI->getType(),
alan-baker36309f92021-02-05 12:28:03 -05001291 {MemScope, MemorySemantics});
SJW2c317da2020-03-23 07:39:13 -05001292 });
David Neto22f144c2017-06-12 14:26:21 -04001293}
1294
Kévin Petit1cb45112020-04-27 18:55:48 +01001295bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
1296 bool Changed = false;
1297
1298 SmallVector<Instruction *, 4> ToRemoves;
1299
1300 // Find all calls to the function
1301 for (auto &U : F.uses()) {
1302 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1303 ToRemoves.push_back(CI);
1304 }
1305 }
1306
1307 Changed = !ToRemoves.empty();
1308
1309 // Delete them
1310 for (auto V : ToRemoves) {
1311 V->eraseFromParent();
1312 }
1313
1314 return Changed;
1315}
1316
SJW2c317da2020-03-23 07:39:13 -05001317bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
alan-baker3e217772020-11-07 17:29:40 -05001318 CmpInst::Predicate P) {
SJW2c317da2020-03-23 07:39:13 -05001319 return replaceCallsWithValue(F, [&](CallInst *CI) {
1320 // The predicate to use in the CmpInst.
1321 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -04001322
SJW2c317da2020-03-23 07:39:13 -05001323 auto Arg1 = CI->getOperand(0);
1324 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001325
SJW2c317da2020-03-23 07:39:13 -05001326 const auto Cmp =
1327 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
alan-baker3e217772020-11-07 17:29:40 -05001328 if (isa<VectorType>(F.getReturnType()))
1329 return CastInst::Create(Instruction::SExt, Cmp, CI->getType(), "", CI);
1330 return CastInst::Create(Instruction::ZExt, Cmp, CI->getType(), "", CI);
SJW2c317da2020-03-23 07:39:13 -05001331 });
David Neto22f144c2017-06-12 14:26:21 -04001332}
1333
SJW2c317da2020-03-23 07:39:13 -05001334bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
1335 spv::Op SPIRVOp,
1336 int32_t C) {
1337 Module &M = *F.getParent();
1338 return replaceCallsWithValue(F, [&](CallInst *CI) {
1339 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -04001340
SJW2c317da2020-03-23 07:39:13 -05001341 // The value to return for true.
1342 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -04001343
SJW2c317da2020-03-23 07:39:13 -05001344 // The value to return for false.
1345 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001346
SJW2c317da2020-03-23 07:39:13 -05001347 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001348 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001349 CorrespondingBoolTy =
1350 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1351 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001352 }
David Neto22f144c2017-06-12 14:26:21 -04001353
SJW2c317da2020-03-23 07:39:13 -05001354 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1355 CorrespondingBoolTy, {CI->getOperand(0)});
1356
1357 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1358 });
David Neto22f144c2017-06-12 14:26:21 -04001359}
1360
SJW2c317da2020-03-23 07:39:13 -05001361bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1362 Module &M = *F.getParent();
1363 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001364 auto &C = M.getContext();
1365 auto Val = CI->getOperand(0);
1366 auto ValTy = Val->getType();
1367 auto RetTy = CI->getType();
1368
1369 // Get a suitable integer type to represent the number
1370 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1371
1372 // Create Mask
1373 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001374 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001375 switch (ScalarSize) {
1376 case 16:
1377 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1378 break;
1379 case 32:
1380 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1381 break;
1382 case 64:
1383 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1384 break;
1385 default:
1386 llvm_unreachable("Unsupported floating-point type");
1387 }
1388
1389 IRBuilder<> Builder(CI);
1390
1391 // Bitcast to int
1392 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1393
1394 // Mask and compare
1395 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1396 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1397
1398 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001399 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001400 if (ValTy->isVectorTy()) {
1401 RetTrue = ConstantInt::getSigned(RetTy, -1);
1402 } else {
1403 RetTrue = ConstantInt::get(RetTy, 1);
1404 }
1405 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1406 });
1407}
1408
SJW2c317da2020-03-23 07:39:13 -05001409bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1410 Module &M = *F.getParent();
1411 return replaceCallsWithValue(F, [&](CallInst *CI) {
1412 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001413
SJW2c317da2020-03-23 07:39:13 -05001414 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001415
SJW2c317da2020-03-23 07:39:13 -05001416 // If the argument is a 32-bit int, just use a shift
1417 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1418 V = BinaryOperator::Create(Instruction::LShr, Arg,
1419 ConstantInt::get(Arg->getType(), 31), "", CI);
1420 } else {
1421 // The value for zero to compare against.
1422 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001423
SJW2c317da2020-03-23 07:39:13 -05001424 // The value to return for true.
1425 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001426
SJW2c317da2020-03-23 07:39:13 -05001427 // The value to return for false.
1428 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001429
SJW2c317da2020-03-23 07:39:13 -05001430 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1431 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001432
SJW2c317da2020-03-23 07:39:13 -05001433 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001434
SJW2c317da2020-03-23 07:39:13 -05001435 // If we have a function to call, call it!
1436 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001437
SJW2c317da2020-03-23 07:39:13 -05001438 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001439
SJW2c317da2020-03-23 07:39:13 -05001440 const auto NewCI = clspv::InsertSPIRVOp(
1441 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1442 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001443
SJW2c317da2020-03-23 07:39:13 -05001444 } else {
1445 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001446 }
1447
SJW2c317da2020-03-23 07:39:13 -05001448 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001449 }
SJW2c317da2020-03-23 07:39:13 -05001450 return V;
1451 });
David Neto22f144c2017-06-12 14:26:21 -04001452}
1453
SJW2c317da2020-03-23 07:39:13 -05001454bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1455 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1456 // Get arguments
1457 auto HiValue = CI->getOperand(0);
1458 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001459
SJW2c317da2020-03-23 07:39:13 -05001460 // Don't touch overloads that aren't in OpenCL C
1461 auto HiType = HiValue->getType();
1462 auto LoType = LoValue->getType();
1463
1464 if (HiType != LoType) {
1465 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001466 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001467
SJW2c317da2020-03-23 07:39:13 -05001468 if (!HiType->isIntOrIntVectorTy()) {
1469 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001470 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001471
SJW2c317da2020-03-23 07:39:13 -05001472 if (HiType->getScalarSizeInBits() * 2 !=
1473 CI->getType()->getScalarSizeInBits()) {
1474 return nullptr;
1475 }
1476
1477 if ((HiType->getScalarSizeInBits() != 8) &&
1478 (HiType->getScalarSizeInBits() != 16) &&
1479 (HiType->getScalarSizeInBits() != 32)) {
1480 return nullptr;
1481 }
1482
James Pricecf53df42020-04-20 14:41:24 -04001483 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001484 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001485 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1486 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001487 return nullptr;
1488 }
1489 }
1490
1491 // Convert both operands to the result type
1492 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1493 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1494
1495 // Shift high operand
1496 auto ShiftAmount =
1497 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1498 auto HiShifted =
1499 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1500
1501 // OR both results
1502 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1503 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001504}
1505
SJW2c317da2020-03-23 07:39:13 -05001506bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1507 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1508 // Get arguments
1509 auto SrcValue = CI->getOperand(0);
1510 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001511
SJW2c317da2020-03-23 07:39:13 -05001512 // Don't touch overloads that aren't in OpenCL C
1513 auto SrcType = SrcValue->getType();
1514 auto RotType = RotAmount->getType();
1515
1516 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1517 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001518 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001519
SJW2c317da2020-03-23 07:39:13 -05001520 if (!SrcType->isIntOrIntVectorTy()) {
1521 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001522 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001523
SJW2c317da2020-03-23 07:39:13 -05001524 if ((SrcType->getScalarSizeInBits() != 8) &&
1525 (SrcType->getScalarSizeInBits() != 16) &&
1526 (SrcType->getScalarSizeInBits() != 32) &&
1527 (SrcType->getScalarSizeInBits() != 64)) {
1528 return nullptr;
1529 }
1530
James Pricecf53df42020-04-20 14:41:24 -04001531 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001532 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001533 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1534 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001535 return nullptr;
1536 }
1537 }
1538
alan-bakerfd22ae12020-10-29 15:59:22 -04001539 // Replace with LLVM's funnel shift left intrinsic because it is more
1540 // generic than rotate.
1541 Function *intrinsic =
1542 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1543 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1544 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001545 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001546}
1547
SJW2c317da2020-03-23 07:39:13 -05001548bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1549 bool DstIsSigned) {
1550 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1551 Value *V = nullptr;
1552 // Get arguments
1553 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001554
SJW2c317da2020-03-23 07:39:13 -05001555 // Don't touch overloads that aren't in OpenCL C
1556 auto SrcType = SrcValue->getType();
1557 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001558
SJW2c317da2020-03-23 07:39:13 -05001559 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1560 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1561 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001562 }
1563
James Pricecf53df42020-04-20 14:41:24 -04001564 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001565 unsigned SrcNumElements =
1566 SrcVecType->getElementCount().getKnownMinValue();
1567 unsigned DstNumElements =
1568 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001569 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001570 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001571 }
1572
James Pricecf53df42020-04-20 14:41:24 -04001573 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1574 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1575 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001576 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001577 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001578 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001579
SJW2c317da2020-03-23 07:39:13 -05001580 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1581 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1582
1583 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1584 bool DstIsInt = DstType->isIntOrIntVectorTy();
1585
1586 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1587 // Unnecessary cast operation.
1588 V = SrcValue;
1589 } else if (SrcIsFloat && DstIsFloat) {
1590 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1591 } else if (SrcIsFloat && DstIsInt) {
1592 if (DstIsSigned) {
1593 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1594 } else {
1595 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1596 }
1597 } else if (SrcIsInt && DstIsFloat) {
1598 if (SrcIsSigned) {
1599 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1600 } else {
1601 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1602 }
1603 } else if (SrcIsInt && DstIsInt) {
1604 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1605 } else {
1606 // Not something we're supposed to handle, just move on
1607 }
1608
1609 return V;
1610 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001611}
1612
SJW2c317da2020-03-23 07:39:13 -05001613bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1614 bool is_mad) {
1615 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1616 Value *V = nullptr;
1617 // Get arguments
1618 auto AValue = CI->getOperand(0);
1619 auto BValue = CI->getOperand(1);
1620 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001621
SJW2c317da2020-03-23 07:39:13 -05001622 // Don't touch overloads that aren't in OpenCL C
1623 auto AType = AValue->getType();
1624 auto BType = BValue->getType();
1625 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001626
SJW2c317da2020-03-23 07:39:13 -05001627 if ((AType != BType) || (CI->getType() != AType) ||
1628 (is_mad && (AType != CType))) {
1629 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001630 }
1631
SJW2c317da2020-03-23 07:39:13 -05001632 if (!AType->isIntOrIntVectorTy()) {
1633 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001634 }
Kévin Petit8a560882019-03-21 15:24:34 +00001635
SJW2c317da2020-03-23 07:39:13 -05001636 if ((AType->getScalarSizeInBits() != 8) &&
1637 (AType->getScalarSizeInBits() != 16) &&
1638 (AType->getScalarSizeInBits() != 32) &&
1639 (AType->getScalarSizeInBits() != 64)) {
1640 return V;
1641 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001642
James Pricecf53df42020-04-20 14:41:24 -04001643 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001644 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001645 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1646 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001647 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001648 }
1649 }
1650
Romaric Jodinc507f312022-04-08 19:09:45 +02001651 auto Call = InsertOpMulExtended(CI, AValue, BValue, is_signed);
SJW2c317da2020-03-23 07:39:13 -05001652
1653 // Get the high part of the result
1654 unsigned Idxs[] = {1};
1655 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1656
1657 // If we're handling a mad_hi, add the third argument to the result
1658 if (is_mad) {
1659 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001660 }
1661
SJW2c317da2020-03-23 07:39:13 -05001662 return V;
1663 });
Kévin Petit8a560882019-03-21 15:24:34 +00001664}
1665
SJW2c317da2020-03-23 07:39:13 -05001666bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1667 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1668 // Get arguments
1669 auto FalseValue = CI->getOperand(0);
1670 auto TrueValue = CI->getOperand(1);
1671 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001672
SJW2c317da2020-03-23 07:39:13 -05001673 // Don't touch overloads that aren't in OpenCL C
1674 auto FalseType = FalseValue->getType();
1675 auto TrueType = TrueValue->getType();
1676 auto PredicateType = PredicateValue->getType();
1677
1678 if (FalseType != TrueType) {
1679 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001680 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001681
SJW2c317da2020-03-23 07:39:13 -05001682 if (!PredicateType->isIntOrIntVectorTy()) {
1683 return nullptr;
1684 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001685
SJW2c317da2020-03-23 07:39:13 -05001686 if (!FalseType->isIntOrIntVectorTy() &&
1687 !FalseType->getScalarType()->isFloatingPointTy()) {
1688 return nullptr;
1689 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001690
SJW2c317da2020-03-23 07:39:13 -05001691 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1692 return nullptr;
1693 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001694
SJW2c317da2020-03-23 07:39:13 -05001695 if (FalseType->getScalarSizeInBits() !=
1696 PredicateType->getScalarSizeInBits()) {
1697 return nullptr;
1698 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001699
James Pricecf53df42020-04-20 14:41:24 -04001700 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001701 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1702 if (NumElements != cast<VectorType>(PredicateType)
1703 ->getElementCount()
1704 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001705 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001706 }
1707
James Pricecf53df42020-04-20 14:41:24 -04001708 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1709 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001710 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001711 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001712 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001713
SJW2c317da2020-03-23 07:39:13 -05001714 // Create constant
1715 const auto ZeroValue = Constant::getNullValue(PredicateType);
1716
1717 // Scalar and vector are to be treated differently
1718 CmpInst::Predicate Pred;
1719 if (PredicateType->isVectorTy()) {
1720 Pred = CmpInst::ICMP_SLT;
1721 } else {
1722 Pred = CmpInst::ICMP_NE;
1723 }
1724
1725 // Create comparison instruction
1726 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1727 ZeroValue, "", CI);
1728
1729 // Create select
1730 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1731 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001732}
1733
SJW2c317da2020-03-23 07:39:13 -05001734bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1735 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1736 Value *V = nullptr;
1737 if (CI->getNumOperands() != 4) {
1738 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001739 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001740
SJW2c317da2020-03-23 07:39:13 -05001741 // Get arguments
1742 auto FalseValue = CI->getOperand(0);
1743 auto TrueValue = CI->getOperand(1);
1744 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001745
SJW2c317da2020-03-23 07:39:13 -05001746 // Don't touch overloads that aren't in OpenCL C
1747 auto FalseType = FalseValue->getType();
1748 auto TrueType = TrueValue->getType();
1749 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001750
SJW2c317da2020-03-23 07:39:13 -05001751 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1752 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001753 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001754
James Pricecf53df42020-04-20 14:41:24 -04001755 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001756 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1757 !TrueType->getScalarType()->isIntegerTy()) {
1758 return V;
1759 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001760 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001761 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1762 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001763 return V;
1764 }
1765 }
1766
1767 // Remember the type of the operands
1768 auto OpType = TrueType;
1769
1770 // The actual bit selection will always be done on an integer type,
1771 // declare it here
1772 Type *BitType;
1773
1774 // If the operands are float, then bitcast them to int
1775 if (OpType->getScalarType()->isFloatingPointTy()) {
1776
1777 // First create the new type
1778 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1779
1780 // Then bitcast all operands
1781 PredicateValue =
1782 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1783 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1784 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1785
1786 } else {
1787 // The operands have an integer type, use it directly
1788 BitType = OpType;
1789 }
1790
1791 // All the operands are now always integers
1792 // implement as (c & b) | (~c & a)
1793
1794 // Create our negated predicate value
1795 auto AllOnes = Constant::getAllOnesValue(BitType);
1796 auto NotPredicateValue = BinaryOperator::Create(
1797 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1798
1799 // Then put everything together
1800 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1801 FalseValue, "", CI);
1802 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1803 TrueValue, "", CI);
1804
1805 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1806
1807 // If we were dealing with a floating point type, we must bitcast
1808 // the result back to that
1809 if (OpType->getScalarType()->isFloatingPointTy()) {
1810 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1811 }
1812
1813 return V;
1814 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001815}
1816
SJW61531372020-06-09 07:31:08 -05001817bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001818 // convert to vector versions
1819 Module &M = *F.getParent();
1820 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1821 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1822 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001823
SJW2c317da2020-03-23 07:39:13 -05001824 // First figure out which function we're dealing with
1825 if (is_smooth) {
1826 ArgsToSplat.push_back(CI->getOperand(1));
1827 VectorArg = CI->getOperand(2);
1828 } else {
1829 VectorArg = CI->getOperand(1);
1830 }
1831
1832 // Splat arguments that need to be
1833 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001834 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001835
1836 for (auto arg : ArgsToSplat) {
1837 Value *NewVectorArg = UndefValue::get(VecType);
Marco Antognini7e338402021-03-15 12:48:37 +00001838 for (size_t i = 0; i < VecType->getElementCount().getKnownMinValue();
1839 i++) {
SJW2c317da2020-03-23 07:39:13 -05001840 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1841 NewVectorArg =
1842 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1843 }
1844 SplatArgs.push_back(NewVectorArg);
1845 }
1846
1847 // Replace the call with the vector/vector flavour
1848 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1849 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1850
SJW61531372020-06-09 07:31:08 -05001851 std::string NewFName = Builtins::GetMangledFunctionName(
1852 is_smooth ? "smoothstep" : "step", NewFType);
1853
SJW2c317da2020-03-23 07:39:13 -05001854 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1855
1856 SmallVector<Value *, 3> NewArgs;
1857 for (auto arg : SplatArgs) {
1858 NewArgs.push_back(arg);
1859 }
1860 NewArgs.push_back(VectorArg);
1861
1862 return CallInst::Create(NewF, NewArgs, "", CI);
1863 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001864}
1865
SJW2c317da2020-03-23 07:39:13 -05001866bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001867 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1868 auto Arg = CI->getOperand(0);
1869 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001870
SJW2c317da2020-03-23 07:39:13 -05001871 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001872
SJW2c317da2020-03-23 07:39:13 -05001873 return BinaryOperator::Create(Op, Bitcast,
1874 ConstantInt::get(CI->getType(), 31), "", CI);
1875 });
David Neto22f144c2017-06-12 14:26:21 -04001876}
1877
SJW2c317da2020-03-23 07:39:13 -05001878bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1879 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001880 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1881 // The multiply instruction to use.
1882 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001883
SJW2c317da2020-03-23 07:39:13 -05001884 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001885
SJW2c317da2020-03-23 07:39:13 -05001886 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1887 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001888
SJW2c317da2020-03-23 07:39:13 -05001889 if (is_mad) {
1890 // The add instruction to use.
1891 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001892
SJW2c317da2020-03-23 07:39:13 -05001893 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001894 }
David Neto22f144c2017-06-12 14:26:21 -04001895
SJW2c317da2020-03-23 07:39:13 -05001896 return V;
1897 });
David Neto22f144c2017-06-12 14:26:21 -04001898}
1899
SJW2c317da2020-03-23 07:39:13 -05001900bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001901 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1902 Value *V = nullptr;
1903 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001904
SJW2c317da2020-03-23 07:39:13 -05001905 auto data_type = data->getType();
1906 if (!data_type->isVectorTy())
1907 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001908
James Pricecf53df42020-04-20 14:41:24 -04001909 auto vec_data_type = cast<VectorType>(data_type);
1910
alan-baker5a8c3be2020-09-09 13:44:26 -04001911 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001912 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1913 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001914
SJW2c317da2020-03-23 07:39:13 -05001915 auto offset = CI->getOperand(1);
1916 auto ptr = CI->getOperand(2);
1917 auto ptr_type = ptr->getType();
1918 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001919 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001920 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001921
SJW2c317da2020-03-23 07:39:13 -05001922 // Avoid pointer casts. Instead generate the correct number of stores
1923 // and rely on drivers to coalesce appropriately.
1924 IRBuilder<> builder(CI);
1925 auto elems_const = builder.getInt32(elems);
1926 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001927 for (size_t i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001928 auto idx = builder.getInt32(i);
1929 auto add = builder.CreateAdd(adjust, idx);
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01001930 auto gep = builder.CreateGEP(
1931 ptr->getType()->getScalarType()->getPointerElementType(), ptr, add);
SJW2c317da2020-03-23 07:39:13 -05001932 auto extract = builder.CreateExtractElement(data, i);
1933 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001934 }
SJW2c317da2020-03-23 07:39:13 -05001935 return V;
1936 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001937}
1938
SJW2c317da2020-03-23 07:39:13 -05001939bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001940 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1941 Value *V = nullptr;
1942 auto ret_type = F.getReturnType();
1943 if (!ret_type->isVectorTy())
1944 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001945
James Pricecf53df42020-04-20 14:41:24 -04001946 auto vec_ret_type = cast<VectorType>(ret_type);
1947
alan-baker5a8c3be2020-09-09 13:44:26 -04001948 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001949 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1950 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001951
SJW2c317da2020-03-23 07:39:13 -05001952 auto offset = CI->getOperand(0);
1953 auto ptr = CI->getOperand(1);
1954 auto ptr_type = ptr->getType();
1955 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001956 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001957 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001958
SJW2c317da2020-03-23 07:39:13 -05001959 // Avoid pointer casts. Instead generate the correct number of loads
1960 // and rely on drivers to coalesce appropriately.
1961 IRBuilder<> builder(CI);
1962 auto elems_const = builder.getInt32(elems);
1963 V = UndefValue::get(ret_type);
1964 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001965 for (unsigned i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001966 auto idx = builder.getInt32(i);
1967 auto add = builder.CreateAdd(adjust, idx);
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01001968 auto gep = builder.CreateGEP(
1969 ptr_type->getScalarType()->getPointerElementType(), ptr, add);
1970 auto load =
1971 builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
SJW2c317da2020-03-23 07:39:13 -05001972 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001973 }
SJW2c317da2020-03-23 07:39:13 -05001974 return V;
1975 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001976}
1977
SJW2c317da2020-03-23 07:39:13 -05001978bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1979 const std::string &name,
Romaric Jodin71fdb322022-05-03 17:01:10 +02001980 int vec_size, bool aligned) {
SJW2c317da2020-03-23 07:39:13 -05001981 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1982 if (!vec_size) {
Romaric Jodin71fdb322022-05-03 17:01:10 +02001983 // deduce vec_size from last characters of name (e.g. vload_half4)
1984 std::string half = "half";
1985 vec_size = std::atoi(
1986 name.substr(name.find(half) + half.size(), std::string::npos).c_str());
David Neto22f144c2017-06-12 14:26:21 -04001987 }
SJW2c317da2020-03-23 07:39:13 -05001988 switch (vec_size) {
1989 case 2:
1990 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02001991 case 3:
1992 if (!is_clspv_version) {
1993 return aligned ? replaceVloadaHalf3(F) : replaceVloadHalf3(F);
1994 }
1995 break;
SJW2c317da2020-03-23 07:39:13 -05001996 case 4:
1997 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02001998 case 8:
1999 if (!is_clspv_version) {
2000 return replaceVloadHalf8(F);
2001 }
2002 break;
2003 case 16:
2004 if (!is_clspv_version) {
2005 return replaceVloadHalf16(F);
2006 }
2007 break;
SJW2c317da2020-03-23 07:39:13 -05002008 case 0:
2009 if (!is_clspv_version) {
2010 return replaceVloadHalf(F);
2011 }
SJW2c317da2020-03-23 07:39:13 -05002012 break;
2013 }
Romaric Jodin71fdb322022-05-03 17:01:10 +02002014 llvm_unreachable("Unsupported vload_half vector size");
David Neto22f144c2017-06-12 14:26:21 -04002015}
2016
SJW2c317da2020-03-23 07:39:13 -05002017bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
2018 Module &M = *F.getParent();
2019 return replaceCallsWithValue(F, [&](CallInst *CI) {
2020 // The index argument from vload_half.
2021 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002022
SJW2c317da2020-03-23 07:39:13 -05002023 // The pointer argument from vload_half.
2024 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002025
SJW2c317da2020-03-23 07:39:13 -05002026 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002027 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05002028 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2029
2030 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002031 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05002032
2033 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2034
2035 Value *V = nullptr;
2036
alan-baker7efcaaa2020-05-06 19:33:27 -04002037 bool supports_16bit_storage = true;
2038 switch (Arg1->getType()->getPointerAddressSpace()) {
2039 case clspv::AddressSpace::Global:
2040 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2041 clspv::Option::StorageClass::kSSBO);
2042 break;
2043 case clspv::AddressSpace::Constant:
2044 if (clspv::Option::ConstantArgsInUniformBuffer())
2045 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2046 clspv::Option::StorageClass::kUBO);
2047 else
2048 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2049 clspv::Option::StorageClass::kSSBO);
2050 break;
2051 default:
2052 // Clspv will emit the Float16 capability if the half type is
2053 // encountered. That capability covers private and local addressspaces.
2054 break;
2055 }
2056
2057 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05002058 auto ShortTy = Type::getInt16Ty(M.getContext());
2059 auto ShortPointerTy =
2060 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
2061
2062 // Cast the half* pointer to short*.
2063 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
2064
2065 // Index into the correct address of the casted pointer.
2066 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2067
2068 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002069 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002070
2071 // ZExt the short -> int.
2072 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2073
2074 // Get our float2.
2075 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2076
2077 // Extract out the bottom element which is our float result.
2078 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
2079 } else {
2080 // Assume the pointer argument points to storage aligned to 32bits
2081 // or more.
2082 // TODO(dneto): Do more analysis to make sure this is true?
2083 //
2084 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2085 // with:
2086 //
2087 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2088 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2089 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2090 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2091 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2092 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2093 // x float> %converted, %index_is_odd32
2094
2095 auto IntPointerTy =
2096 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
2097
2098 // Cast the base pointer to int*.
2099 // In a valid call (according to assumptions), this should get
2100 // optimized away in the simplify GEP pass.
2101 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2102
2103 auto One = ConstantInt::get(IntTy, 1);
2104 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2105 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2106
2107 // Index into the correct address of the casted pointer.
2108 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2109
2110 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002111 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002112
2113 // Get our float2.
2114 auto Call = CallInst::Create(NewF, Load, "", CI);
2115
2116 // Extract out the float result, where the element number is
2117 // determined by whether the original index was even or odd.
2118 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2119 }
2120 return V;
2121 });
2122}
2123
2124bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
2125 Module &M = *F.getParent();
2126 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002127 // The index argument from vload_half.
2128 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002129
Kévin Petite8edce32019-04-10 14:23:32 +01002130 // The pointer argument from vload_half.
2131 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002132
Kévin Petite8edce32019-04-10 14:23:32 +01002133 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002134 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002135 auto NewPointerTy =
2136 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002137 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002138
Kévin Petite8edce32019-04-10 14:23:32 +01002139 // Cast the half* pointer to int*.
2140 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002141
Kévin Petite8edce32019-04-10 14:23:32 +01002142 // Index into the correct address of the casted pointer.
2143 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002144
Kévin Petite8edce32019-04-10 14:23:32 +01002145 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002146 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002147
Kévin Petite8edce32019-04-10 14:23:32 +01002148 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002149 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002150
Kévin Petite8edce32019-04-10 14:23:32 +01002151 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002152
Kévin Petite8edce32019-04-10 14:23:32 +01002153 // Get our float2.
2154 return CallInst::Create(NewF, Load, "", CI);
2155 });
David Neto22f144c2017-06-12 14:26:21 -04002156}
2157
Romaric Jodin71fdb322022-05-03 17:01:10 +02002158bool ReplaceOpenCLBuiltinPass::replaceVloadHalf3(Function &F) {
2159 Module &M = *F.getParent();
2160 return replaceCallsWithValue(F, [&](CallInst *CI) {
2161 // The index argument from vload_half.
2162 auto Arg0 = CI->getOperand(0);
2163
2164 // The pointer argument from vload_half.
2165 auto Arg1 = CI->getOperand(1);
2166
2167 auto IntTy = Type::getInt32Ty(M.getContext());
2168 auto ShortTy = Type::getInt16Ty(M.getContext());
2169 auto FloatTy = Type::getFloatTy(M.getContext());
2170 auto Float2Ty = FixedVectorType::get(FloatTy, 2);
2171 auto Float3Ty = FixedVectorType::get(FloatTy, 3);
2172 auto NewPointerTy =
2173 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
2174 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2175
2176 auto Int0 = ConstantInt::get(IntTy, 0);
2177 auto Int1 = ConstantInt::get(IntTy, 1);
2178 auto Int2 = ConstantInt::get(IntTy, 2);
2179
2180 // Cast the half* pointer to short*.
2181 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2182
2183 // Load the first element
2184 auto Index0 = BinaryOperator::Create(
2185 Instruction::Add,
2186 BinaryOperator::Create(Instruction::Shl, Arg0, Int1, "", CI), Arg0, "",
2187 CI);
2188 auto GEP0 = GetElementPtrInst::Create(ShortTy, Cast, Index0, "", CI);
2189 auto Load0 = new LoadInst(ShortTy, GEP0, "", CI);
2190
2191 // Load the second element
2192 auto Index1 =
2193 BinaryOperator::Create(Instruction::Add, Index0, Int1, "", CI);
2194 auto GEP1 = GetElementPtrInst::Create(ShortTy, Cast, Index1, "", CI);
2195 auto Load1 = new LoadInst(ShortTy, GEP1, "", CI);
2196
2197 // Load the third element
2198 auto Index2 =
2199 BinaryOperator::Create(Instruction::Add, Index1, Int1, "", CI);
2200 auto GEP2 = GetElementPtrInst::Create(ShortTy, Cast, Index2, "", CI);
2201 auto Load2 = new LoadInst(ShortTy, GEP2, "", CI);
2202
2203 // Extend each short to int.
2204 auto X0 = CastInst::Create(Instruction::ZExt, Load0, IntTy, "", CI);
2205 auto X1 = CastInst::Create(Instruction::ZExt, Load1, IntTy, "", CI);
2206 auto X2 = CastInst::Create(Instruction::ZExt, Load2, IntTy, "", CI);
2207
2208 // Our intrinsic to unpack a float2 from an int.
2209 auto SPIRVIntrinsic = clspv::UnpackFunction();
2210
2211 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2212
2213 // Convert int to float2 and extract the uniq meaningful float
2214 auto Y0 = ExtractElementInst::Create(CallInst::Create(NewF, X0, "", CI),
2215 Int0, "", CI);
2216 auto Y1 = ExtractElementInst::Create(CallInst::Create(NewF, X1, "", CI),
2217 Int0, "", CI);
2218 auto Y2 = ExtractElementInst::Create(CallInst::Create(NewF, X2, "", CI),
2219 Int0, "", CI);
2220
2221 // Create the final float3 to be returned
2222 auto Combine =
2223 InsertElementInst::Create(UndefValue::get(Float3Ty), Y0, Int0, "", CI);
2224 Combine = InsertElementInst::Create(Combine, Y1, Int1, "", CI);
2225 Combine = InsertElementInst::Create(Combine, Y2, Int2, "", CI);
2226
2227 return Combine;
2228 });
2229}
2230
2231bool ReplaceOpenCLBuiltinPass::replaceVloadaHalf3(Function &F) {
2232 Module &M = *F.getParent();
2233 return replaceCallsWithValue(F, [&](CallInst *CI) {
2234 // The index argument from vload_half.
2235 auto Arg0 = CI->getOperand(0);
2236
2237 // The pointer argument from vload_half.
2238 auto Arg1 = CI->getOperand(1);
2239
2240 auto IntTy = Type::getInt32Ty(M.getContext());
2241 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2242 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
2243 auto NewPointerTy =
2244 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
2245 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2246
2247 // Cast the half* pointer to int2*.
2248 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2249
2250 // Index into the correct address of the casted pointer.
2251 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
2252
2253 // Load from the int2* we casted to.
2254 auto Load = new LoadInst(Int2Ty, Index, "", CI);
2255
2256 // Extract each element from the loaded int2.
2257 auto X =
2258 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2259 auto Y =
2260 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
2261
2262 // Our intrinsic to unpack a float2 from an int.
2263 auto SPIRVIntrinsic = clspv::UnpackFunction();
2264
2265 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2266
2267 // Get the lower (x & y) components of our final float4.
2268 auto Lo = CallInst::Create(NewF, X, "", CI);
2269
2270 // Get the higher (z & w) components of our final float4.
2271 auto Hi = CallInst::Create(NewF, Y, "", CI);
2272
2273 Constant *ShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2274 ConstantInt::get(IntTy, 1),
2275 ConstantInt::get(IntTy, 2)};
2276
2277 // Combine our two float2's into one float4.
2278 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2279 CI);
2280 });
2281}
2282
SJW2c317da2020-03-23 07:39:13 -05002283bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
2284 Module &M = *F.getParent();
2285 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002286 // The index argument from vload_half.
2287 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002288
Kévin Petite8edce32019-04-10 14:23:32 +01002289 // The pointer argument from vload_half.
2290 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002291
Kévin Petite8edce32019-04-10 14:23:32 +01002292 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002293 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2294 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002295 auto NewPointerTy =
2296 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002297 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002298
Kévin Petite8edce32019-04-10 14:23:32 +01002299 // Cast the half* pointer to int2*.
2300 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002301
Kévin Petite8edce32019-04-10 14:23:32 +01002302 // Index into the correct address of the casted pointer.
2303 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002304
Kévin Petite8edce32019-04-10 14:23:32 +01002305 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002306 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002307
Kévin Petite8edce32019-04-10 14:23:32 +01002308 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002309 auto X =
2310 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2311 auto Y =
2312 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002313
Kévin Petite8edce32019-04-10 14:23:32 +01002314 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002315 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002316
Kévin Petite8edce32019-04-10 14:23:32 +01002317 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002318
Kévin Petite8edce32019-04-10 14:23:32 +01002319 // Get the lower (x & y) components of our final float4.
2320 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002321
Kévin Petite8edce32019-04-10 14:23:32 +01002322 // Get the higher (z & w) components of our final float4.
2323 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002324
Kévin Petite8edce32019-04-10 14:23:32 +01002325 Constant *ShuffleMask[4] = {
2326 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2327 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002328
Kévin Petite8edce32019-04-10 14:23:32 +01002329 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002330 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2331 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002332 });
David Neto22f144c2017-06-12 14:26:21 -04002333}
2334
Romaric Jodin71fdb322022-05-03 17:01:10 +02002335bool ReplaceOpenCLBuiltinPass::replaceVloadHalf8(Function &F) {
2336 Module &M = *F.getParent();
2337 return replaceCallsWithValue(F, [&](CallInst *CI) {
2338 // The index argument from vload_half.
2339 auto Arg0 = CI->getOperand(0);
2340
2341 // The pointer argument from vload_half.
2342 auto Arg1 = CI->getOperand(1);
2343
2344 auto IntTy = Type::getInt32Ty(M.getContext());
2345 auto Int4Ty = FixedVectorType::get(IntTy, 4);
2346 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
2347 auto NewPointerTy =
2348 PointerType::get(Int4Ty, Arg1->getType()->getPointerAddressSpace());
2349 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2350
2351 // Cast the half* pointer to int4*.
2352 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2353
2354 // Index into the correct address of the casted pointer.
2355 auto Index = GetElementPtrInst::Create(Int4Ty, Cast, Arg0, "", CI);
2356
2357 // Load from the int4* we casted to.
2358 auto Load = new LoadInst(Int4Ty, Index, "", CI);
2359
2360 // Extract each element from the loaded int4.
2361 auto X1 =
2362 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2363 auto X2 =
2364 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
2365 auto X3 =
2366 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 2), "", CI);
2367 auto X4 =
2368 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 3), "", CI);
2369
2370 // Our intrinsic to unpack a float2 from an int.
2371 auto SPIRVIntrinsic = clspv::UnpackFunction();
2372
2373 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2374
2375 // Convert the 4 int into 4 float2
2376 auto Y1 = CallInst::Create(NewF, X1, "", CI);
2377 auto Y2 = CallInst::Create(NewF, X2, "", CI);
2378 auto Y3 = CallInst::Create(NewF, X3, "", CI);
2379 auto Y4 = CallInst::Create(NewF, X4, "", CI);
2380
2381 Constant *ShuffleMask4[4] = {
2382 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2383 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2384
2385 // Combine our two float2's into one float4.
2386 auto Z1 = new ShuffleVectorInst(Y1, Y2, ConstantVector::get(ShuffleMask4),
2387 "", CI);
2388 auto Z2 = new ShuffleVectorInst(Y3, Y4, ConstantVector::get(ShuffleMask4),
2389 "", CI);
2390
2391 Constant *ShuffleMask8[8] = {
2392 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2393 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3),
2394 ConstantInt::get(IntTy, 4), ConstantInt::get(IntTy, 5),
2395 ConstantInt::get(IntTy, 6), ConstantInt::get(IntTy, 7)};
2396
2397 // Combine our two float4's into one float8.
2398 return new ShuffleVectorInst(Z1, Z2, ConstantVector::get(ShuffleMask8), "",
2399 CI);
2400 });
2401}
2402
2403bool ReplaceOpenCLBuiltinPass::replaceVloadHalf16(Function &F) {
2404 Module &M = *F.getParent();
2405 return replaceCallsWithValue(F, [&](CallInst *CI) {
2406 // The index argument from vload_half.
2407 auto Arg0 = CI->getOperand(0);
2408
2409 // The pointer argument from vload_half.
2410 auto Arg1 = CI->getOperand(1);
2411
2412 auto IntTy = Type::getInt32Ty(M.getContext());
2413 auto Int4Ty = FixedVectorType::get(IntTy, 4);
2414 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
2415 auto NewPointerTy =
2416 PointerType::get(Int4Ty, Arg1->getType()->getPointerAddressSpace());
2417 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2418
2419 // Cast the half* pointer to int4*.
2420 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2421
2422 // Index into the correct address of the casted pointer.
2423 auto Arg0x2 = BinaryOperator::Create(Instruction::Shl, Arg0, ConstantInt::get(IntTy, 1), "", CI);
2424 auto Index1 = GetElementPtrInst::Create(Int4Ty, Cast, Arg0x2, "", CI);
2425 auto Arg0x2p1 = BinaryOperator::Create(Instruction::Add, Arg0x2, ConstantInt::get(IntTy, 1), "", CI);
2426 auto Index2 = GetElementPtrInst::Create(Int4Ty, Cast, Arg0x2p1, "", CI);
2427
2428 // Load from the int4* we casted to.
2429 auto Load1 = new LoadInst(Int4Ty, Index1, "", CI);
2430 auto Load2 = new LoadInst(Int4Ty, Index2, "", CI);
2431
2432 // Extract each element from the two loaded int4.
2433 auto X1 =
2434 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 0), "", CI);
2435 auto X2 =
2436 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 1), "", CI);
2437 auto X3 =
2438 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 2), "", CI);
2439 auto X4 =
2440 ExtractElementInst::Create(Load1, ConstantInt::get(IntTy, 3), "", CI);
2441 auto X5 =
2442 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 0), "", CI);
2443 auto X6 =
2444 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 1), "", CI);
2445 auto X7 =
2446 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 2), "", CI);
2447 auto X8 =
2448 ExtractElementInst::Create(Load2, ConstantInt::get(IntTy, 3), "", CI);
2449
2450 // Our intrinsic to unpack a float2 from an int.
2451 auto SPIRVIntrinsic = clspv::UnpackFunction();
2452
2453 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2454
2455 // Convert the eight int into float2
2456 auto Y1 = CallInst::Create(NewF, X1, "", CI);
2457 auto Y2 = CallInst::Create(NewF, X2, "", CI);
2458 auto Y3 = CallInst::Create(NewF, X3, "", CI);
2459 auto Y4 = CallInst::Create(NewF, X4, "", CI);
2460 auto Y5 = CallInst::Create(NewF, X5, "", CI);
2461 auto Y6 = CallInst::Create(NewF, X6, "", CI);
2462 auto Y7 = CallInst::Create(NewF, X7, "", CI);
2463 auto Y8 = CallInst::Create(NewF, X8, "", CI);
2464
2465 Constant *ShuffleMask4[4] = {
2466 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2467 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2468
2469 // Combine our two float2's into one float4.
2470 auto Z1 = new ShuffleVectorInst(Y1, Y2, ConstantVector::get(ShuffleMask4),
2471 "", CI);
2472 auto Z2 = new ShuffleVectorInst(Y3, Y4, ConstantVector::get(ShuffleMask4),
2473 "", CI);
2474 auto Z3 = new ShuffleVectorInst(Y5, Y6, ConstantVector::get(ShuffleMask4),
2475 "", CI);
2476 auto Z4 = new ShuffleVectorInst(Y7, Y8, ConstantVector::get(ShuffleMask4),
2477 "", CI);
2478
2479 Constant *ShuffleMask8[8] = {
2480 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2481 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3),
2482 ConstantInt::get(IntTy, 4), ConstantInt::get(IntTy, 5),
2483 ConstantInt::get(IntTy, 6), ConstantInt::get(IntTy, 7)};
2484
2485 // Combine our two float4's into one float8.
2486 auto Z5 = new ShuffleVectorInst(Z1, Z2, ConstantVector::get(ShuffleMask8),
2487 "", CI);
2488 auto Z6 = new ShuffleVectorInst(Z3, Z4, ConstantVector::get(ShuffleMask8),
2489 "", CI);
2490 Constant *ShuffleMask16[16] = {
2491 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2492 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3),
2493 ConstantInt::get(IntTy, 4), ConstantInt::get(IntTy, 5),
2494 ConstantInt::get(IntTy, 6), ConstantInt::get(IntTy, 7),
2495 ConstantInt::get(IntTy, 8), ConstantInt::get(IntTy, 9),
2496 ConstantInt::get(IntTy, 10), ConstantInt::get(IntTy, 11),
2497 ConstantInt::get(IntTy, 12), ConstantInt::get(IntTy, 13),
2498 ConstantInt::get(IntTy, 14), ConstantInt::get(IntTy, 15)};
2499 // Combine our two float8's into one float16.
2500 return new ShuffleVectorInst(Z5, Z6, ConstantVector::get(ShuffleMask16), "",
2501 CI);
2502 });
2503}
2504
SJW2c317da2020-03-23 07:39:13 -05002505bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002506
2507 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2508 //
2509 // %u = load i32 %ptr
Romaric Jodin71fdb322022-05-03 17:01:10 +02002510 // %result = call <2 x float> Unpack2xHalf(u)
SJW2c317da2020-03-23 07:39:13 -05002511 Module &M = *F.getParent();
2512 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002513 auto Index = CI->getOperand(0);
2514 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002515
Kévin Petite8edce32019-04-10 14:23:32 +01002516 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002517 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002518 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002519
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002520 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002521 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002522
Kévin Petite8edce32019-04-10 14:23:32 +01002523 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002524 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002525
Kévin Petite8edce32019-04-10 14:23:32 +01002526 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002527
Kévin Petite8edce32019-04-10 14:23:32 +01002528 // Get our final float2.
2529 return CallInst::Create(NewF, Load, "", CI);
2530 });
David Neto6ad93232018-06-07 15:42:58 -07002531}
2532
SJW2c317da2020-03-23 07:39:13 -05002533bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002534
2535 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2536 //
2537 // %u2 = load <2 x i32> %ptr
2538 // %u2xy = extractelement %u2, 0
2539 // %u2zw = extractelement %u2, 1
2540 // %fxy = call <2 x float> Unpack2xHalf(uint)
2541 // %fzw = call <2 x float> Unpack2xHalf(uint)
Romaric Jodin71fdb322022-05-03 17:01:10 +02002542 // %result = shufflevector %fxy %fzw <4 x float> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05002543 Module &M = *F.getParent();
2544 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002545 auto Index = CI->getOperand(0);
2546 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002547
Kévin Petite8edce32019-04-10 14:23:32 +01002548 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002549 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2550 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002551 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002552
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002553 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002554 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002555
Kévin Petite8edce32019-04-10 14:23:32 +01002556 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002557 auto X =
2558 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2559 auto Y =
2560 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002561
Kévin Petite8edce32019-04-10 14:23:32 +01002562 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002563 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002564
Kévin Petite8edce32019-04-10 14:23:32 +01002565 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002566
Kévin Petite8edce32019-04-10 14:23:32 +01002567 // Get the lower (x & y) components of our final float4.
2568 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002569
Kévin Petite8edce32019-04-10 14:23:32 +01002570 // Get the higher (z & w) components of our final float4.
2571 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002572
Kévin Petite8edce32019-04-10 14:23:32 +01002573 Constant *ShuffleMask[4] = {
2574 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2575 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002576
Kévin Petite8edce32019-04-10 14:23:32 +01002577 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002578 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2579 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002580 });
David Neto6ad93232018-06-07 15:42:58 -07002581}
2582
Romaric Jodin71fdb322022-05-03 17:01:10 +02002583bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size, bool aligned) {
SJW2c317da2020-03-23 07:39:13 -05002584 switch (vec_size) {
2585 case 0:
2586 return replaceVstoreHalf(F);
2587 case 2:
2588 return replaceVstoreHalf2(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02002589 case 3:
2590 return aligned ? replaceVstoreaHalf3(F) : replaceVstoreHalf3(F);
SJW2c317da2020-03-23 07:39:13 -05002591 case 4:
2592 return replaceVstoreHalf4(F);
Romaric Jodin71fdb322022-05-03 17:01:10 +02002593 case 8:
2594 return replaceVstoreHalf8(F);
2595 case 16:
2596 return replaceVstoreHalf16(F);
SJW2c317da2020-03-23 07:39:13 -05002597 default:
2598 llvm_unreachable("Unsupported vstore_half vector size");
2599 break;
2600 }
2601 return false;
2602}
David Neto22f144c2017-06-12 14:26:21 -04002603
SJW2c317da2020-03-23 07:39:13 -05002604bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
2605 Module &M = *F.getParent();
2606 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002607 // The value to store.
2608 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002609
Kévin Petite8edce32019-04-10 14:23:32 +01002610 // The index argument from vstore_half.
2611 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002612
Kévin Petite8edce32019-04-10 14:23:32 +01002613 // The pointer argument from vstore_half.
2614 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002615
Kévin Petite8edce32019-04-10 14:23:32 +01002616 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002617 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002618 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2619 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002620
Kévin Petite8edce32019-04-10 14:23:32 +01002621 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002622 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002623
Kévin Petite8edce32019-04-10 14:23:32 +01002624 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002625
Kévin Petite8edce32019-04-10 14:23:32 +01002626 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002627 auto TempVec = InsertElementInst::Create(
2628 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002629
Kévin Petite8edce32019-04-10 14:23:32 +01002630 // Pack the float2 -> half2 (in an int).
2631 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002632
alan-baker7efcaaa2020-05-06 19:33:27 -04002633 bool supports_16bit_storage = true;
2634 switch (Arg2->getType()->getPointerAddressSpace()) {
2635 case clspv::AddressSpace::Global:
2636 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2637 clspv::Option::StorageClass::kSSBO);
2638 break;
2639 case clspv::AddressSpace::Constant:
2640 if (clspv::Option::ConstantArgsInUniformBuffer())
2641 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2642 clspv::Option::StorageClass::kUBO);
2643 else
2644 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2645 clspv::Option::StorageClass::kSSBO);
2646 break;
2647 default:
2648 // Clspv will emit the Float16 capability if the half type is
2649 // encountered. That capability covers private and local addressspaces.
2650 break;
2651 }
2652
SJW2c317da2020-03-23 07:39:13 -05002653 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002654 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002655 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002656 auto ShortPointerTy =
2657 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002658
Kévin Petite8edce32019-04-10 14:23:32 +01002659 // Truncate our i32 to an i16.
2660 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002661
Kévin Petite8edce32019-04-10 14:23:32 +01002662 // Cast the half* pointer to short*.
2663 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002664
Kévin Petite8edce32019-04-10 14:23:32 +01002665 // Index into the correct address of the casted pointer.
2666 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002667
Kévin Petite8edce32019-04-10 14:23:32 +01002668 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002669 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002670 } else {
2671 // We can only write to 32-bit aligned words.
2672 //
2673 // Assuming base is aligned to 32-bits, replace the equivalent of
2674 // vstore_half(value, index, base)
2675 // with:
2676 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2677 // uint32_t write_to_upper_half = index & 1u;
2678 // uint32_t shift = write_to_upper_half << 4;
2679 //
2680 // // Pack the float value as a half number in bottom 16 bits
2681 // // of an i32.
2682 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2683 //
2684 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2685 // ^ ((packed & 0xffff) << shift)
2686 // // We only need relaxed consistency, but OpenCL 1.2 only has
2687 // // sequentially consistent atomics.
2688 // // TODO(dneto): Use relaxed consistency.
2689 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002690 auto IntPointerTy =
2691 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002692
Kévin Petite8edce32019-04-10 14:23:32 +01002693 auto Four = ConstantInt::get(IntTy, 4);
2694 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002695
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002696 auto IndexIsOdd =
2697 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002698 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002699 auto IndexIntoI32 =
2700 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2701 auto BaseI32Ptr =
2702 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2703 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2704 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002705 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002706 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002707 auto MaskBitsToWrite =
2708 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2709 auto MaskedCurrent = BinaryOperator::CreateAnd(
2710 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002711
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002712 auto XLowerBits =
2713 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2714 auto NewBitsToWrite =
2715 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2716 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2717 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002718
Kévin Petite8edce32019-04-10 14:23:32 +01002719 // Generate the call to atomi_xor.
2720 SmallVector<Type *, 5> ParamTypes;
2721 // The pointer type.
2722 ParamTypes.push_back(IntPointerTy);
2723 // The Types for memory scope, semantics, and value.
2724 ParamTypes.push_back(IntTy);
2725 ParamTypes.push_back(IntTy);
2726 ParamTypes.push_back(IntTy);
2727 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2728 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002729
Kévin Petite8edce32019-04-10 14:23:32 +01002730 const auto ConstantScopeDevice =
2731 ConstantInt::get(IntTy, spv::ScopeDevice);
2732 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2733 // (SPIR-V Workgroup).
2734 const auto AddrSpaceSemanticsBits =
2735 IntPointerTy->getPointerAddressSpace() == 1
2736 ? spv::MemorySemanticsUniformMemoryMask
2737 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002738
Kévin Petite8edce32019-04-10 14:23:32 +01002739 // We're using relaxed consistency here.
2740 const auto ConstantMemorySemantics =
2741 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2742 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002743
Kévin Petite8edce32019-04-10 14:23:32 +01002744 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2745 ConstantMemorySemantics, ValueToXor};
2746 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002747
2748 // Return a Nop so the old Call is removed
2749 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2750 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002751 }
David Neto22f144c2017-06-12 14:26:21 -04002752
SJW2c317da2020-03-23 07:39:13 -05002753 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002754 });
David Neto22f144c2017-06-12 14:26:21 -04002755}
2756
SJW2c317da2020-03-23 07:39:13 -05002757bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2758 Module &M = *F.getParent();
2759 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002760 // The value to store.
2761 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002762
Kévin Petite8edce32019-04-10 14:23:32 +01002763 // The index argument from vstore_half.
2764 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002765
Kévin Petite8edce32019-04-10 14:23:32 +01002766 // The pointer argument from vstore_half.
2767 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002768
Kévin Petite8edce32019-04-10 14:23:32 +01002769 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002770 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002771 auto NewPointerTy =
2772 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002773 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002774
Kévin Petite8edce32019-04-10 14:23:32 +01002775 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002776 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002777
Kévin Petite8edce32019-04-10 14:23:32 +01002778 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002779
Kévin Petite8edce32019-04-10 14:23:32 +01002780 // Turn the packed x & y into the final packing.
2781 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002782
Kévin Petite8edce32019-04-10 14:23:32 +01002783 // Cast the half* pointer to int*.
2784 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002785
Kévin Petite8edce32019-04-10 14:23:32 +01002786 // Index into the correct address of the casted pointer.
2787 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002788
Kévin Petite8edce32019-04-10 14:23:32 +01002789 // Store to the int* we casted to.
2790 return new StoreInst(X, Index, CI);
2791 });
David Neto22f144c2017-06-12 14:26:21 -04002792}
2793
Romaric Jodin71fdb322022-05-03 17:01:10 +02002794bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf3(Function &F) {
2795 Module &M = *F.getParent();
2796 return replaceCallsWithValue(F, [&](CallInst *CI) {
2797 // The value to store.
2798 auto Arg0 = CI->getOperand(0);
2799
2800 // The index argument from vstore_half.
2801 auto Arg1 = CI->getOperand(1);
2802
2803 // The pointer argument from vstore_half.
2804 auto Arg2 = CI->getOperand(2);
2805
2806 auto IntTy = Type::getInt32Ty(M.getContext());
2807 auto ShortTy = Type::getInt16Ty(M.getContext());
2808 auto FloatTy = Type::getFloatTy(M.getContext());
2809 auto Float2Ty = FixedVectorType::get(FloatTy, 2);
2810 auto NewPointerTy =
2811 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
2812 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2813
2814 auto Int0 = ConstantInt::get(IntTy, 0);
2815 auto Int1 = ConstantInt::get(IntTy, 1);
2816 auto Int2 = ConstantInt::get(IntTy, 2);
2817
2818 auto X0 = InsertElementInst::Create(
2819 UndefValue::get(Float2Ty),
2820 ExtractElementInst::Create(Arg0, Int0, "", CI), Int0, "", CI);
2821 auto X1 = InsertElementInst::Create(
2822 UndefValue::get(Float2Ty),
2823 ExtractElementInst::Create(Arg0, Int1, "", CI), Int0, "", CI);
2824 auto X2 = InsertElementInst::Create(
2825 UndefValue::get(Float2Ty),
2826 ExtractElementInst::Create(Arg0, Int2, "", CI), Int0, "", CI);
2827
2828 // Our intrinsic to pack a float2 to an int.
2829 auto SPIRVIntrinsic = clspv::PackFunction();
2830
2831 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2832
2833 // Convert float2 into int and trunc to short to keep only the meaningful
2834 // part of it
2835 auto Y0 =
2836 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X0, "", CI),
2837 ShortTy, "", CI);
2838 auto Y1 =
2839 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X1, "", CI),
2840 ShortTy, "", CI);
2841 auto Y2 =
2842 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X2, "", CI),
2843 ShortTy, "", CI);
2844
2845 // Cast the half* pointer to short*.
2846 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2847
2848 auto Index0 = BinaryOperator::Create(
2849 Instruction::Add,
2850 BinaryOperator::Create(Instruction::Shl, Arg1, Int1, "", CI), Arg1, "",
2851 CI);
2852 auto GEP0 = GetElementPtrInst::Create(ShortTy, Cast, Index0, "", CI);
2853 new StoreInst(Y0, GEP0, CI);
2854
2855 auto Index1 =
2856 BinaryOperator::Create(Instruction::Add, Index0, Int1, "", CI);
2857 auto GEP1 = GetElementPtrInst::Create(ShortTy, Cast, Index1, "", CI);
2858 new StoreInst(Y1, GEP1, CI);
2859
2860 auto Index2 =
2861 BinaryOperator::Create(Instruction::Add, Index1, Int1, "", CI);
2862 auto GEP2 = GetElementPtrInst::Create(ShortTy, Cast, Index2, "", CI);
2863 return new StoreInst(Y2, GEP2, CI);
2864 });
2865}
2866
2867bool ReplaceOpenCLBuiltinPass::replaceVstoreaHalf3(Function &F) {
2868 Module &M = *F.getParent();
2869 return replaceCallsWithValue(F, [&](CallInst *CI) {
2870 // The value to store.
2871 auto Arg0 = CI->getOperand(0);
2872
2873 // The index argument from vstore_half.
2874 auto Arg1 = CI->getOperand(1);
2875
2876 // The pointer argument from vstore_half.
2877 auto Arg2 = CI->getOperand(2);
2878
2879 auto IntTy = Type::getInt32Ty(M.getContext());
2880 auto ShortTy = Type::getInt16Ty(M.getContext());
2881 auto FloatTy = Type::getFloatTy(M.getContext());
2882 auto Float2Ty = FixedVectorType::get(FloatTy, 2);
2883 auto NewPointerTy =
2884 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
2885 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2886
2887 auto Int0 = ConstantInt::get(IntTy, 0);
2888 auto Int1 = ConstantInt::get(IntTy, 1);
2889 auto Int2 = ConstantInt::get(IntTy, 2);
2890
2891 auto X0 = InsertElementInst::Create(
2892 UndefValue::get(Float2Ty),
2893 ExtractElementInst::Create(Arg0, Int0, "", CI), Int0, "", CI);
2894 auto X1 = InsertElementInst::Create(
2895 UndefValue::get(Float2Ty),
2896 ExtractElementInst::Create(Arg0, Int1, "", CI), Int0, "", CI);
2897 auto X2 = InsertElementInst::Create(
2898 UndefValue::get(Float2Ty),
2899 ExtractElementInst::Create(Arg0, Int2, "", CI), Int0, "", CI);
2900
2901 // Our intrinsic to pack a float2 to an int.
2902 auto SPIRVIntrinsic = clspv::PackFunction();
2903
2904 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2905
2906 // Convert float2 into int and trunc to short to keep only the meaningful
2907 // part of it
2908 auto Y0 =
2909 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X0, "", CI),
2910 ShortTy, "", CI);
2911 auto Y1 =
2912 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X1, "", CI),
2913 ShortTy, "", CI);
2914 auto Y2 =
2915 CastInst::Create(Instruction::Trunc, CallInst::Create(NewF, X2, "", CI),
2916 ShortTy, "", CI);
2917
2918 // Cast the half* pointer to short*.
2919 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2920
2921 auto Index0 = BinaryOperator::Create(Instruction::Shl, Arg1, Int2, "", CI);
2922 auto GEP0 = GetElementPtrInst::Create(ShortTy, Cast, Index0, "", CI);
2923 new StoreInst(Y0, GEP0, CI);
2924
2925 auto Index1 =
2926 BinaryOperator::Create(Instruction::Add, Index0, Int1, "", CI);
2927 auto GEP1 = GetElementPtrInst::Create(ShortTy, Cast, Index1, "", CI);
2928 new StoreInst(Y1, GEP1, CI);
2929
2930 auto Index2 =
2931 BinaryOperator::Create(Instruction::Add, Index1, Int1, "", CI);
2932 auto GEP2 = GetElementPtrInst::Create(ShortTy, Cast, Index2, "", CI);
2933 return new StoreInst(Y2, GEP2, CI);
2934 });
2935}
2936
SJW2c317da2020-03-23 07:39:13 -05002937bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2938 Module &M = *F.getParent();
2939 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002940 // The value to store.
2941 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002942
Kévin Petite8edce32019-04-10 14:23:32 +01002943 // The index argument from vstore_half.
2944 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002945
Kévin Petite8edce32019-04-10 14:23:32 +01002946 // The pointer argument from vstore_half.
2947 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002948
Kévin Petite8edce32019-04-10 14:23:32 +01002949 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002950 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2951 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002952 auto NewPointerTy =
2953 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002954 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002955
Kévin Petite8edce32019-04-10 14:23:32 +01002956 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2957 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002958
Kévin Petite8edce32019-04-10 14:23:32 +01002959 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002960 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2961 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002962
Kévin Petite8edce32019-04-10 14:23:32 +01002963 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2964 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002965
Kévin Petite8edce32019-04-10 14:23:32 +01002966 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002967 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2968 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002969
Kévin Petite8edce32019-04-10 14:23:32 +01002970 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002971 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002972
Kévin Petite8edce32019-04-10 14:23:32 +01002973 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002974
Kévin Petite8edce32019-04-10 14:23:32 +01002975 // Turn the packed x & y into the final component of our int2.
2976 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002977
Kévin Petite8edce32019-04-10 14:23:32 +01002978 // Turn the packed z & w into the final component of our int2.
2979 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002980
Kévin Petite8edce32019-04-10 14:23:32 +01002981 auto Combine = InsertElementInst::Create(
2982 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002983 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2984 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002985
Kévin Petite8edce32019-04-10 14:23:32 +01002986 // Cast the half* pointer to int2*.
2987 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002988
Kévin Petite8edce32019-04-10 14:23:32 +01002989 // Index into the correct address of the casted pointer.
2990 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002991
Kévin Petite8edce32019-04-10 14:23:32 +01002992 // Store to the int2* we casted to.
2993 return new StoreInst(Combine, Index, CI);
2994 });
David Neto22f144c2017-06-12 14:26:21 -04002995}
2996
Romaric Jodin71fdb322022-05-03 17:01:10 +02002997bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf8(Function &F) {
2998 Module &M = *F.getParent();
2999 return replaceCallsWithValue(F, [&](CallInst *CI) {
3000 // The value to store.
3001 auto Arg0 = CI->getOperand(0);
3002
3003 // The index argument from vstore_half.
3004 auto Arg1 = CI->getOperand(1);
3005
3006 // The pointer argument from vstore_half.
3007 auto Arg2 = CI->getOperand(2);
3008
3009 auto IntTy = Type::getInt32Ty(M.getContext());
3010 auto Int4Ty = FixedVectorType::get(IntTy, 4);
3011 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
3012 auto NewPointerTy =
3013 PointerType::get(Int4Ty, Arg2->getType()->getPointerAddressSpace());
3014 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
3015
3016 Constant *ShuffleMask01[2] = {ConstantInt::get(IntTy, 0),
3017 ConstantInt::get(IntTy, 1)};
3018 auto X01 =
3019 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3020 ConstantVector::get(ShuffleMask01), "", CI);
3021 Constant *ShuffleMask23[2] = {ConstantInt::get(IntTy, 2),
3022 ConstantInt::get(IntTy, 3)};
3023 auto X23 =
3024 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3025 ConstantVector::get(ShuffleMask23), "", CI);
3026 Constant *ShuffleMask45[2] = {ConstantInt::get(IntTy, 4),
3027 ConstantInt::get(IntTy, 5)};
3028 auto X45 =
3029 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3030 ConstantVector::get(ShuffleMask45), "", CI);
3031 Constant *ShuffleMask67[2] = {ConstantInt::get(IntTy, 6),
3032 ConstantInt::get(IntTy, 7)};
3033 auto X67 =
3034 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3035 ConstantVector::get(ShuffleMask67), "", CI);
3036
3037 // Our intrinsic to pack a float2 to an int.
3038 auto SPIRVIntrinsic = clspv::PackFunction();
3039
3040 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
3041
3042 auto Y01 = CallInst::Create(NewF, X01, "", CI);
3043 auto Y23 = CallInst::Create(NewF, X23, "", CI);
3044 auto Y45 = CallInst::Create(NewF, X45, "", CI);
3045 auto Y67 = CallInst::Create(NewF, X67, "", CI);
3046
3047 auto Combine = InsertElementInst::Create(
3048 UndefValue::get(Int4Ty), Y01, ConstantInt::get(IntTy, 0), "", CI);
3049 Combine = InsertElementInst::Create(Combine, Y23,
3050 ConstantInt::get(IntTy, 1), "", CI);
3051 Combine = InsertElementInst::Create(Combine, Y45,
3052 ConstantInt::get(IntTy, 2), "", CI);
3053 Combine = InsertElementInst::Create(Combine, Y67,
3054 ConstantInt::get(IntTy, 3), "", CI);
3055
3056 // Cast the half* pointer to int4*.
3057 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
3058
3059 // Index into the correct address of the casted pointer.
3060 auto Index = GetElementPtrInst::Create(Int4Ty, Cast, Arg1, "", CI);
3061
3062 // Store to the int4* we casted to.
3063 return new StoreInst(Combine, Index, CI);
3064 });
3065}
3066
3067bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf16(Function &F) {
3068 Module &M = *F.getParent();
3069 return replaceCallsWithValue(F, [&](CallInst *CI) {
3070 // The value to store.
3071 auto Arg0 = CI->getOperand(0);
3072
3073 // The index argument from vstore_half.
3074 auto Arg1 = CI->getOperand(1);
3075
3076 // The pointer argument from vstore_half.
3077 auto Arg2 = CI->getOperand(2);
3078
3079 auto IntTy = Type::getInt32Ty(M.getContext());
3080 auto Int4Ty = FixedVectorType::get(IntTy, 4);
3081 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
3082 auto NewPointerTy =
3083 PointerType::get(Int4Ty, Arg2->getType()->getPointerAddressSpace());
3084 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
3085
3086 Constant *ShuffleMask0[2] = {ConstantInt::get(IntTy, 0),
3087 ConstantInt::get(IntTy, 1)};
3088 auto X0 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3089 ConstantVector::get(ShuffleMask0), "", CI);
3090 Constant *ShuffleMask1[2] = {ConstantInt::get(IntTy, 2),
3091 ConstantInt::get(IntTy, 3)};
3092 auto X1 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3093 ConstantVector::get(ShuffleMask1), "", CI);
3094 Constant *ShuffleMask2[2] = {ConstantInt::get(IntTy, 4),
3095 ConstantInt::get(IntTy, 5)};
3096 auto X2 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3097 ConstantVector::get(ShuffleMask2), "", CI);
3098 Constant *ShuffleMask3[2] = {ConstantInt::get(IntTy, 6),
3099 ConstantInt::get(IntTy, 7)};
3100 auto X3 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3101 ConstantVector::get(ShuffleMask3), "", CI);
3102 Constant *ShuffleMask4[2] = {ConstantInt::get(IntTy, 8),
3103 ConstantInt::get(IntTy, 9)};
3104 auto X4 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3105 ConstantVector::get(ShuffleMask4), "", CI);
3106 Constant *ShuffleMask5[2] = {ConstantInt::get(IntTy, 10),
3107 ConstantInt::get(IntTy, 11)};
3108 auto X5 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3109 ConstantVector::get(ShuffleMask5), "", CI);
3110 Constant *ShuffleMask6[2] = {ConstantInt::get(IntTy, 12),
3111 ConstantInt::get(IntTy, 13)};
3112 auto X6 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3113 ConstantVector::get(ShuffleMask6), "", CI);
3114 Constant *ShuffleMask7[2] = {ConstantInt::get(IntTy, 14),
3115 ConstantInt::get(IntTy, 15)};
3116 auto X7 = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
3117 ConstantVector::get(ShuffleMask7), "", CI);
3118
3119 // Our intrinsic to pack a float2 to an int.
3120 auto SPIRVIntrinsic = clspv::PackFunction();
3121
3122 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
3123
3124 auto Y0 = CallInst::Create(NewF, X0, "", CI);
3125 auto Y1 = CallInst::Create(NewF, X1, "", CI);
3126 auto Y2 = CallInst::Create(NewF, X2, "", CI);
3127 auto Y3 = CallInst::Create(NewF, X3, "", CI);
3128 auto Y4 = CallInst::Create(NewF, X4, "", CI);
3129 auto Y5 = CallInst::Create(NewF, X5, "", CI);
3130 auto Y6 = CallInst::Create(NewF, X6, "", CI);
3131 auto Y7 = CallInst::Create(NewF, X7, "", CI);
3132
3133 auto Combine1 = InsertElementInst::Create(
3134 UndefValue::get(Int4Ty), Y0, ConstantInt::get(IntTy, 0), "", CI);
3135 Combine1 = InsertElementInst::Create(Combine1, Y1,
3136 ConstantInt::get(IntTy, 1), "", CI);
3137 Combine1 = InsertElementInst::Create(Combine1, Y2,
3138 ConstantInt::get(IntTy, 2), "", CI);
3139 Combine1 = InsertElementInst::Create(Combine1, Y3,
3140 ConstantInt::get(IntTy, 3), "", CI);
3141
3142 auto Combine2 = InsertElementInst::Create(
3143 UndefValue::get(Int4Ty), Y4, ConstantInt::get(IntTy, 0), "", CI);
3144 Combine2 = InsertElementInst::Create(Combine2, Y5,
3145 ConstantInt::get(IntTy, 1), "", CI);
3146 Combine2 = InsertElementInst::Create(Combine2, Y6,
3147 ConstantInt::get(IntTy, 2), "", CI);
3148 Combine2 = InsertElementInst::Create(Combine2, Y7,
3149 ConstantInt::get(IntTy, 3), "", CI);
3150
3151 // Cast the half* pointer to int4*.
3152 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
3153
3154 // Index into the correct address of the casted pointer.
3155 auto Arg1x2 = BinaryOperator::Create(Instruction::Shl, Arg1,
3156 ConstantInt::get(IntTy, 1), "", CI);
3157 auto Index1 = GetElementPtrInst::Create(Int4Ty, Cast, Arg1x2, "", CI);
3158
3159 // Store to the int4* we casted to.
3160 new StoreInst(Combine1, Index1, CI);
3161
3162 // Index into the correct address of the casted pointer.
3163 auto Arg1Plus1 = BinaryOperator::Create(Instruction::Add, Arg1x2,
3164 ConstantInt::get(IntTy, 1), "", CI);
3165 auto Index2 = GetElementPtrInst::Create(Int4Ty, Cast, Arg1Plus1, "", CI);
3166
3167 // Store to the int4* we casted to.
3168 return new StoreInst(Combine2, Index2, CI);
3169 });
3170}
3171
SJW2c317da2020-03-23 07:39:13 -05003172bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
3173 // convert half to float
3174 Module &M = *F.getParent();
3175 return replaceCallsWithValue(F, [&](CallInst *CI) {
3176 SmallVector<Type *, 3> types;
3177 SmallVector<Value *, 3> args;
alan-baker5641f5c2021-10-15 09:16:04 -04003178 for (size_t i = 0; i < CI->arg_size(); ++i) {
SJW2c317da2020-03-23 07:39:13 -05003179 types.push_back(CI->getArgOperand(i)->getType());
3180 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05003181 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05003182
alan-baker5a8c3be2020-09-09 13:44:26 -04003183 auto NewFType =
3184 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
3185 cast<VectorType>(CI->getType())
3186 ->getElementCount()
3187 .getKnownMinValue()),
3188 types, false);
SJW2c317da2020-03-23 07:39:13 -05003189
SJW61531372020-06-09 07:31:08 -05003190 std::string NewFName =
3191 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05003192
3193 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
3194
3195 auto NewCI = CallInst::Create(NewF, args, "", CI);
3196
3197 // Convert to the half type.
3198 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
3199 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05003200}
3201
SJW2c317da2020-03-23 07:39:13 -05003202bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
3203 // convert half to float
3204 Module &M = *F.getParent();
3205 return replaceCallsWithValue(F, [&](CallInst *CI) {
3206 SmallVector<Type *, 3> types(3);
3207 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003208
SJW2c317da2020-03-23 07:39:13 -05003209 // Image
3210 types[0] = CI->getArgOperand(0)->getType();
3211 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003212
SJW2c317da2020-03-23 07:39:13 -05003213 // Coord
3214 types[1] = CI->getArgOperand(1)->getType();
3215 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003216
SJW2c317da2020-03-23 07:39:13 -05003217 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04003218 types[2] =
3219 FixedVectorType::get(Type::getFloatTy(M.getContext()),
3220 cast<VectorType>(CI->getArgOperand(2)->getType())
3221 ->getElementCount()
3222 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05003223
SJW2c317da2020-03-23 07:39:13 -05003224 auto NewFType =
3225 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003226
SJW61531372020-06-09 07:31:08 -05003227 std::string NewFName =
3228 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003229
SJW2c317da2020-03-23 07:39:13 -05003230 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05003231
SJW2c317da2020-03-23 07:39:13 -05003232 // Convert data to the float type.
3233 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
3234 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05003235
SJW2c317da2020-03-23 07:39:13 -05003236 return CallInst::Create(NewF, args, "", CI);
3237 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05003238}
3239
SJW2c317da2020-03-23 07:39:13 -05003240bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
3241 Function &F) {
3242 // convert read_image with int coords to float coords
3243 Module &M = *F.getParent();
3244 return replaceCallsWithValue(F, [&](CallInst *CI) {
3245 // The image.
3246 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04003247
SJW2c317da2020-03-23 07:39:13 -05003248 // The sampler.
3249 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04003250
SJW2c317da2020-03-23 07:39:13 -05003251 // The coordinate (integer type that we can't handle).
3252 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04003253
Romaric Jodin9b353742022-01-25 17:21:02 +01003254 uint32_t dim = clspv::ImageNumDimensions(Arg0->getType());
SJW2c317da2020-03-23 07:39:13 -05003255 uint32_t components =
3256 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
3257 Type *float_ty = nullptr;
3258 if (components == 1) {
3259 float_ty = Type::getFloatTy(M.getContext());
3260 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04003261 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
3262 cast<VectorType>(Arg2->getType())
3263 ->getElementCount()
3264 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04003265 }
David Neto22f144c2017-06-12 14:26:21 -04003266
SJW2c317da2020-03-23 07:39:13 -05003267 auto NewFType = FunctionType::get(
3268 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
3269
3270 std::string NewFName = F.getName().str();
3271 NewFName[NewFName.length() - 1] = 'f';
3272
3273 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
3274
3275 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
3276
3277 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
3278 });
David Neto22f144c2017-06-12 14:26:21 -04003279}
3280
SJW2c317da2020-03-23 07:39:13 -05003281bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
3282 return replaceCallsWithValue(F, [&](CallInst *CI) {
3283 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04003284
SJW2c317da2020-03-23 07:39:13 -05003285 // We need to map the OpenCL constants to the SPIR-V equivalents.
3286 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
3287 const auto ConstantMemorySemantics = ConstantInt::get(
3288 IntTy, spv::MemorySemanticsUniformMemoryMask |
3289 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04003290
SJW2c317da2020-03-23 07:39:13 -05003291 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04003292
SJW2c317da2020-03-23 07:39:13 -05003293 // The pointer.
3294 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04003295
SJW2c317da2020-03-23 07:39:13 -05003296 // The memory scope.
3297 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04003298
SJW2c317da2020-03-23 07:39:13 -05003299 // The memory semantics.
3300 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04003301
alan-baker5641f5c2021-10-15 09:16:04 -04003302 if (2 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05003303 // The unequal memory semantics.
3304 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04003305
SJW2c317da2020-03-23 07:39:13 -05003306 // The value.
3307 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04003308
SJW2c317da2020-03-23 07:39:13 -05003309 // The comparator.
3310 Params.push_back(CI->getArgOperand(1));
alan-baker5641f5c2021-10-15 09:16:04 -04003311 } else if (1 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05003312 // The value.
3313 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04003314 }
David Neto22f144c2017-06-12 14:26:21 -04003315
SJW2c317da2020-03-23 07:39:13 -05003316 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
3317 });
David Neto22f144c2017-06-12 14:26:21 -04003318}
3319
SJW2c317da2020-03-23 07:39:13 -05003320bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
3321 llvm::AtomicRMWInst::BinOp Op) {
3322 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04003323 auto align = F.getParent()->getDataLayout().getABITypeAlign(
3324 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05003325 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04003326 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05003327 SyncScope::System, CI);
3328 });
3329}
David Neto22f144c2017-06-12 14:26:21 -04003330
SJW2c317da2020-03-23 07:39:13 -05003331bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
3332 Module &M = *F.getParent();
3333 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04003334 auto IntTy = Type::getInt32Ty(M.getContext());
3335 auto FloatTy = Type::getFloatTy(M.getContext());
3336
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003337 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
3338 ConstantInt::get(IntTy, 1),
3339 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04003340
3341 Constant *UpShuffleMask[4] = {
3342 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3343 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3344
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003345 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
3346 UndefValue::get(FloatTy),
3347 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04003348
Kévin Petite8edce32019-04-10 14:23:32 +01003349 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003350 auto Arg0 =
3351 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
3352 ConstantVector::get(DownShuffleMask), "", CI);
3353 auto Arg1 =
3354 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
3355 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003356 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04003357
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003358 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05003359 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003360
SJW61531372020-06-09 07:31:08 -05003361 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003362
Kévin Petite8edce32019-04-10 14:23:32 +01003363 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003364
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003365 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
3366 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003367 });
David Neto22f144c2017-06-12 14:26:21 -04003368}
David Neto62653202017-10-16 19:05:18 -04003369
SJW2c317da2020-03-23 07:39:13 -05003370bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04003371 // OpenCL's float result = fract(float x, float* ptr)
3372 //
3373 // In the LLVM domain:
3374 //
3375 // %floor_result = call spir_func float @floor(float %x)
3376 // store float %floor_result, float * %ptr
3377 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3378 // %result = call spir_func float
3379 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3380 //
3381 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3382 // and clspv.fract occur in the SPIR-V generator pass:
3383 //
3384 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3385 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3386 // ...
3387 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3388 // OpStore %ptr %floor_result
3389 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3390 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01003391 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04003392
David Neto62653202017-10-16 19:05:18 -04003393 using std::string;
3394
3395 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3396 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04003397
SJW2c317da2020-03-23 07:39:13 -05003398 Module &M = *F.getParent();
3399 return replaceCallsWithValue(F, [&](CallInst *CI) {
SJW2c317da2020-03-23 07:39:13 -05003400 // This is either float or a float vector. All the float-like
3401 // types are this type.
3402 auto result_ty = F.getReturnType();
3403
SJW61531372020-06-09 07:31:08 -05003404 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05003405 Function *fmin_fn = M.getFunction(fmin_name);
3406 if (!fmin_fn) {
3407 // Make the fmin function.
3408 FunctionType *fn_ty =
3409 FunctionType::get(result_ty, {result_ty, result_ty}, false);
3410 fmin_fn =
3411 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
3412 fmin_fn->addFnAttr(Attribute::ReadNone);
3413 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3414 }
3415
SJW61531372020-06-09 07:31:08 -05003416 std::string floor_name =
3417 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05003418 Function *floor_fn = M.getFunction(floor_name);
3419 if (!floor_fn) {
3420 // Make the floor function.
3421 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
3422 floor_fn =
3423 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
3424 floor_fn->addFnAttr(Attribute::ReadNone);
3425 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3426 }
3427
SJW61531372020-06-09 07:31:08 -05003428 std::string clspv_fract_name =
3429 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05003430 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
3431 if (!clspv_fract_fn) {
3432 // Make the clspv_fract function.
3433 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
3434 clspv_fract_fn = cast<Function>(
3435 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
3436 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3437 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3438 }
3439
3440 // Number of significant significand bits, whether represented or not.
3441 unsigned num_significand_bits;
3442 switch (result_ty->getScalarType()->getTypeID()) {
3443 case Type::HalfTyID:
3444 num_significand_bits = 11;
3445 break;
3446 case Type::FloatTyID:
3447 num_significand_bits = 24;
3448 break;
3449 case Type::DoubleTyID:
3450 num_significand_bits = 53;
3451 break;
3452 default:
3453 llvm_unreachable("Unhandled float type when processing fract builtin");
3454 break;
3455 }
3456 // Beware that the disassembler displays this value as
3457 // OpConstant %float 1
3458 // which is not quite right.
3459 const double kJustUnderOneScalar =
3460 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3461
3462 Constant *just_under_one =
3463 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3464 if (result_ty->isVectorTy()) {
3465 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04003466 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05003467 }
3468
3469 IRBuilder<> Builder(CI);
3470
3471 auto arg = CI->getArgOperand(0);
3472 auto ptr = CI->getArgOperand(1);
3473
3474 // Compute floor result and store it.
3475 auto floor = Builder.CreateCall(floor_fn, {arg});
3476 Builder.CreateStore(floor, ptr);
3477
3478 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
3479 auto fract_result =
3480 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
3481
3482 return fract_result;
3483 });
David Neto62653202017-10-16 19:05:18 -04003484}
alan-bakera52b7312020-10-26 08:58:51 -04003485
Kévin Petit8576f682020-11-02 14:51:32 +00003486bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04003487 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00003488 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04003489 // a_shr = a >> 1
3490 // b_shr = b >> 1
3491 // add1 = a_shr + b_shr
3492 // join = a |join_opcode| b
3493 // and = join & 1
3494 // add = add1 + and
3495 const auto a = Call->getArgOperand(0);
3496 const auto b = Call->getArgOperand(1);
3497 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00003498 Value *a_shift, *b_shift;
3499 if (is_signed) {
3500 a_shift = builder.CreateAShr(a, 1);
3501 b_shift = builder.CreateAShr(b, 1);
3502 } else {
3503 a_shift = builder.CreateLShr(a, 1);
3504 b_shift = builder.CreateLShr(b, 1);
3505 }
alan-bakerb6da5132020-10-29 15:59:06 -04003506 auto add = builder.CreateAdd(a_shift, b_shift);
3507 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
3508 auto constant_one = ConstantInt::get(a->getType(), 1);
3509 auto and_bit = builder.CreateAnd(join, constant_one);
3510 return builder.CreateAdd(add, and_bit);
3511 });
3512}
3513
alan-baker3f1bf492020-11-05 09:07:36 -05003514bool ReplaceOpenCLBuiltinPass::replaceAddSubSat(Function &F, bool is_signed,
3515 bool is_add) {
3516 return replaceCallsWithValue(F, [&F, this, is_signed,
3517 is_add](CallInst *Call) {
3518 auto ty = Call->getType();
3519 auto a = Call->getArgOperand(0);
3520 auto b = Call->getArgOperand(1);
3521 IRBuilder<> builder(Call);
alan-bakera52b7312020-10-26 08:58:51 -04003522 if (is_signed) {
3523 unsigned bitwidth = ty->getScalarSizeInBits();
3524 if (bitwidth < 32) {
alan-baker3f1bf492020-11-05 09:07:36 -05003525 unsigned extended_width = bitwidth << 1;
Romaric Jodin73ef1be2022-01-25 17:21:22 +01003526 if (clspv::Option::HackClampWidth() && extended_width < 32) {
3527 extended_width = 32;
3528 }
alan-baker3f1bf492020-11-05 09:07:36 -05003529 Type *extended_ty =
3530 IntegerType::get(Call->getContext(), extended_width);
3531 Constant *min = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04003532 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05003533 APInt::getSignedMinValue(bitwidth).sext(extended_width));
3534 Constant *max = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04003535 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05003536 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
alan-bakera52b7312020-10-26 08:58:51 -04003537 // Don't use the type in GetMangledFunctionName to ensure we get
3538 // signed parameters.
3539 std::string sclamp_name = Builtins::GetMangledFunctionName("clamp");
alan-bakera52b7312020-10-26 08:58:51 -04003540 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
alan-baker3f1bf492020-11-05 09:07:36 -05003541 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
3542 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3543 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3544 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
3545 if (extended_width == 32) {
alan-bakera52b7312020-10-26 08:58:51 -04003546 sclamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
alan-bakera52b7312020-10-26 08:58:51 -04003547 } else {
3548 sclamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
3549 }
alan-baker3f1bf492020-11-05 09:07:36 -05003550 } else {
3551 if (extended_width == 32) {
3552 sclamp_name += "iii";
3553 } else {
3554 sclamp_name += "sss";
3555 }
alan-bakera52b7312020-10-26 08:58:51 -04003556 }
alan-baker3f1bf492020-11-05 09:07:36 -05003557
3558 auto sext_a = builder.CreateSExt(a, extended_ty);
3559 auto sext_b = builder.CreateSExt(b, extended_ty);
3560 Value *op = nullptr;
3561 // Extended operations won't wrap.
3562 if (is_add)
3563 op = builder.CreateAdd(sext_a, sext_b, "", true, true);
3564 else
3565 op = builder.CreateSub(sext_a, sext_b, "", true, true);
3566 auto clamp_ty = FunctionType::get(
3567 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
3568 auto callee = F.getParent()->getOrInsertFunction(sclamp_name, clamp_ty);
3569 auto clamp = builder.CreateCall(callee, {op, min, max});
3570 return builder.CreateTrunc(clamp, ty);
alan-bakera52b7312020-10-26 08:58:51 -04003571 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05003572 // Add:
3573 // c = a + b
alan-bakera52b7312020-10-26 08:58:51 -04003574 // if (b < 0)
3575 // c = c > a ? min : c;
3576 // else
alan-baker3f1bf492020-11-05 09:07:36 -05003577 // c = c < a ? max : c;
alan-bakera52b7312020-10-26 08:58:51 -04003578 //
alan-baker3f1bf492020-11-05 09:07:36 -05003579 // Sub:
3580 // c = a - b;
3581 // if (b < 0)
3582 // c = c < a ? max : c;
3583 // else
3584 // c = c > a ? min : c;
3585 Constant *min = ConstantInt::get(Call->getContext(),
3586 APInt::getSignedMinValue(bitwidth));
3587 Constant *max = ConstantInt::get(Call->getContext(),
3588 APInt::getSignedMaxValue(bitwidth));
alan-bakera52b7312020-10-26 08:58:51 -04003589 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3590 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3591 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3592 }
alan-baker3f1bf492020-11-05 09:07:36 -05003593 Value *op = nullptr;
3594 if (is_add) {
3595 op = builder.CreateAdd(a, b);
3596 } else {
3597 op = builder.CreateSub(a, b);
3598 }
3599 auto b_lt_0 = builder.CreateICmpSLT(b, Constant::getNullValue(ty));
3600 auto op_gt_a = builder.CreateICmpSGT(op, a);
3601 auto op_lt_a = builder.CreateICmpSLT(op, a);
3602 auto neg_cmp = is_add ? op_gt_a : op_lt_a;
3603 auto pos_cmp = is_add ? op_lt_a : op_gt_a;
3604 auto neg_value = is_add ? min : max;
3605 auto pos_value = is_add ? max : min;
3606 auto neg_clamp = builder.CreateSelect(neg_cmp, neg_value, op);
3607 auto pos_clamp = builder.CreateSelect(pos_cmp, pos_value, op);
3608 return builder.CreateSelect(b_lt_0, neg_clamp, pos_clamp);
alan-bakera52b7312020-10-26 08:58:51 -04003609 }
3610 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05003611 // Replace with OpIAddCarry/OpISubBorrow and clamp to max/0 on a
3612 // carr/borrow.
3613 spv::Op op = is_add ? spv::OpIAddCarry : spv::OpISubBorrow;
3614 auto clamp_value =
3615 is_add ? Constant::getAllOnesValue(ty) : Constant::getNullValue(ty);
3616 auto struct_ty = GetPairStruct(ty);
3617 auto call =
3618 InsertSPIRVOp(Call, op, {Attribute::ReadNone}, struct_ty, {a, b});
3619 auto add_sub = builder.CreateExtractValue(call, {0});
3620 auto carry_borrow = builder.CreateExtractValue(call, {1});
3621 auto cmp = builder.CreateICmpEQ(carry_borrow, Constant::getNullValue(ty));
3622 return builder.CreateSelect(cmp, add_sub, clamp_value);
alan-bakera52b7312020-10-26 08:58:51 -04003623 }
alan-bakera52b7312020-10-26 08:58:51 -04003624 });
3625}
alan-baker4986eff2020-10-29 13:38:00 -04003626
3627bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
3628 return replaceCallsWithValue(F, [](CallInst *Call) {
3629 auto pointer = Call->getArgOperand(0);
3630 // Clang emits an address space cast to the generic address space. Skip the
3631 // cast and use the input directly.
3632 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3633 pointer = cast->getPointerOperand();
3634 }
alan-baker5641f5c2021-10-15 09:16:04 -04003635 Value *order_arg = Call->arg_size() > 1 ? Call->getArgOperand(1) : nullptr;
3636 Value *scope_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003637 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3638 clspv::AddressSpace::Global;
3639 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
3640 spv::MemorySemanticsAcquireMask);
3641 auto scope = MemoryScope(scope_arg, is_global, Call);
3642 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
3643 Call->getType(), {pointer, scope, order});
3644 });
3645}
3646
3647bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
3648 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
3649 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
3650 auto pointer = Call->getArgOperand(0);
3651 // Clang emits an address space cast to the generic address space. Skip the
3652 // cast and use the input directly.
3653 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3654 pointer = cast->getPointerOperand();
3655 }
3656 Value *value = Call->getArgOperand(1);
alan-baker5641f5c2021-10-15 09:16:04 -04003657 Value *order_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
3658 Value *scope_arg = Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003659 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3660 clspv::AddressSpace::Global;
3661 auto scope = MemoryScope(scope_arg, is_global, Call);
3662 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
3663 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
3664 {pointer, scope, order, value});
3665 });
3666}
3667
3668bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
3669 return replaceCallsWithValue(F, [](CallInst *Call) {
3670 auto pointer = Call->getArgOperand(0);
3671 // Clang emits an address space cast to the generic address space. Skip the
3672 // cast and use the input directly.
3673 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3674 pointer = cast->getPointerOperand();
3675 }
3676 auto expected = Call->getArgOperand(1);
3677 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
3678 expected = cast->getPointerOperand();
3679 }
3680 auto value = Call->getArgOperand(2);
3681 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3682 clspv::AddressSpace::Global;
3683 Value *success_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04003684 Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003685 Value *failure_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04003686 Call->arg_size() > 4 ? Call->getArgOperand(4) : nullptr;
3687 Value *scope_arg = Call->arg_size() > 5 ? Call->getArgOperand(5) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003688 auto scope = MemoryScope(scope_arg, is_global, Call);
3689 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
3690 spv::MemorySemanticsAcquireReleaseMask);
3691 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
3692 spv::MemorySemanticsAcquireMask);
3693
3694 // If the value pointed to by |expected| equals the value pointed to by
3695 // |pointer|, |value| is written into |pointer|, otherwise the value in
3696 // |pointer| is written into |expected|. In order to avoid extra stores,
3697 // the basic block with the original atomic is split and the store is
3698 // performed in the |then| block. The condition is the inversion of the
3699 // comparison result.
3700 IRBuilder<> builder(Call);
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01003701 auto load = builder.CreateLoad(expected->getType()->getPointerElementType(),
3702 expected);
alan-baker4986eff2020-10-29 13:38:00 -04003703 auto cmp_xchg = InsertSPIRVOp(
3704 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
3705 value->getType(), {pointer, scope, success, failure, value, load});
3706 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
3707 auto not_cmp = builder.CreateNot(cmp);
3708 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
3709 builder.SetInsertPoint(then_branch);
3710 builder.CreateStore(cmp_xchg, expected);
3711 return cmp;
3712 });
3713}
alan-bakercc2bafb2020-11-02 08:30:18 -05003714
alan-baker2cecaa72020-11-05 14:05:20 -05003715bool ReplaceOpenCLBuiltinPass::replaceCountZeroes(Function &F, bool leading) {
alan-bakercc2bafb2020-11-02 08:30:18 -05003716 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3717 return false;
3718
3719 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
alan-baker5f2e88e2020-12-07 15:24:04 -05003720 if (bitwidth > 64)
alan-bakercc2bafb2020-11-02 08:30:18 -05003721 return false;
3722
alan-baker5f2e88e2020-12-07 15:24:04 -05003723 return replaceCallsWithValue(F, [&F, leading](CallInst *Call) {
3724 Function *intrinsic = Intrinsic::getDeclaration(
3725 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz,
3726 Call->getType());
3727 const auto c_false = ConstantInt::getFalse(Call->getContext());
3728 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
3729 {Call->getArgOperand(0), c_false}, "", Call);
alan-bakercc2bafb2020-11-02 08:30:18 -05003730 });
3731}
alan-baker6b9d1ee2020-11-03 23:11:32 -05003732
3733bool ReplaceOpenCLBuiltinPass::replaceMadSat(Function &F, bool is_signed) {
3734 return replaceCallsWithValue(F, [&F, is_signed, this](CallInst *Call) {
3735 const auto ty = Call->getType();
3736 const auto a = Call->getArgOperand(0);
3737 const auto b = Call->getArgOperand(1);
3738 const auto c = Call->getArgOperand(2);
3739 IRBuilder<> builder(Call);
3740 if (is_signed) {
3741 unsigned bitwidth = Call->getType()->getScalarSizeInBits();
3742 if (bitwidth < 32) {
3743 // mul = sext(a) * sext(b)
3744 // add = mul + sext(c)
3745 // res = clamp(add, MIN, MAX)
3746 unsigned extended_width = bitwidth << 1;
Romaric Jodin73ef1be2022-01-25 17:21:22 +01003747 if (clspv::Option::HackClampWidth() && extended_width < 32) {
3748 extended_width = 32;
3749 }
alan-baker6b9d1ee2020-11-03 23:11:32 -05003750 Type *extended_ty = IntegerType::get(F.getContext(), extended_width);
3751 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3752 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
3753 }
3754 auto a_sext = builder.CreateSExt(a, extended_ty);
3755 auto b_sext = builder.CreateSExt(b, extended_ty);
3756 auto c_sext = builder.CreateSExt(c, extended_ty);
3757 // Extended the size so no overflows occur.
3758 auto mul = builder.CreateMul(a_sext, b_sext, "", true, true);
3759 auto add = builder.CreateAdd(mul, c_sext, "", true, true);
3760 auto func_ty = FunctionType::get(
3761 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
3762 // Don't use function type because we need signed parameters.
3763 std::string clamp_name = Builtins::GetMangledFunctionName("clamp");
3764 // The clamp values are the signed min and max of the original bitwidth
3765 // sign extended to the extended bitwidth.
3766 Constant *min = ConstantInt::get(
3767 Call->getContext(),
3768 APInt::getSignedMinValue(bitwidth).sext(extended_width));
3769 Constant *max = ConstantInt::get(
3770 Call->getContext(),
3771 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
3772 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3773 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3774 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3775 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
3776 if (extended_width == 32)
3777 clamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
3778 else
3779 clamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
3780 } else {
3781 if (extended_width == 32)
3782 clamp_name += "iii";
3783 else
3784 clamp_name += "sss";
3785 }
3786 auto callee = F.getParent()->getOrInsertFunction(clamp_name, func_ty);
3787 auto clamp = builder.CreateCall(callee, {add, min, max});
3788 return builder.CreateTrunc(clamp, ty);
3789 } else {
alan-baker6b9d1ee2020-11-03 23:11:32 -05003790 // Compute
3791 // {hi, lo} = smul_extended(a, b)
3792 // add = lo + c
Romaric Jodinc507f312022-04-08 19:09:45 +02003793 auto mul_ext = InsertOpMulExtended(Call, a, b, true);
3794
alan-baker6b9d1ee2020-11-03 23:11:32 -05003795 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3796 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3797 auto add = builder.CreateAdd(mul_lo, c);
3798
3799 // Constants for use in the calculation.
3800 Constant *min = ConstantInt::get(Call->getContext(),
3801 APInt::getSignedMinValue(bitwidth));
3802 Constant *max = ConstantInt::get(Call->getContext(),
3803 APInt::getSignedMaxValue(bitwidth));
3804 Constant *max_plus_1 = ConstantInt::get(
3805 Call->getContext(),
3806 APInt::getSignedMaxValue(bitwidth) + APInt(bitwidth, 1));
3807 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3808 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3809 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3810 max_plus_1 =
3811 ConstantVector::getSplat(vec_ty->getElementCount(), max_plus_1);
3812 }
3813
3814 auto a_xor_b = builder.CreateXor(a, b);
3815 auto same_sign =
3816 builder.CreateICmpSGT(a_xor_b, Constant::getAllOnesValue(ty));
3817 auto different_sign = builder.CreateNot(same_sign);
3818 auto hi_eq_0 = builder.CreateICmpEQ(mul_hi, Constant::getNullValue(ty));
3819 auto hi_ne_0 = builder.CreateNot(hi_eq_0);
3820 auto lo_ge_max = builder.CreateICmpUGE(mul_lo, max);
3821 auto c_gt_0 = builder.CreateICmpSGT(c, Constant::getNullValue(ty));
3822 auto c_lt_0 = builder.CreateICmpSLT(c, Constant::getNullValue(ty));
3823 auto add_gt_max = builder.CreateICmpUGT(add, max);
3824 auto hi_eq_m1 =
3825 builder.CreateICmpEQ(mul_hi, Constant::getAllOnesValue(ty));
3826 auto hi_ne_m1 = builder.CreateNot(hi_eq_m1);
3827 auto lo_le_max_plus_1 = builder.CreateICmpULE(mul_lo, max_plus_1);
3828 auto max_sub_lo = builder.CreateSub(max, mul_lo);
3829 auto c_lt_max_sub_lo = builder.CreateICmpULT(c, max_sub_lo);
3830
3831 // Equivalent to:
3832 // if (((x < 0) == (y < 0)) && mul_hi != 0)
3833 // return MAX
3834 // if (mul_hi == 0 && mul_lo >= MAX && (z > 0 || add > MAX))
3835 // return MAX
3836 // if (((x < 0) != (y < 0)) && mul_hi != -1)
3837 // return MIN
3838 // if (hi == -1 && mul_lo <= (MAX + 1) && (z < 0 || z < (MAX - mul_lo))
3839 // return MIN
3840 // return add
3841 auto max_clamp_1 = builder.CreateAnd(same_sign, hi_ne_0);
3842 auto max_clamp_2 = builder.CreateOr(c_gt_0, add_gt_max);
3843 auto tmp = builder.CreateAnd(hi_eq_0, lo_ge_max);
3844 max_clamp_2 = builder.CreateAnd(tmp, max_clamp_2);
3845 auto max_clamp = builder.CreateOr(max_clamp_1, max_clamp_2);
3846 auto min_clamp_1 = builder.CreateAnd(different_sign, hi_ne_m1);
3847 auto min_clamp_2 = builder.CreateOr(c_lt_0, c_lt_max_sub_lo);
3848 tmp = builder.CreateAnd(hi_eq_m1, lo_le_max_plus_1);
3849 min_clamp_2 = builder.CreateAnd(tmp, min_clamp_2);
3850 auto min_clamp = builder.CreateOr(min_clamp_1, min_clamp_2);
3851 auto sel = builder.CreateSelect(min_clamp, min, add);
3852 return builder.CreateSelect(max_clamp, max, sel);
3853 }
3854 } else {
3855 // {lo, hi} = mul_extended(a, b)
3856 // {add, carry} = add_carry(lo, c)
3857 // cmp = (mul_hi | carry) == 0
3858 // mad_sat = cmp ? add : MAX
3859 auto struct_ty = GetPairStruct(ty);
Romaric Jodinc507f312022-04-08 19:09:45 +02003860 auto mul_ext = InsertOpMulExtended(Call, a, b, false);
alan-baker6b9d1ee2020-11-03 23:11:32 -05003861 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3862 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3863 auto add_carry =
3864 InsertSPIRVOp(Call, spv::OpIAddCarry, {Attribute::ReadNone},
3865 struct_ty, {mul_lo, c});
3866 auto add = builder.CreateExtractValue(add_carry, {0});
3867 auto carry = builder.CreateExtractValue(add_carry, {1});
3868 auto or_value = builder.CreateOr(mul_hi, carry);
3869 auto cmp = builder.CreateICmpEQ(or_value, Constant::getNullValue(ty));
3870 return builder.CreateSelect(cmp, add, Constant::getAllOnesValue(ty));
3871 }
3872 });
3873}
alan-baker15106572020-11-06 15:08:10 -05003874
3875bool ReplaceOpenCLBuiltinPass::replaceOrdered(Function &F, bool is_ordered) {
3876 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3877 return false;
3878
3879 if (F.getFunctionType()->getNumParams() != 2)
3880 return false;
3881
3882 if (F.getFunctionType()->getParamType(0) !=
3883 F.getFunctionType()->getParamType(1)) {
3884 return false;
3885 }
3886
3887 switch (F.getFunctionType()->getParamType(0)->getScalarType()->getTypeID()) {
3888 case Type::FloatTyID:
3889 case Type::HalfTyID:
3890 case Type::DoubleTyID:
3891 break;
3892 default:
3893 return false;
3894 }
3895
3896 // Scalar versions all return an int, while vector versions return a vector
3897 // of an equally sized integer types (e.g. short, int or long).
3898 if (isa<VectorType>(F.getReturnType())) {
3899 if (F.getReturnType()->getScalarSizeInBits() !=
3900 F.getFunctionType()->getParamType(0)->getScalarSizeInBits()) {
3901 return false;
3902 }
3903 } else {
3904 if (F.getReturnType()->getScalarSizeInBits() != 32)
3905 return false;
3906 }
3907
3908 return replaceCallsWithValue(F, [is_ordered](CallInst *Call) {
3909 // Replace with a floating point [un]ordered comparison followed by an
3910 // extension.
3911 auto x = Call->getArgOperand(0);
3912 auto y = Call->getArgOperand(1);
3913 IRBuilder<> builder(Call);
3914 Value *tmp = nullptr;
3915 if (is_ordered) {
3916 // This leads to a slight inefficiency in the SPIR-V that is easy for
3917 // drivers to optimize where the SPIR-V for the comparison and the
3918 // extension could be fused to drop the inversion of the OpIsNan.
3919 tmp = builder.CreateFCmpORD(x, y);
3920 } else {
3921 tmp = builder.CreateFCmpUNO(x, y);
3922 }
3923 // OpenCL CTS requires that vector versions use sign extension, but scalar
3924 // versions use zero extension.
3925 if (isa<VectorType>(Call->getType()))
3926 return builder.CreateSExt(tmp, Call->getType());
3927 return builder.CreateZExt(tmp, Call->getType());
3928 });
3929}
alan-baker497920b2020-11-09 16:41:36 -05003930
3931bool ReplaceOpenCLBuiltinPass::replaceIsNormal(Function &F) {
3932 return replaceCallsWithValue(F, [this](CallInst *Call) {
3933 auto ty = Call->getType();
3934 auto x = Call->getArgOperand(0);
3935 unsigned width = x->getType()->getScalarSizeInBits();
3936 Type *int_ty = IntegerType::get(Call->getContext(), width);
3937 uint64_t abs_mask = 0x7fffffff;
3938 uint64_t exp_mask = 0x7f800000;
3939 uint64_t min_mask = 0x00800000;
3940 if (width == 16) {
3941 abs_mask = 0x7fff;
3942 exp_mask = 0x7c00;
3943 min_mask = 0x0400;
3944 } else if (width == 64) {
3945 abs_mask = 0x7fffffffffffffff;
3946 exp_mask = 0x7ff0000000000000;
3947 min_mask = 0x0010000000000000;
3948 }
3949 Constant *abs_const = ConstantInt::get(int_ty, APInt(width, abs_mask));
3950 Constant *exp_const = ConstantInt::get(int_ty, APInt(width, exp_mask));
3951 Constant *min_const = ConstantInt::get(int_ty, APInt(width, min_mask));
3952 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3953 int_ty = VectorType::get(int_ty, vec_ty->getElementCount());
3954 abs_const =
3955 ConstantVector::getSplat(vec_ty->getElementCount(), abs_const);
3956 exp_const =
3957 ConstantVector::getSplat(vec_ty->getElementCount(), exp_const);
3958 min_const =
3959 ConstantVector::getSplat(vec_ty->getElementCount(), min_const);
3960 }
3961 // Drop the sign bit and then check that the number is between
3962 // (exclusive) the min and max exponent values for the bit width.
3963 IRBuilder<> builder(Call);
3964 auto bitcast = builder.CreateBitCast(x, int_ty);
3965 auto abs = builder.CreateAnd(bitcast, abs_const);
3966 auto lt = builder.CreateICmpULT(abs, exp_const);
3967 auto ge = builder.CreateICmpUGE(abs, min_const);
3968 auto tmp = builder.CreateAnd(lt, ge);
3969 // OpenCL CTS requires that vector versions use sign extension, but scalar
3970 // versions use zero extension.
3971 if (isa<VectorType>(ty))
3972 return builder.CreateSExt(tmp, ty);
3973 return builder.CreateZExt(tmp, ty);
3974 });
3975}
alan-bakere0406e72020-11-10 12:32:04 -05003976
3977bool ReplaceOpenCLBuiltinPass::replaceFDim(Function &F) {
3978 return replaceCallsWithValue(F, [](CallInst *Call) {
3979 const auto x = Call->getArgOperand(0);
3980 const auto y = Call->getArgOperand(1);
3981 IRBuilder<> builder(Call);
3982 auto sub = builder.CreateFSub(x, y);
3983 auto cmp = builder.CreateFCmpUGT(x, y);
3984 return builder.CreateSelect(cmp, sub,
3985 Constant::getNullValue(Call->getType()));
3986 });
3987}
alan-baker3e0de472020-12-08 15:57:17 -05003988
3989bool ReplaceOpenCLBuiltinPass::replaceRound(Function &F) {
3990 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3991 const auto x = Call->getArgOperand(0);
3992 const double c_halfway = 0.5;
3993 auto halfway = ConstantFP::get(Call->getType(), c_halfway);
3994
3995 const auto clspv_fract_name =
3996 Builtins::GetMangledFunctionName("clspv.fract", F.getFunctionType());
3997 Function *clspv_fract_fn = F.getParent()->getFunction(clspv_fract_name);
3998 if (!clspv_fract_fn) {
3999 // Make the clspv_fract function.
4000 clspv_fract_fn = cast<Function>(
4001 F.getParent()
4002 ->getOrInsertFunction(clspv_fract_name, F.getFunctionType())
4003 .getCallee());
4004 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
4005 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
4006 }
4007
4008 auto ceil = Intrinsic::getDeclaration(F.getParent(), Intrinsic::ceil,
4009 Call->getType());
4010 auto floor = Intrinsic::getDeclaration(F.getParent(), Intrinsic::floor,
4011 Call->getType());
4012 auto fabs = Intrinsic::getDeclaration(F.getParent(), Intrinsic::fabs,
4013 Call->getType());
4014 auto copysign = Intrinsic::getDeclaration(
4015 F.getParent(), Intrinsic::copysign, {Call->getType(), Call->getType()});
4016
4017 IRBuilder<> builder(Call);
4018
4019 auto fabs_call = builder.CreateCall(F.getFunctionType(), fabs, {x});
4020 auto ceil_call = builder.CreateCall(F.getFunctionType(), ceil, {fabs_call});
4021 auto floor_call =
4022 builder.CreateCall(F.getFunctionType(), floor, {fabs_call});
4023 auto fract_call =
4024 builder.CreateCall(F.getFunctionType(), clspv_fract_fn, {fabs_call});
4025 auto cmp = builder.CreateFCmpOGE(fract_call, halfway);
4026 auto sel = builder.CreateSelect(cmp, ceil_call, floor_call);
4027 return builder.CreateCall(copysign->getFunctionType(), copysign, {sel, x});
4028 });
4029}
4030
4031bool ReplaceOpenCLBuiltinPass::replaceTrigPi(Function &F,
4032 Builtins::BuiltinType type) {
4033 return replaceCallsWithValue(F, [&F, type](CallInst *Call) -> Value * {
4034 const auto x = Call->getArgOperand(0);
4035 const double k_pi = 0x1.921fb54442d18p+1;
4036 Constant *pi = ConstantFP::get(x->getType(), k_pi);
4037
4038 IRBuilder<> builder(Call);
4039 auto mul = builder.CreateFMul(x, pi);
4040 switch (type) {
4041 case Builtins::kSinpi: {
4042 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
4043 x->getType());
4044 return builder.CreateCall(func->getFunctionType(), func, {mul});
4045 }
4046 case Builtins::kCospi: {
4047 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
4048 x->getType());
4049 return builder.CreateCall(func->getFunctionType(), func, {mul});
4050 }
4051 case Builtins::kTanpi: {
4052 auto sin = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
4053 x->getType());
4054 auto sin_call = builder.CreateCall(sin->getFunctionType(), sin, {mul});
4055 auto cos = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
4056 x->getType());
4057 auto cos_call = builder.CreateCall(cos->getFunctionType(), cos, {mul});
4058 return builder.CreateFDiv(sin_call, cos_call);
4059 }
4060 default:
4061 llvm_unreachable("unexpected builtin");
4062 break;
4063 }
4064 return nullptr;
4065 });
4066}
alan-baker8b968112020-12-15 15:53:29 -05004067
4068bool ReplaceOpenCLBuiltinPass::replaceSincos(Function &F) {
4069 return replaceCallsWithValue(F, [&F](CallInst *Call) {
4070 auto sin_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
4071 Call->getType());
4072 auto cos_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
4073 Call->getType());
4074
4075 IRBuilder<> builder(Call);
4076 auto sin = builder.CreateCall(sin_func->getFunctionType(), sin_func,
4077 {Call->getArgOperand(0)});
4078 auto cos = builder.CreateCall(cos_func->getFunctionType(), cos_func,
4079 {Call->getArgOperand(0)});
4080 builder.CreateStore(cos, Call->getArgOperand(1));
4081 return sin;
4082 });
4083}
4084
4085bool ReplaceOpenCLBuiltinPass::replaceExpm1(Function &F) {
4086 return replaceCallsWithValue(F, [&F](CallInst *Call) {
4087 auto exp_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::exp,
4088 Call->getType());
4089
4090 IRBuilder<> builder(Call);
4091 auto exp = builder.CreateCall(exp_func->getFunctionType(), exp_func,
4092 {Call->getArgOperand(0)});
4093 return builder.CreateFSub(exp, ConstantFP::get(Call->getType(), 1.0));
4094 });
4095}
4096
4097bool ReplaceOpenCLBuiltinPass::replacePown(Function &F) {
4098 return replaceCallsWithValue(F, [&F](CallInst *Call) {
4099 auto pow_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::pow,
4100 Call->getType());
4101
4102 IRBuilder<> builder(Call);
4103 auto conv = builder.CreateSIToFP(Call->getArgOperand(1), Call->getType());
4104 return builder.CreateCall(pow_func->getFunctionType(), pow_func,
4105 {Call->getArgOperand(0), conv});
4106 });
4107}