blob: ad7f7b64310472e552546de6409faeec92af8e91 [file] [log] [blame]
David Netoc5fb5242018-07-30 13:28:31 -04001// Copyright 2018 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <climits>
16#include <map>
17#include <set>
18#include <utility>
19#include <vector>
20
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/UniqueVector.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DerivedTypes.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/Module.h"
30#include "llvm/Pass.h"
31#include "llvm/Support/raw_ostream.h"
32
33#include "clspv/Option.h"
David Netoc5fb5242018-07-30 13:28:31 -040034
35#include "ArgKind.h"
Alan Baker202c8c72018-08-13 13:47:44 -040036#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040037#include "Passes.h"
David Netoc5fb5242018-07-30 13:28:31 -040038
39using namespace llvm;
40
41#define DEBUG_TYPE "directresourceaccess"
42
43namespace {
44
45cl::opt<bool> ShowDRA("show-dra", cl::init(false), cl::Hidden,
46 cl::desc("Show direct resource access details"));
47
48using SamplerMapType = llvm::ArrayRef<std::pair<unsigned, std::string>>;
49
50class DirectResourceAccessPass final : public ModulePass {
51public:
52 static char ID;
53 DirectResourceAccessPass() : ModulePass(ID) {}
54 bool runOnModule(Module &M) override;
55
56private:
57 // Return the functions reachable from entry point functions, where
58 // callers appear before callees. OpenCL C does not permit recursion
59 // or function or pointers, so this is always well defined. The ordering
60 // should be reproducible from one run to the next.
61 UniqueVector<Function *> CallGraphOrderedFunctions(Module &);
62
63 // For each kernel argument that will map to a resource variable (descriptor),
64 // try to rewrite the uses of the argument as a direct access of the resource.
65 // We can only do this if all the callees of the function use the same
66 // resource access value for that argument. Returns true if the module
67 // changed.
68 bool RewriteResourceAccesses(Function *fn);
69
70 // Rewrite uses of this resrouce-based arg if all the callers pass in the
71 // same resource access. Returns true if the module changed.
72 bool RewriteAccessesForArg(Function *fn, int arg_index, Argument &arg);
73};
Diego Novilloa4c44fa2019-04-11 10:56:15 -040074
David Netoc5fb5242018-07-30 13:28:31 -040075} // namespace
76
77char DirectResourceAccessPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -040078INITIALIZE_PASS(DirectResourceAccessPass, "DirectResourceAccessPass",
79 "Direct resource access", false, false)
David Netoc5fb5242018-07-30 13:28:31 -040080
81namespace clspv {
82ModulePass *createDirectResourceAccessPass() {
83 return new DirectResourceAccessPass();
84}
85} // namespace clspv
86
87namespace {
88bool DirectResourceAccessPass::runOnModule(Module &M) {
89 bool Changed = false;
90
91 if (clspv::Option::DirectResourceAccess()) {
92 auto ordered_functions = CallGraphOrderedFunctions(M);
93 for (auto *fn : ordered_functions) {
94 Changed |= RewriteResourceAccesses(fn);
95 }
96 }
97
98 return Changed;
99}
100
101UniqueVector<Function *>
102DirectResourceAccessPass::CallGraphOrderedFunctions(Module &M) {
103 // Use a topological sort.
104
105 // Make an ordered list of all functions having bodies, with kernel entry
106 // points listed first.
107 UniqueVector<Function *> functions;
108 SmallVector<Function *, 10> entry_points;
109 for (Function &F : M) {
110 if (F.isDeclaration()) {
111 continue;
112 }
113 if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
114 functions.insert(&F);
115 entry_points.push_back(&F);
116 }
117 }
118 // Add the remaining functions.
119 for (Function &F : M) {
120 if (F.isDeclaration()) {
121 continue;
122 }
123 if (F.getCallingConv() != CallingConv::SPIR_KERNEL) {
124 functions.insert(&F);
125 }
126 }
127
128 // This will be a complete set of reveresed edges, i.e. with all pairs
129 // of (callee, caller).
130 using Edge = std::pair<unsigned, unsigned>;
131 auto make_edge = [&functions](Function *callee, Function *caller) {
132 return std::pair<unsigned, unsigned>{functions.idFor(callee),
133 functions.idFor(caller)};
134 };
135 std::set<Edge> reverse_edges;
136 // Map each function to the functions it calls, and populate |reverse_edges|.
137 std::map<Function *, SmallVector<Function *, 3>> calls_functions;
138 for (Function *callee : functions) {
139 for (auto &use : callee->uses()) {
140 if (auto *call = dyn_cast<CallInst>(use.getUser())) {
141 Function *caller = call->getParent()->getParent();
142 calls_functions[caller].push_back(callee);
143 reverse_edges.insert(make_edge(callee, caller));
144 }
145 }
146 }
147 // Sort the callees in module-order. This helps us produce a deterministic
148 // result.
149 for (auto &pair : calls_functions) {
150 auto &callees = pair.second;
151 std::sort(callees.begin(), callees.end(),
152 [&functions](Function *lhs, Function *rhs) {
153 return functions.idFor(lhs) < functions.idFor(rhs);
154 });
155 }
156
157 // Use Kahn's algorithm for topoological sort.
158 UniqueVector<Function *> result;
159 SmallVector<Function *, 10> work_list(entry_points.begin(),
160 entry_points.end());
161 while (!work_list.empty()) {
162 Function *caller = work_list.back();
163 work_list.pop_back();
164 result.insert(caller);
165 auto &callees = calls_functions[caller];
166 for (auto *callee : callees) {
167 reverse_edges.erase(make_edge(callee, caller));
168 auto lower_bound = reverse_edges.lower_bound(make_edge(callee, nullptr));
169 if (lower_bound == reverse_edges.end() ||
170 lower_bound->first != functions.idFor(callee)) {
171 // Callee has no other unvisited callers.
172 work_list.push_back(callee);
173 }
174 }
175 }
176 // If reverse_edges is not empty then there was a cycle. But we don't care
177 // about that erroneous case.
178
179 if (ShowDRA) {
180 outs() << "DRA: Ordered functions:\n";
181 for (Function *fun : result) {
182 outs() << "DRA: " << fun->getName() << "\n";
183 }
184 }
185 return result;
186}
187
188bool DirectResourceAccessPass::RewriteResourceAccesses(Function *fn) {
189 bool Changed = false;
190 int arg_index = 0;
191 for (Argument &arg : fn->args()) {
192 switch (clspv::GetArgKindForType(arg.getType())) {
193 case clspv::ArgKind::Buffer:
Alan Bakerfcda9482018-10-02 17:09:59 -0400194 case clspv::ArgKind::BufferUBO:
David Netoc5fb5242018-07-30 13:28:31 -0400195 case clspv::ArgKind::ReadOnlyImage:
196 case clspv::ArgKind::WriteOnlyImage:
197 case clspv::ArgKind::Sampler:
Alan Baker202c8c72018-08-13 13:47:44 -0400198 case clspv::ArgKind::Local:
David Netoc5fb5242018-07-30 13:28:31 -0400199 Changed |= RewriteAccessesForArg(fn, arg_index, arg);
200 break;
David Neto3a0df832018-08-03 14:35:42 -0400201 default:
202 // Should not happen
203 break;
David Netoc5fb5242018-07-30 13:28:31 -0400204 }
205 arg_index++;
206 }
207 return Changed;
208}
209
210bool DirectResourceAccessPass::RewriteAccessesForArg(Function *fn,
211 int arg_index,
212 Argument &arg) {
213 bool Changed = false;
214 if (fn->use_empty()) {
215 return false;
216 }
217
218 // We can convert a parameter to a direct resource access if it is
219 // either a direct call to a clspv.resource.var.* or if it a GEP of
220 // such a thing (where the GEP can only have zero indices).
221 struct ParamInfo {
Alan Baker202c8c72018-08-13 13:47:44 -0400222 // The base value. It is either a global variable or a resource-access
223 // builtin function. (@clspv.resource.var.* or @clspv.local.var.*)
224 Value *base;
David Netoc5fb5242018-07-30 13:28:31 -0400225 // The descriptor set.
226 uint32_t set;
227 // The binding.
228 uint32_t binding;
229 // If the parameter is a GEP, then this is the number of zero-indices
230 // the GEP used.
231 unsigned num_gep_zeroes;
232 // An example call fitting
233 CallInst *sample_call;
234 };
235 // The common valid parameter info across all the callers seen soo far.
236
237 bool seen_one = false;
238 ParamInfo common;
239 // Tries to merge the given parameter info into |common|. If it is the first
240 // time we've tried, then save it. Returns true if there is no conflict.
241 auto merge_param_info = [&seen_one, &common](const ParamInfo &pi) {
242 if (!seen_one) {
243 common = pi;
244 seen_one = true;
245 return true;
246 }
Alan Baker202c8c72018-08-13 13:47:44 -0400247 return pi.base == common.base && pi.set == common.set &&
David Netoc5fb5242018-07-30 13:28:31 -0400248 pi.binding == common.binding &&
249 pi.num_gep_zeroes == common.num_gep_zeroes;
250 };
251
252 for (auto &use : fn->uses()) {
253 if (auto *caller = dyn_cast<CallInst>(use.getUser())) {
254 Value *value = caller->getArgOperand(arg_index);
255 // We care about two cases:
256 // - a direct call to clspv.resource.var.*
257 // - a GEP with only zero indices, where the base pointer is
258
259 // Unpack GEPs with zeros, if we can. Rewrite |value| as we go along.
260 unsigned num_gep_zeroes = 0;
David Neto2f450002018-08-01 16:13:03 -0400261 bool first_gep = true;
David Netoc5fb5242018-07-30 13:28:31 -0400262 for (auto *gep = dyn_cast<GetElementPtrInst>(value); gep;
263 gep = dyn_cast<GetElementPtrInst>(value)) {
264 if (!gep->hasAllZeroIndices()) {
265 return false;
266 }
David Neto2f450002018-08-01 16:13:03 -0400267 // If not the first GEP, then ignore the "element" index (which I call
268 // "slide") since that will be combined with the last index of the
269 // previous GEP.
270 num_gep_zeroes += gep->getNumIndices() + (first_gep ? 0 : -1);
David Netoc5fb5242018-07-30 13:28:31 -0400271 value = gep->getPointerOperand();
David Neto2f450002018-08-01 16:13:03 -0400272 first_gep = false;
David Netoc5fb5242018-07-30 13:28:31 -0400273 }
274 if (auto *call = dyn_cast<CallInst>(value)) {
275 // If the call is a call to a @clspv.resource.var.* function, then try
276 // to merge it, assuming the given number of GEP zero-indices so far.
277 if (call->getCalledFunction()->getName().startswith(
Alan Baker202c8c72018-08-13 13:47:44 -0400278 clspv::ResourceAccessorFunction())) {
David Netoc5fb5242018-07-30 13:28:31 -0400279 const auto set = uint32_t(
280 dyn_cast<ConstantInt>(call->getOperand(0))->getZExtValue());
281 const auto binding = uint32_t(
282 dyn_cast<ConstantInt>(call->getOperand(1))->getZExtValue());
283 if (!merge_param_info({call->getCalledFunction(), set, binding,
Alan Baker202c8c72018-08-13 13:47:44 -0400284 num_gep_zeroes, call}))
David Netoc5fb5242018-07-30 13:28:31 -0400285 return false;
Alan Baker202c8c72018-08-13 13:47:44 -0400286 } else if (call->getCalledFunction()->getName().startswith(
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400287 clspv::WorkgroupAccessorFunction())) {
Alan Baker202c8c72018-08-13 13:47:44 -0400288 const uint32_t spec_id = uint32_t(
289 dyn_cast<ConstantInt>(call->getOperand(0))->getZExtValue());
290 if (!merge_param_info({call->getCalledFunction(), spec_id, 0,
291 num_gep_zeroes, call}))
292 return false;
David Netoc5fb5242018-07-30 13:28:31 -0400293 } else {
294 // A call but not to a resource access builtin function.
295 return false;
296 }
Alan Baker202c8c72018-08-13 13:47:44 -0400297 } else if (isa<GlobalValue>(value)) {
298 if (!merge_param_info({value, 0, 0, num_gep_zeroes, nullptr}))
299 return false;
David Netoc5fb5242018-07-30 13:28:31 -0400300 } else {
301 // Not a call.
302 return false;
303 }
304 } else {
305 // There isn't enough commonality. Bail out without changing anything.
306 return false;
307 }
308 }
309 if (ShowDRA) {
310 if (seen_one) {
311 outs() << "DRA: Rewrite " << fn->getName() << " arg " << arg_index << " "
Alan Baker202c8c72018-08-13 13:47:44 -0400312 << arg.getName() << ": " << common.base->getName() << " ("
David Netoc5fb5242018-07-30 13:28:31 -0400313 << common.set << "," << common.binding
Alan Bakerfc6888e2018-08-20 20:54:33 -0400314 << ") zeroes: " << common.num_gep_zeroes << " sample-call ";
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400315 if (common.sample_call)
316 outs() << *common.sample_call << "\n";
317 else
318 outs() << "nullptr\n";
David Netoc5fb5242018-07-30 13:28:31 -0400319 }
320 }
321
322 // Now rewrite the argument, using the info in |common|.
323
324 Changed = true;
325 IRBuilder<> Builder(fn->getParent()->getContext());
326 auto *zero = Builder.getInt32(0);
327 Builder.SetInsertPoint(fn->getEntryBlock().getFirstNonPHI());
328
Alan Baker202c8c72018-08-13 13:47:44 -0400329 Value *replacement = common.base;
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400330 if (Function *function = dyn_cast<Function>(replacement)) {
Alan Baker202c8c72018-08-13 13:47:44 -0400331 // Create the call.
332 SmallVector<Value *, 8> args(common.sample_call->arg_begin(),
333 common.sample_call->arg_end());
334 replacement = Builder.CreateCall(function, args);
335 if (ShowDRA) {
336 outs() << "DRA: Replace: call " << *replacement << "\n";
337 }
David Netoc5fb5242018-07-30 13:28:31 -0400338 }
339 if (common.num_gep_zeroes) {
340 SmallVector<Value *, 3> zeroes;
341 for (unsigned i = 0; i < common.num_gep_zeroes; i++) {
342 zeroes.push_back(zero);
343 }
Alan Baker202c8c72018-08-13 13:47:44 -0400344 // Builder.CreateGEP is not used to avoid creating a GEPConstantExpr in the
345 // case of global variables.
346 replacement = GetElementPtrInst::Create(nullptr, replacement, zeroes);
347 Builder.Insert(cast<Instruction>(replacement));
David Netoc5fb5242018-07-30 13:28:31 -0400348 if (ShowDRA) {
349 outs() << "DRA: Replace: gep " << *replacement << "\n";
350 }
351 }
352 arg.replaceAllUsesWith(replacement);
353
354 return Changed;
355}
356
357} // namespace