blob: 3c902ee77b72cca33eacd2a5d8ed1b88316680db [file] [log] [blame]
// Copyright 2020 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "llvm/IR/CallingConv.h"
16#include "llvm/IR/Constants.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/Module.h"
19#include "llvm/Pass.h"
alan-baker6a3930b2020-05-21 10:09:11 -040020#include "llvm/Support/MathExtras.h"
alan-bakerc4579bb2020-04-29 14:15:50 -040021
22#include "spirv/unified1/spirv.hpp"
23
24#include "clspv/Option.h"
25
26#include "ArgKind.h"
27#include "Constants.h"
28#include "Layout.h"
29#include "Passes.h"
30#include "PushConstant.h"
31
32#define DEBUG_TYPE "autopodargs"
33
34using namespace llvm;
35
36namespace {
37class AutoPodArgsPass : public ModulePass {
38public:
39 static char ID;
40 AutoPodArgsPass() : ModulePass(ID) {}
41
42 bool runOnModule(Module &M) override;
43
44private:
45 // Decides the pod args implementation for each kernel individually.
46 void runOnFunction(Function &F);
47
48 // Makes all kernels use |impl| for pod args.
49 void AnnotateAllKernels(Module &M, clspv::PodArgImpl impl);
50
51 // Makes kernel |F| use |impl| as the pod arg implementation.
52 void AddMetadata(Function &F, clspv::PodArgImpl impl);
alan-baker7efcaaa2020-05-06 19:33:27 -040053
54 // Returns true if |type| contains an array. Does not look through pointers
55 // since we are dealing with pod args.
56 bool ContainsArrayType(Type *type) const;
57
58 // Returns true if |type| contains a |width|-bit integer or floating-point
59 // type. Does not look through pointer since we are dealing with pod args.
60 bool ContainsSizedType(Type *type, uint32_t width) const;
alan-bakerc4579bb2020-04-29 14:15:50 -040061};
62} // namespace
63
64char AutoPodArgsPass::ID = 0;
65INITIALIZE_PASS(AutoPodArgsPass, "AutoPodArgs",
66 "Mark pod arg implementation as metadata on kernels", false,
67 false)
68
69namespace clspv {
70ModulePass *createAutoPodArgsPass() { return new AutoPodArgsPass(); }
71} // namespace clspv
72
73bool AutoPodArgsPass::runOnModule(Module &M) {
74 if (clspv::Option::PodArgsInUniformBuffer()) {
75 AnnotateAllKernels(M, clspv::PodArgImpl::kUBO);
76 return true;
77 } else if (clspv::Option::PodArgsInPushConstants()) {
78 AnnotateAllKernels(M, clspv::PodArgImpl::kPushConstant);
79 return true;
80 }
81
82 for (auto &F : M) {
83 if (F.isDeclaration() || F.getCallingConv() != CallingConv::SPIR_KERNEL)
84 continue;
85
86 runOnFunction(F);
87 }
88
89 return true;
90}
91
92void AutoPodArgsPass::runOnFunction(Function &F) {
93 auto &M = *F.getParent();
94 const auto &DL = M.getDataLayout();
95 SmallVector<Type *, 8> pod_types;
96 bool satisfies_ubo = true;
97 for (auto &Arg : F.args()) {
98 auto arg_type = Arg.getType();
alan-baker6a3930b2020-05-21 10:09:11 -040099 if (Arg.hasByValAttr()) {
100 // Byval arguments end up as POD arguments.
101 arg_type = arg_type->getPointerElementType();
102 }
103
alan-bakerc4579bb2020-04-29 14:15:50 -0400104 if (isa<PointerType>(arg_type))
105 continue;
106
107 pod_types.push_back(arg_type);
108
alan-baker7efcaaa2020-05-06 19:33:27 -0400109 // If the type contains an 8- or 16-bit type UBO storage must be supported.
110 satisfies_ubo &= !ContainsSizedType(arg_type, 16) ||
111 clspv::Option::Supports16BitStorageClass(
112 clspv::Option::StorageClass::kUBO);
113 satisfies_ubo &= !ContainsSizedType(arg_type, 8) ||
114 clspv::Option::Supports8BitStorageClass(
115 clspv::Option::StorageClass::kUBO);
alan-bakerc4579bb2020-04-29 14:15:50 -0400116 if (auto struct_ty = dyn_cast<StructType>(arg_type)) {
117 // Only check individual arguments as clustering will fix the layout with
118 // padding if necessary.
119 satisfies_ubo &=
120 clspv::isValidExplicitLayout(M, struct_ty, spv::StorageClassUniform);
121 }
122 }
123
124 // Per-kernel push constant interface requires:
125 // 1. Clustered pod args.
126 // 2. No global push constants.
127 // 3. Args must fit in push constant size limit.
alan-baker7efcaaa2020-05-06 19:33:27 -0400128 // 4. No arrays.
129 // 5. If 16-bit types are used, 16-bit push constants are supported.
130 // 6. If 8-bit types are used, 8-bit push constants are supported.
alan-bakerc4579bb2020-04-29 14:15:50 -0400131 const auto pod_struct_ty = StructType::get(M.getContext(), pod_types);
alan-baker7efcaaa2020-05-06 19:33:27 -0400132 const bool contains_array = ContainsArrayType(pod_struct_ty);
133 const bool support_16bit_pc = !ContainsSizedType(pod_struct_ty, 16) ||
134 clspv::Option::Supports16BitStorageClass(
135 clspv::Option::StorageClass::kPushConstant);
136 const bool support_8bit_pc = !ContainsSizedType(pod_struct_ty, 8) ||
137 clspv::Option::Supports8BitStorageClass(
138 clspv::Option::StorageClass::kPushConstant);
alan-baker6a3930b2020-05-21 10:09:11 -0400139 // Align to 4 to use i32s.
140 const uint64_t pod_struct_size =
141 alignTo(DL.getTypeStoreSize(pod_struct_ty).getKnownMinSize(), 4);
alan-baker7efcaaa2020-05-06 19:33:27 -0400142 const bool fits_push_constant =
alan-baker6a3930b2020-05-21 10:09:11 -0400143 pod_struct_size <= clspv::Option::MaxPushConstantsSize();
alan-bakerc4579bb2020-04-29 14:15:50 -0400144 const bool satisfies_push_constant =
alan-baker7efcaaa2020-05-06 19:33:27 -0400145 clspv::Option::ClusterPodKernelArgs() && support_16bit_pc &&
146 support_8bit_pc && fits_push_constant &&
147 !clspv::UsesGlobalPushConstants(M) && !contains_array;
alan-bakerc4579bb2020-04-29 14:15:50 -0400148
alan-baker6a3930b2020-05-21 10:09:11 -0400149 // Global type-mangled push constants require:
150 // 1. Clustered pod args.
151 // 2. Args and global push constants must fit size limit.
152 // 3. Size / 4 must be less than max struct members.
153 // (In order to satisfy SPIR-V limit).
154 //
155 // Note: There is a potential tradeoff in representations. We could use
156 // either a packed or unpacked struct. A packed struct would allow more
157 // arguments to fit in the size limit, but potentially results in more
158 // instructions to undo the type-mangling. Currently we opt for an unpacked
159 // struct for two reasons:
160 // 1. The offsets of individual members make more sense at a higher level and
161 // are consistent with other clustered implementations.
162 // 2. The type demangling code is simpler (but may result in wasted space).
163 //
164 // TODO: We should generate a better pod struct by default (e.g. { i32, i8 }
165 // is preferable to { i8, i32 }). Also we could support packed structs as
166 // fallback to fit arguments depending on the performance cost.
167 const auto global_size = clspv::GlobalPushConstantsSize(M) + pod_struct_size;
168 const auto fits_global_size =
169 global_size <= clspv::Option::MaxPushConstantsSize();
170 // Leave some extra room for other push constants.
171 const uint64_t max_struct_members = 0x3fff - 64;
172 const auto enough_members = (global_size / 4) < max_struct_members;
173 const bool satisfies_global_push_constant =
174 clspv::Option::ClusterPodKernelArgs() && fits_global_size &&
175 enough_members;
176
alan-bakerc4579bb2020-04-29 14:15:50 -0400177 // Priority:
178 // 1. Per-kernel push constant interface.
alan-baker6a3930b2020-05-21 10:09:11 -0400179 // 2. Global type mangled push constant interface.
alan-bakerc4579bb2020-04-29 14:15:50 -0400180 // 3. UBO
181 // 4. SSBO
182 clspv::PodArgImpl impl = clspv::PodArgImpl::kSSBO;
183 if (satisfies_push_constant) {
184 impl = clspv::PodArgImpl::kPushConstant;
alan-baker6a3930b2020-05-21 10:09:11 -0400185 } else if (satisfies_global_push_constant) {
186 impl = clspv::PodArgImpl::kGlobalPushConstant;
alan-bakerc4579bb2020-04-29 14:15:50 -0400187 } else if (satisfies_ubo) {
188 impl = clspv::PodArgImpl::kUBO;
189 }
190 AddMetadata(F, impl);
191}
192
193void AutoPodArgsPass::AnnotateAllKernels(Module &M, clspv::PodArgImpl impl) {
194 for (auto &F : M) {
195 if (F.isDeclaration() || F.getCallingConv() != CallingConv::SPIR_KERNEL)
196 continue;
197
198 AddMetadata(F, impl);
199 }
200}
201
202void AutoPodArgsPass::AddMetadata(Function &F, clspv::PodArgImpl impl) {
203 auto md = MDTuple::get(
204 F.getContext(),
205 ConstantAsMetadata::get(ConstantInt::get(
206 IntegerType::get(F.getContext(), 32), static_cast<uint32_t>(impl))));
207 F.setMetadata(clspv::PodArgsImplMetadataName(), md);
208}
alan-baker7efcaaa2020-05-06 19:33:27 -0400209
210bool AutoPodArgsPass::ContainsArrayType(Type *type) const {
211 if (isa<ArrayType>(type)) {
212 return true;
213 } else if (auto struct_ty = dyn_cast<StructType>(type)) {
214 for (auto sub_type : struct_ty->elements()) {
215 if (ContainsArrayType(sub_type))
216 return true;
217 }
218 }
219
220 return false;
221}
222
223bool AutoPodArgsPass::ContainsSizedType(Type *type, uint32_t width) const {
224 if (auto int_ty = dyn_cast<IntegerType>(type)) {
225 return int_ty->getBitWidth() == width;
226 } else if (type->isHalfTy()) {
227 return width == 16;
228 } else if (auto array_ty = dyn_cast<ArrayType>(type)) {
229 return ContainsSizedType(array_ty->getElementType(), width);
230 } else if (auto vec_ty = dyn_cast<VectorType>(type)) {
231 return ContainsSizedType(vec_ty->getElementType(), width);
232 } else if (auto struct_ty = dyn_cast<StructType>(type)) {
233 for (auto sub_type : struct_ty->elements()) {
234 if (ContainsSizedType(sub_type, width))
235 return true;
236 }
237 }
238
239 return false;
240}