Cluster pod kernel args: Inline the inner function by default
Use option -no-inline-pod-inner-function to avoid inlining, i.e. to get
the old behaviour. Note that this option is deprecated.
diff --git a/lib/ClusterPodKernelArgumentsPass.cpp b/lib/ClusterPodKernelArgumentsPass.cpp
index dd0a4c2..230096d 100644
--- a/lib/ClusterPodKernelArgumentsPass.cpp
+++ b/lib/ClusterPodKernelArgumentsPass.cpp
@@ -34,7 +34,9 @@
#include <llvm/IR/Metadata.h>
#include <llvm/IR/Module.h>
#include <llvm/Pass.h>
+#include <llvm/Support/CommandLine.h>
#include <llvm/Support/raw_ostream.h>
+#include <llvm/Transforms/Utils/Cloning.h>
#include "ArgKind.h"
@@ -42,6 +44,12 @@
#define DEBUG_TYPE "clusterpodkernelargs"
+// Deprecated opt-out; defaults to false, so inlining runs. TODO(dneto): Remove this after experimentation.
+static llvm::cl::opt<bool> no_inline_pod_fn(
+ "no-inline-pod-inner-function", llvm::cl::init(false),
+ llvm::cl::desc("DEPRECATED. Avoid inlining the inner function created by "
+ "clustering pod kernel args"));
+
namespace {
struct ClusterPodKernelArgumentsPass : public ModulePass {
static char ID;
@@ -81,6 +89,8 @@
}
}
+ SmallVector<CallInst*, 8> CallList;
+
for (Function* F : WorkList) {
Changed = true;
@@ -245,9 +255,17 @@
auto Call = Builder.CreateCall(F, CalleeArgs);
Call->setCallingConv(F->getCallingConv());
+ CallList.push_back(Call);
Builder.CreateRetVoid();
}
+ if (!no_inline_pod_fn) {
+ for (CallInst *C : CallList) {
+ InlineFunctionInfo info;
+ Changed |= InlineFunction(C, info);
+ }
+ }
+
return Changed;
}