[llvm] r336587 - AMDGPU: Force inlining if LDS global address is used
Vlad Tsyrklevich via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 9 17:51:33 PDT 2018
This change broke the sanitizer bots with the following failure:
/b/sanitizer-x86_64-linux-fast/build/llvm/tools/clang/test/CodeGen/backend-unsupported-error.ll:6:10:
error: expected string not found in input
; CHECK: error: test.c:2:20: in function bar i32 (): unsupported call to
function foo.2
^
<stdin>:1:1: note: scanning from here
'-fp32-denormals' is not a recognized feature for this target (ignoring
feature)
^
<stdin>:4:1: note: possible intended match here
error: test.c:3:20: in function bar i32 (): unsupported call to function bar
^
I have reverted it in r336623.
On Mon, Jul 9, 2018 at 12:27 PM Matt Arsenault via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: arsenm
> Date: Mon Jul 9 12:22:22 2018
> New Revision: 336587
>
> URL: http://llvm.org/viewvc/llvm-project?rev=336587&view=rev
> Log:
> AMDGPU: Force inlining if LDS global address is used
>
> These won't work for the foreseeable future. These aren't allowed
> from OpenCL, but IPO optimizations can make them appear.
>
> Also directly set the attributes on functions, regardless
> of the linkage rather than cloning functions like before.
>
> Added:
> llvm/trunk/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
> Modified:
> llvm/trunk/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
> llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
> llvm/trunk/test/CodeGen/AMDGPU/early-inline.ll
> llvm/trunk/test/CodeGen/AMDGPU/stress-calls.ll
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp?rev=336587&r1=336586&r2=336587&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp Mon Jul 9
> 12:22:22 2018
> @@ -14,6 +14,9 @@
>
> //===----------------------------------------------------------------------===//
>
> #include "AMDGPU.h"
> +#include "AMDGPUTargetMachine.h"
> +#include "Utils/AMDGPUBaseInfo.h"
> +#include "llvm/ADT/SmallPtrSet.h"
> #include "llvm/IR/Module.h"
> #include "llvm/Transforms/Utils/Cloning.h"
>
> @@ -30,13 +33,18 @@ static cl::opt<bool> StressCalls(
> class AMDGPUAlwaysInline : public ModulePass {
> bool GlobalOpt;
>
> + void recursivelyVisitUsers(GlobalValue &GV,
> + SmallPtrSetImpl<Function *>
> &FuncsToAlwaysInline);
> public:
> static char ID;
>
> AMDGPUAlwaysInline(bool GlobalOpt = false) :
> ModulePass(ID), GlobalOpt(GlobalOpt) { }
> bool runOnModule(Module &M) override;
> - StringRef getPassName() const override { return "AMDGPU Always Inline
> Pass"; }
> +
> + void getAnalysisUsage(AnalysisUsage &AU) const override {
> + AU.setPreservesAll();
> + }
> };
>
> } // End anonymous namespace
> @@ -46,15 +54,53 @@ INITIALIZE_PASS(AMDGPUAlwaysInline, "amd
>
> char AMDGPUAlwaysInline::ID = 0;
>
> +void AMDGPUAlwaysInline::recursivelyVisitUsers(
> + GlobalValue &GV,
> + SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
> + SmallVector<User *, 16> Stack;
> +
> + SmallPtrSet<const Value *, 8> Visited;
> +
> + for (User *U : GV.users())
> + Stack.push_back(U);
> +
> + while (!Stack.empty()) {
> + User *U = Stack.pop_back_val();
> + if (!Visited.insert(U).second)
> + continue;
> +
> + if (Instruction *I = dyn_cast<Instruction>(U)) {
> + Function *F = I->getParent()->getParent();
> + if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
> + FuncsToAlwaysInline.insert(F);
> + Stack.push_back(F);
> + }
> +
> + // No need to look at further users, but we do need to inline any
> callers.
> + continue;
> + }
> +
> + for (User *UU : U->users())
> + Stack.push_back(UU);
> + }
> +}
> +
> bool AMDGPUAlwaysInline::runOnModule(Module &M) {
> + AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);
> +
> std::vector<GlobalAlias*> AliasesToRemove;
> - std::vector<Function *> FuncsToClone;
> +
> + SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
> + SmallPtrSet<Function *, 8> FuncsToNoInline;
>
> for (GlobalAlias &A : M.aliases()) {
> if (Function* F = dyn_cast<Function>(A.getAliasee())) {
> A.replaceAllUsesWith(F);
> AliasesToRemove.push_back(&A);
> }
> +
> + // FIXME: If the aliasee isn't a function, it's some kind of constant
> expr
> + // cast that won't be inlined through.
> }
>
> if (GlobalOpt) {
> @@ -63,31 +109,51 @@ bool AMDGPUAlwaysInline::runOnModule(Mod
> }
> }
>
> - auto NewAttr = StressCalls ? Attribute::NoInline :
> Attribute::AlwaysInline;
> - auto IncompatAttr
> - = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
> -
> - for (Function &F : M) {
> - if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
> - !F.hasFnAttribute(IncompatAttr))
> - FuncsToClone.push_back(&F);
> - }
> + // Always force inlining of any function that uses an LDS global
> address. This
> + // is something of a workaround because we don't have a way of
> supporting LDS
> + // objects defined in functions. LDS is always allocated by a kernel,
> and it
> + // is difficult to manage LDS usage if a function may be used by
> multiple
> + // kernels.
> + //
> + // OpenCL doesn't allow declaring LDS in non-kernels, so in practice
> this
> + // should only appear when IPO passes manages to move LDs defined in a
> kernel
> + // into a single user function.
> +
> + for (GlobalVariable &GV : M.globals()) {
> + // TODO: Region address
> + unsigned AS = GV.getType()->getAddressSpace();
> + if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
> + continue;
>
> - for (Function *F : FuncsToClone) {
> - ValueToValueMapTy VMap;
> - Function *NewFunc = CloneFunction(F, VMap);
> - NewFunc->setLinkage(GlobalValue::InternalLinkage);
> - F->replaceAllUsesWith(NewFunc);
> + recursivelyVisitUsers(GV, FuncsToAlwaysInline);
> }
>
> - for (Function &F : M) {
> - if (F.hasLocalLinkage() && !F.hasFnAttribute(IncompatAttr)) {
> - F.addFnAttr(NewAttr);
> + if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
> + auto IncompatAttr
> + = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
> +
> + for (Function &F : M) {
> + if (!F.isDeclaration() && !F.use_empty() &&
> + !F.hasFnAttribute(IncompatAttr)) {
> + if (StressCalls) {
> + if (!FuncsToAlwaysInline.count(&F))
> + FuncsToNoInline.insert(&F);
> + } else
> + FuncsToAlwaysInline.insert(&F);
> + }
> }
> }
> - return false;
> +
> + for (Function *F : FuncsToAlwaysInline)
> + F->addFnAttr(Attribute::AlwaysInline);
> +
> + for (Function *F : FuncsToNoInline)
> + F->addFnAttr(Attribute::NoInline);
> +
> + return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
> }
>
> ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
> return new AMDGPUAlwaysInline(GlobalOpt);
> }
> +
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=336587&r1=336586&r2=336587&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Jul 9
> 12:22:22 2018
> @@ -117,11 +117,12 @@ static cl::opt<bool, true> LateCFGStruct
> cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
> cl::Hidden);
>
> -static cl::opt<bool> EnableAMDGPUFunctionCalls(
> +static cl::opt<bool, true> EnableAMDGPUFunctionCalls(
> "amdgpu-function-calls",
> - cl::Hidden,
> cl::desc("Enable AMDGPU function call support"),
> - cl::init(false));
> + cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
> + cl::init(false),
> + cl::Hidden);
>
> // Enable lib calls simplifications
> static cl::opt<bool> EnableLibCallSimplify(
> @@ -311,9 +312,10 @@ AMDGPUTargetMachine::AMDGPUTargetMachine
> initAsmInfo();
> }
>
> -AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
> -
> bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
> +bool AMDGPUTargetMachine::EnableFunctionCalls = false;
> +
> +AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
>
> StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
> Attribute GPUAttr = F.getFnAttribute("target-cpu");
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h?rev=336587&r1=336586&r2=336587&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h Mon Jul 9 12:22:22
> 2018
> @@ -41,6 +41,7 @@ protected:
>
> public:
> static bool EnableLateStructurizeCFG;
> + static bool EnableFunctionCalls;
>
> AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
> StringRef FS, TargetOptions Options,
>
> Modified: llvm/trunk/test/CodeGen/AMDGPU/early-inline.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/early-inline.ll?rev=336587&r1=336586&r2=336587&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/early-inline.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/early-inline.ll Mon Jul 9 12:22:22 2018
> @@ -16,10 +16,18 @@ entry:
> ; CHECK: mul i32
> ; CHECK-NOT: call i32
>
> -; CHECK: define i32 @c_alias
> define amdgpu_kernel void @caller(i32 %x) {
> entry:
> %res = call i32 @callee(i32 %x)
> store volatile i32 %res, i32 addrspace(1)* undef
> ret void
> }
> +
> +; CHECK-LABEL: @alias_caller(
> +; CHECK-NOT: call
> +define amdgpu_kernel void @alias_caller(i32 %x) {
> +entry:
> + %res = call i32 @c_alias(i32 %x)
> + store volatile i32 %res, i32 addrspace(1)* undef
> + ret void
> +}
>
> Added:
> llvm/trunk/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll?rev=336587&view=auto
>
> ==============================================================================
> ---
> llvm/trunk/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
> (added)
> +++
> llvm/trunk/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll Mon
> Jul 9 12:22:22 2018
> @@ -0,0 +1,77 @@
> +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls
> -amdgpu-always-inline %s | FileCheck -check-prefixes=CALLS-ENABLED,ALL %s
> +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls
> -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck
> -check-prefixes=STRESS-CALLS,ALL %s
> +
> +target datalayout =
> "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
> +
> + at lds0 = addrspace(3) global i32 undef, align 4
> + at lds1 = addrspace(3) global [512 x i32] undef, align 4
> + at nested.lds.address = addrspace(1) global i32 addrspace(3)* @lds0, align 4
> + at gds0 = addrspace(2) global i32 undef, align 4
> +
> + at alias.lds0 = alias i32, i32 addrspace(3)* @lds0
> + at lds.cycle = addrspace(3) global i32 ptrtoint (i32 addrspace(3)*
> @lds.cycle to i32), align 4
> +
> +
> +; ALL-LABEL: define i32 @load_lds_simple() #0 {
> +define i32 @load_lds_simple() {
> + %load = load i32, i32 addrspace(3)* @lds0, align 4
> + ret i32 %load
> +}
> +
> +; ALL-LABEL: define i32 @load_gds_simple() #0 {
> +define i32 @load_gds_simple() {
> + %load = load i32, i32 addrspace(2)* @gds0, align 4
> + ret i32 %load
> +}
> +
> +; ALL-LABEL: define i32 @load_lds_const_gep() #0 {
> +define i32 @load_lds_const_gep() {
> + %load = load i32, i32 addrspace(3)* getelementptr inbounds ([512 x
> i32], [512 x i32] addrspace(3)* @lds1, i64 0, i64 4), align 4
> + ret i32 %load
> +}
> +
> +; ALL-LABEL: define i32 @load_lds_var_gep(i32 %idx) #0 {
> +define i32 @load_lds_var_gep(i32 %idx) {
> + %gep = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)*
> @lds1, i32 0, i32 %idx
> + %load = load i32, i32 addrspace(3)* %gep, align 4
> + ret i32 %load
> +}
> +
> +; ALL-LABEL: define i32 addrspace(3)* @load_nested_address(i32 %idx) #0 {
> +define i32 addrspace(3)* @load_nested_address(i32 %idx) {
> + %load = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(1)*
> @nested.lds.address, align 4
> + ret i32 addrspace(3)* %load
> +}
> +
> +; ALL-LABEL: define i32 @load_lds_alias() #0 {
> +define i32 @load_lds_alias() {
> + %load = load i32, i32 addrspace(3)* @alias.lds0, align 4
> + ret i32 %load
> +}
> +
> +; ALL-LABEL: define i32 @load_lds_cycle() #0 {
> +define i32 @load_lds_cycle() {
> + %load = load i32, i32 addrspace(3)* @lds.cycle, align 4
> + ret i32 %load
> +}
> +
> +; ALL-LABEL: define i1 @icmp_lds_address() #0 {
> +define i1 @icmp_lds_address() {
> + ret i1 icmp eq (i32 addrspace(3)* @lds0, i32 addrspace(3)* null)
> +}
> +
> +; ALL-LABEL: define i32 @transitive_call() #0 {
> +define i32 @transitive_call() {
> + %call = call i32 @load_lds_simple()
> + ret i32 %call
> +}
> +
> +; ALL-LABEL: define i32 @recursive_call_lds(i32 %arg0) #0 {
> +define i32 @recursive_call_lds(i32 %arg0) {
> + %load = load i32, i32 addrspace(3)* @lds0, align 4
> + %add = add i32 %arg0, %load
> + %call = call i32 @recursive_call_lds(i32 %add)
> + ret i32 %call
> +}
> +
> +; ALL: attributes #0 = { alwaysinline }
>
> Modified: llvm/trunk/test/CodeGen/AMDGPU/stress-calls.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/stress-calls.ll?rev=336587&r1=336586&r2=336587&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/stress-calls.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/stress-calls.ll Mon Jul 9 12:22:22 2018
> @@ -1,4 +1,4 @@
> -; RUN: opt -S -amdgpu-stress-function-calls -amdgpu-always-inline %s |
> FileCheck %s
> +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls
> -amdgpu-always-inline %s | FileCheck %s
>
> ; CHECK: define internal fastcc i32 @alwaysinline_func(i32 %a) #0 {
> define internal fastcc i32 @alwaysinline_func(i32 %a) alwaysinline {
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180709/51498005/attachment.html>
More information about the llvm-commits
mailing list