[llvm] 1e0c540 - AMDGPU: Don't hard error on LDS globals in functions

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 11 12:34:18 PDT 2020


Author: Matt Arsenault
Date: 2020-03-11T15:34:11-04:00
New Revision: 1e0c540360e826d85e7f048d31895c4028e6499a

URL: https://github.com/llvm/llvm-project/commit/1e0c540360e826d85e7f048d31895c4028e6499a
DIFF: https://github.com/llvm/llvm-project/commit/1e0c540360e826d85e7f048d31895c4028e6499a.diff

LOG: AMDGPU: Don't hard error on LDS globals in functions

Instead, emit a trap and a warning. We force inlining of this
situation, so any function where this happens should be dead as
indirect or external calls are not yet supported. This should avoid
erroring on dead code.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
    llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index e72b3f4fde63..66801b7af542 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -71,7 +71,8 @@ class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
   static bool visitConstantExpr(const ConstantExpr *CE);
   static bool visitConstantExprsRecursively(
     const Constant *EntryC,
-    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
+    SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
+    bool HasApertureRegs);
 };
 
 } // end anonymous namespace
@@ -93,6 +94,14 @@ static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
   return castRequiresQueuePtr(ASC->getSrcAddressSpace());
 }
 
+static bool isDSAddress(const Constant *C) {
+  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
+  if (!GV)
+    return false;
+  unsigned AS = GV->getAddressSpace();
+  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
+}
+
 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
@@ -104,7 +113,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
 
 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
   const Constant *EntryC,
-  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
+  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+  bool IsFunc, bool HasApertureRegs) {
 
   if (!ConstantExprVisited.insert(EntryC).second)
     return false;
@@ -115,9 +125,13 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
   while (!Stack.empty()) {
     const Constant *C = Stack.pop_back_val();
 
+    // We need to trap on DS globals in non-entry functions.
+    if (IsFunc && isDSAddress(C))
+      return true;
+
     // Check this constant expression.
     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
-      if (visitConstantExpr(CE))
+      if (!HasApertureRegs && visitConstantExpr(CE))
         return true;
     }
 
@@ -301,11 +315,11 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
         }
       }
 
-      if (NeedQueuePtr || HasApertureRegs)
+      if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
         continue;
 
       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
-        if (castRequiresQueuePtr(ASC)) {
+        if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
           NeedQueuePtr = true;
           continue;
         }
@@ -316,7 +330,8 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
         if (!OpC)
           continue;
 
-        if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
+        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
+                                          HasApertureRegs)) {
           NeedQueuePtr = true;
           break;
         }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 625db96467ad..7b659d3c61a3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1238,10 +1238,23 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
   if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
     if (!MFI->isEntryFunction()) {
+      SDLoc DL(Op);
       const Function &Fn = DAG.getMachineFunction().getFunction();
       DiagnosticInfoUnsupported BadLDSDecl(
-        Fn, "local memory global used by non-kernel function", SDLoc(Op).getDebugLoc());
+        Fn, "local memory global used by non-kernel function",
+        DL.getDebugLoc(), DS_Warning);
       DAG.getContext()->diagnose(BadLDSDecl);
+
+      // We currently don't have a way to correctly allocate LDS objects that
+      // aren't directly associated with a kernel. We do force inlining of
+      // functions that use local objects. However, if these dead functions are
+      // not eliminated, we don't want a compile time error. Just emit a warning
+      // and a trap, since there should be no callable path here.
+      SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode());
+      SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                                        Trap, DAG.getRoot());
+      DAG.setRoot(OutputChain);
+      return DAG.getUNDEF(Op.getValueType());
     }
 
     // XXX: What does the value of G->getOffset() mean?

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 63cc5246168c..99dd11c11b66 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1967,8 +1967,19 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
     if (!MFI->isEntryFunction()) {
       const Function &Fn = MF.getFunction();
       DiagnosticInfoUnsupported BadLDSDecl(
-        Fn, "local memory global used by non-kernel function", MI.getDebugLoc());
+        Fn, "local memory global used by non-kernel function", MI.getDebugLoc(),
+        DS_Warning);
       Fn.getContext().diagnose(BadLDSDecl);
+
+      // We currently don't have a way to correctly allocate LDS objects that
+      // aren't directly associated with a kernel. We do force inlining of
+      // functions that use local objects. However, if these dead functions are
+      // not eliminated, we don't want a compile time error. Just emit a warning
+      // and a trap, since there should be no callable path here.
+      B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+      B.buildUndef(DstReg);
+      MI.eraseFromParent();
+      return true;
     }
 
     // TODO: We could emit code to handle the initialization somewhere.

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
index 56571fd077ec..66122aa801a8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
@@ -1,13 +1,57 @@
-; Runs original SDAG test with -global-isel
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
 
-; RUN: not llc -global-isel -mtriple=amdgcn-amd-amdhsa -o /dev/null < %S/../lds-global-non-entry-func.ll 2>&1 | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
 
 @lds = internal addrspace(3) global float undef, align 4
 
-; CHECK: error: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
-; CHECK-NOT: error
-; CHECK-NOT: ERROR
+; ERR: warning: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
 define void @func_use_lds_global() {
+; GFX8-LABEL: func_use_lds_global:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v0, 0
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX8-NEXT:    s_trap 2
+; GFX8-NEXT:    ds_write_b32 v0, v0
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_use_lds_global:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX9-NEXT:    s_trap 2
+; GFX9-NEXT:    ds_write_b32 v0, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   store float 0.0, float addrspace(3)* @lds, align 4
   ret void
 }
+
+; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
+define void @func_use_lds_global_constexpr_cast() {
+; GFX8-LABEL: func_use_lds_global_constexpr_cast:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX8-NEXT:    s_trap 2
+; GFX8-NEXT:    flat_store_dword v[0:1], v0
+; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_use_lds_global_constexpr_cast:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX9-NEXT:    s_trap 2
+; GFX9-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
index d797466f068f..d81bb8a65ce6 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
@@ -1,9 +1,46 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
+
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
 
 @lds = internal addrspace(3) global float undef, align 4
 
-; CHECK: error: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
+; ERR: warning: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
 define void @func_use_lds_global() {
+; GFX8-LABEL: func_use_lds_global:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v0, 0
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    ds_write_b32 v0, v0
+; GFX8-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX8-NEXT:    s_trap 2
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_use_lds_global:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    ds_write_b32 v0, v0
+; GFX9-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX9-NEXT:    s_trap 2
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
   store float 0.0, float addrspace(3)* @lds, align 4
   ret void
 }
+
+; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
+define void @func_use_lds_global_constexpr_cast() {
+; GCN-LABEL: func_use_lds_global_constexpr_cast:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GCN-NEXT:    s_trap 2
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list