[llvm] 2e58d92 - [NVPTX] Check Before inserting AddrSpaceCastInst in NVPTXLoweringAlloca (#106127)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 12 14:58:44 PDT 2024


Author: weiwei chen
Date: 2024-09-12T17:58:40-04:00
New Revision: 2e58d923a1a543166a2aa72d8b579bb5c7cc320b

URL: https://github.com/llvm/llvm-project/commit/2e58d923a1a543166a2aa72d8b579bb5c7cc320b
DIFF: https://github.com/llvm/llvm-project/commit/2e58d923a1a543166a2aa72d8b579bb5c7cc320b.diff

LOG: [NVPTX] Check Before inserting AddrSpaceCastInst in NVPTXLoweringAlloca (#106127)

If `allocaInst` is already in `ADDRESS_SPACE_LOCAL`, there is no need for an
explicit cast to that address space; attempting one would actually trip an
assertion in `AddrSpaceCastInst`. Only insert the cast when it is needed.

Added: 
    

Modified: 
    llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
    llvm/test/CodeGen/NVPTX/lower-alloca.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 369238436083c7..bf473610a05aa6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -24,9 +24,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "NVPTX.h"
 #include "NVPTXUtilities.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -55,8 +55,8 @@ class NVPTXLowerAlloca : public FunctionPass {
 
 char NVPTXLowerAlloca::ID = 1;
 
-INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca",
-                "Lower Alloca", false, false)
+INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", "Lower Alloca", false,
+                false)
 
 // =============================================================================
 // Main function for this pass.
@@ -70,14 +70,38 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
     for (auto &I : BB) {
       if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
         Changed = true;
+
+        PointerType *AllocInstPtrTy =
+            cast<PointerType>(allocaInst->getType()->getScalarType());
+        unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace();
+        assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC ||
+                AllocAddrSpace == ADDRESS_SPACE_LOCAL) &&
+               "AllocaInst can only be in Generic or Local address space for "
+               "NVPTX.");
+
+        Instruction *AllocaInLocalAS = allocaInst;
         auto ETy = allocaInst->getAllocatedType();
-        auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
-        auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
-        auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
-        auto NewASCToGeneric =
-            new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, "");
-        NewASCToLocal->insertAfter(allocaInst);
-        NewASCToGeneric->insertAfter(NewASCToLocal);
+
+        // We need to make sure that LLVM has info that alloca needs to go to
+        // ADDRESS_SPACE_LOCAL for InferAddressSpace pass.
+        //
+        // For allocas in ADDRESS_SPACE_GENERIC, we add addrspacecast to
+        // ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that
+        // the alloca's users still use a generic pointer to operate on.
+        //
+        // For allocas already in ADDRESS_SPACE_LOCAL, we just need
+        // addrspacecast to ADDRESS_SPACE_GENERIC.
+        if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) {
+          auto ASCastToLocalAS = new AddrSpaceCastInst(
+              allocaInst, PointerType::get(ETy, ADDRESS_SPACE_LOCAL), "");
+          ASCastToLocalAS->insertAfter(allocaInst);
+          AllocaInLocalAS = ASCastToLocalAS;
+        }
+
+        auto AllocaInGenericAS = new AddrSpaceCastInst(
+            AllocaInLocalAS, PointerType::get(ETy, ADDRESS_SPACE_GENERIC), "");
+        AllocaInGenericAS->insertAfter(AllocaInLocalAS);
+
         for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
           // Check Load, Store, GEP, and BitCast Uses on alloca and make them
           // use the converted generic address, in order to expose non-generic
@@ -87,23 +111,23 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
           auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
           if (LI && LI->getPointerOperand() == allocaInst &&
               !LI->isVolatile()) {
-            LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
+            LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS);
             continue;
           }
           auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
           if (SI && SI->getPointerOperand() == allocaInst &&
               !SI->isVolatile()) {
-            SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
+            SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS);
             continue;
           }
           auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
           if (GI && GI->getPointerOperand() == allocaInst) {
-            GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
+            GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS);
             continue;
           }
           auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
           if (BI && BI->getOperand(0) == allocaInst) {
-            BI->setOperand(0, NewASCToGeneric);
+            BI->setOperand(0, AllocaInGenericAS);
             continue;
           }
         }

diff  --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
index b1c34c8b5ecd78..400184aaefb211 100644
--- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -S -nvptx-lower-alloca -infer-address-spaces | FileCheck %s
+; RUN: opt < %s -S -nvptx-lower-alloca | FileCheck %s --check-prefix LOWERALLOCAONLY
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
 
@@ -11,13 +12,32 @@ define void @kernel() {
   %A = alloca i32
 ; CHECK: addrspacecast ptr %A to ptr addrspace(5)
 ; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
+; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr %A to ptr addrspace(5)
+; LOWERALLOCAONLY: [[V2:%.*]] = addrspacecast ptr addrspace(5) [[V1]] to ptr
+; LOWERALLOCAONLY: store i32 0, ptr [[V2]], align 4
 ; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
   store i32 0, ptr %A
   call void @callee(ptr %A)
   ret void
 }
 
+define void @alloca_in_explicit_local_as() {
+; LABEL: @lower_alloca_addrspace5
+; PTX-LABEL: .visible .func alloca_in_explicit_local_as(
+  %A = alloca i32, addrspace(5)
+; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
+; PTX: st.local.u32 [%SP+0], {{%r[0-9]+}}
+; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr addrspace(5) %A to ptr
+; LOWERALLOCAONLY: store i32 0, ptr [[V1]], align 4
+  store i32 0, ptr addrspace(5) %A
+  call void @callee(ptr addrspace(5) %A)
+  ret void
+}
+
 declare void @callee(ptr)
+declare void @callee_addrspace5(ptr addrspace(5))
 
 !nvvm.annotations = !{!0}
+!nvvm.annotations = !{!1}
 !0 = !{ptr @kernel, !"kernel", i32 1}
+!1 = !{ptr @alloca_in_explicit_local_as, !"alloca_in_explicit_local_as", i32 1}


        


More information about the llvm-commits mailing list