[llvm] 2e58d92 - [NVPTX] Check Before inserting AddrSpaceCastInst in NVPTXLoweringAlloca (#106127)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 14:58:44 PDT 2024
Author: weiwei chen
Date: 2024-09-12T17:58:40-04:00
New Revision: 2e58d923a1a543166a2aa72d8b579bb5c7cc320b
URL: https://github.com/llvm/llvm-project/commit/2e58d923a1a543166a2aa72d8b579bb5c7cc320b
DIFF: https://github.com/llvm/llvm-project/commit/2e58d923a1a543166a2aa72d8b579bb5c7cc320b.diff
LOG: [NVPTX] Check Before inserting AddrSpaceCastInst in NVPTXLoweringAlloca (#106127)
If `allocaInst` is already in `ADDRESS_SPACE_LOCAL`, there is no need to
do an explicit cast, which would in fact trigger an assertion failure in
`AddrSpaceCastInst`. Only insert the cast when needed.
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
llvm/test/CodeGen/NVPTX/lower-alloca.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 369238436083c7..bf473610a05aa6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -24,9 +24,9 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXUtilities.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -55,8 +55,8 @@ class NVPTXLowerAlloca : public FunctionPass {
char NVPTXLowerAlloca::ID = 1;
-INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca",
- "Lower Alloca", false, false)
+INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", "Lower Alloca", false,
+ false)
// =============================================================================
// Main function for this pass.
@@ -70,14 +70,38 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
for (auto &I : BB) {
if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
Changed = true;
+
+ PointerType *AllocInstPtrTy =
+ cast<PointerType>(allocaInst->getType()->getScalarType());
+ unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace();
+ assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC ||
+ AllocAddrSpace == ADDRESS_SPACE_LOCAL) &&
+ "AllocaInst can only be in Generic or Local address space for "
+ "NVPTX.");
+
+ Instruction *AllocaInLocalAS = allocaInst;
auto ETy = allocaInst->getAllocatedType();
- auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
- auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
- auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
- auto NewASCToGeneric =
- new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, "");
- NewASCToLocal->insertAfter(allocaInst);
- NewASCToGeneric->insertAfter(NewASCToLocal);
+
+ // We need to make sure that LLVM has info that alloca needs to go to
+ // ADDRESS_SPACE_LOCAL for InferAddressSpace pass.
+ //
+ // For allocas in ADDRESS_SPACE_GENERIC, we add addrspacecast to
+ // ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that
+ // the alloca's users still use a generic pointer to operate on.
+ //
+ // For allocas already in ADDRESS_SPACE_LOCAL, we just need
+ // addrspacecast to ADDRESS_SPACE_GENERIC.
+ if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) {
+ auto ASCastToLocalAS = new AddrSpaceCastInst(
+ allocaInst, PointerType::get(ETy, ADDRESS_SPACE_LOCAL), "");
+ ASCastToLocalAS->insertAfter(allocaInst);
+ AllocaInLocalAS = ASCastToLocalAS;
+ }
+
+ auto AllocaInGenericAS = new AddrSpaceCastInst(
+ AllocaInLocalAS, PointerType::get(ETy, ADDRESS_SPACE_GENERIC), "");
+ AllocaInGenericAS->insertAfter(AllocaInLocalAS);
+
for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
@@ -87,23 +111,23 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
if (LI && LI->getPointerOperand() == allocaInst &&
!LI->isVolatile()) {
- LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
+ LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
if (SI && SI->getPointerOperand() == allocaInst &&
!SI->isVolatile()) {
- SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
+ SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
if (GI && GI->getPointerOperand() == allocaInst) {
- GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
+ GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
if (BI && BI->getOperand(0) == allocaInst) {
- BI->setOperand(0, NewASCToGeneric);
+ BI->setOperand(0, AllocaInGenericAS);
continue;
}
}
diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
index b1c34c8b5ecd78..400184aaefb211 100644
--- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -nvptx-lower-alloca -infer-address-spaces | FileCheck %s
+; RUN: opt < %s -S -nvptx-lower-alloca | FileCheck %s --check-prefix LOWERALLOCAONLY
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
@@ -11,13 +12,32 @@ define void @kernel() {
%A = alloca i32
; CHECK: addrspacecast ptr %A to ptr addrspace(5)
; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
+; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr %A to ptr addrspace(5)
+; LOWERALLOCAONLY: [[V2:%.*]] = addrspacecast ptr addrspace(5) [[V1]] to ptr
+; LOWERALLOCAONLY: store i32 0, ptr [[V2]], align 4
; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
store i32 0, ptr %A
call void @callee(ptr %A)
ret void
}
+define void @alloca_in_explicit_local_as() {
+; LABEL: @lower_alloca_addrspace5
+; PTX-LABEL: .visible .func alloca_in_explicit_local_as(
+ %A = alloca i32, addrspace(5)
+; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
+; PTX: st.local.u32 [%SP+0], {{%r[0-9]+}}
+; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr addrspace(5) %A to ptr
+; LOWERALLOCAONLY: store i32 0, ptr [[V1]], align 4
+ store i32 0, ptr addrspace(5) %A
+ call void @callee(ptr addrspace(5) %A)
+ ret void
+}
+
declare void @callee(ptr)
+declare void @callee_addrspace5(ptr addrspace(5))
!nvvm.annotations = !{!0}
+!nvvm.annotations = !{!1}
!0 = !{ptr @kernel, !"kernel", i32 1}
+!1 = !{ptr @alloca_in_explicit_local_as, !"alloca_in_explicit_local_as", i32 1}
More information about the llvm-commits
mailing list