[llvm] [InferAS] Support getAssumedAddrSpace for Arguments for NVPTX (PR #133991)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 1 16:22:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
---
Patch is 69.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133991.diff
8 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp (+4-8)
- (modified) llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (+15)
- (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp (+35-13)
- (modified) llvm/test/CodeGen/NVPTX/i1-ext-load.ll (+5-5)
- (modified) llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll (+13-33)
- (modified) llvm/test/CodeGen/NVPTX/lower-args.ll (+10-24)
- (modified) llvm/test/CodeGen/NVPTX/lower-byval-args.ll (+99-349)
- (added) llvm/test/Transforms/InferAddressSpaces/NVPTX/arguments.ll (+35)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 2637b9fab0d50..a683726facd0c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -678,11 +678,8 @@ static bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F) {
LLVM_DEBUG(dbgs() << "Lowering kernel args of " << F.getName() << "\n");
for (Argument &Arg : F.args()) {
- if (Arg.getType()->isPointerTy()) {
- if (Arg.hasByValAttr())
- handleByValParam(TM, &Arg);
- else if (TM.getDrvInterface() == NVPTX::CUDA)
- markPointerAsGlobal(&Arg);
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
+ handleByValParam(TM, &Arg);
} else if (Arg.getType()->isIntegerTy() &&
TM.getDrvInterface() == NVPTX::CUDA) {
HandleIntToPtr(Arg);
@@ -699,10 +696,9 @@ static bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F) {
cast<NVPTXTargetLowering>(TM.getSubtargetImpl()->getTargetLowering());
for (Argument &Arg : F.args())
- if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
- markPointerAsAS(&Arg, ADDRESS_SPACE_LOCAL);
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
adjustByValArgAlignment(&Arg, &Arg, TLI);
- }
+
return true;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index a89ca3037c7ff..e359735c20750 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -599,6 +599,21 @@ unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
if (isa<AllocaInst>(V))
return ADDRESS_SPACE_LOCAL;
+ if (const Argument *Arg = dyn_cast<Argument>(V)) {
+ if (isKernelFunction(*Arg->getParent())) {
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(getTLI()->getTargetMachine());
+ if (TM.getDrvInterface() == NVPTX::CUDA && !Arg->hasByValAttr())
+ return ADDRESS_SPACE_GLOBAL;
+ } else {
+ // We assume that all device parameters that are passed byval will be
+ // placed in the local AS. Very simple cases will be updated after ISel to
+ // use the device param space where possible.
+ if (Arg->hasByValAttr())
+ return ADDRESS_SPACE_LOCAL;
+ }
+ }
+
return -1;
}
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 73a3f5e4d3694..965d6b6e45e6e 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -305,10 +305,15 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
}
// Returns true if V is an address expression.
-// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
-// getelementptr operators.
+// TODO: Currently, we consider only arguments and phi, bitcast, addrspacecast,
+// and getelementptr operators.
static bool isAddressExpression(const Value &V, const DataLayout &DL,
const TargetTransformInfo *TTI) {
+
+ if (const Argument *Arg = dyn_cast<Argument>(&V))
+ return Arg->getType()->isPointerTy() &&
+ TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
+
const Operator *Op = dyn_cast<Operator>(&V);
if (!Op)
return false;
@@ -341,6 +346,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
static SmallVector<Value *, 2>
getPointerOperands(const Value &V, const DataLayout &DL,
const TargetTransformInfo *TTI) {
+ if (isa<Argument>(&V))
+ return {};
+
const Operator &Op = cast<Operator>(V);
switch (Op.getOpcode()) {
case Instruction::PHI: {
@@ -505,13 +513,11 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
if (Visited.insert(V).second) {
PostorderStack.emplace_back(V, false);
- Operator *Op = cast<Operator>(V);
- for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
- if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
- PostorderStack.emplace_back(CE, false);
- }
- }
+ if (auto *Op = dyn_cast<Operator>(V))
+ for (auto &O : Op->operands())
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(O))
+ if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
+ PostorderStack.emplace_back(CE, false);
}
}
}
@@ -828,6 +834,18 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
isAddressExpression(*V, *DL, TTI));
+ if (auto *Arg = dyn_cast<Argument>(V)) {
+ // Arguments are address-space cast in the function body, as we do not
+ // want to change the function signature.
+ Function *F = Arg->getParent();
+ BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt();
+
+ Type *NewPtrTy = PointerType::get(Arg->getContext(), NewAddrSpace);
+ auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy);
+ NewI->insertBefore(Insert);
+ return NewI;
+ }
+
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
@@ -966,8 +984,12 @@ bool InferAddressSpacesImpl::updateAddressSpace(
// of all its pointer operands.
unsigned NewAS = UninitializedAddressSpace;
- const Operator &Op = cast<Operator>(V);
- if (Op.getOpcode() == Instruction::Select) {
+ // isAddressExpression should guarantee that V is an operator or an argument.
+ assert(isa<Operator>(V) || isa<Argument>(V));
+
+ if (isa<Operator>(V) &&
+ cast<Operator>(V).getOpcode() == Instruction::Select) {
+ const Operator &Op = cast<Operator>(V);
Value *Src0 = Op.getOperand(1);
Value *Src1 = Op.getOperand(2);
@@ -1258,7 +1280,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
}
// Otherwise, replaces the use with flat(NewV).
- if (Instruction *VInst = dyn_cast<Instruction>(V)) {
+ if (isa<Instruction>(V) || isa<Instruction>(NewV)) {
// Don't create a copy of the original addrspacecast.
if (U == V && isa<AddrSpaceCastInst>(V))
return;
@@ -1268,7 +1290,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
InsertPos = std::next(NewVInst->getIterator());
else
- InsertPos = std::next(VInst->getIterator());
+ InsertPos = std::next(cast<Instruction>(V)->getIterator());
while (isa<PHINode>(InsertPos))
++InsertPos;
diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
index f5f1dd9fcf0ea..44ac46db254a7 100644
--- a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
@@ -12,14 +12,14 @@ define ptx_kernel void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
; CHECK: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK: ld.param.u64 %rd1, [foo_param_0];
-; CHECK: ld.param.u64 %rd2, [foo_param_1];
-; CHECK: cvta.to.global.u64 %rd3, %rd2;
-; CHECK: cvta.to.global.u64 %rd4, %rd1;
-; CHECK: ld.global.nc.u8 %rs1, [%rd4];
+; CHECK: cvta.to.global.u64 %rd2, %rd1;
+; CHECK: ld.param.u64 %rd3, [foo_param_1];
+; CHECK: cvta.to.global.u64 %rd4, %rd3;
+; CHECK: ld.global.nc.u8 %rs1, [%rd2];
; CHECK: cvt.u32.u8 %r1, %rs1;
; CHECK: add.s32 %r2, %r1, 1;
; CHECK: and.b32 %r3, %r2, 1;
-; CHECK: st.global.u32 [%rd3], %r3;
+; CHECK: st.global.u32 [%rd4], %r3;
; CHECK: ret;
%ld = load i1, ptr %ptr, align 1
%zext = zext i1 %ld to i32
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index e4e1f40d0d8b2..38b7400696c54 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -12,9 +12,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; OPT-LABEL: define dso_local noundef i32 @non_kernel_function(
; OPT-SAME: ptr noundef readonly byval([[STRUCT_UINT4:%.*]]) align 16 captures(none) [[A:%.*]], i1 noundef zeroext [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; OPT-NEXT: [[ENTRY:.*:]]
-; OPT-NEXT: [[A2:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(5)
-; OPT-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A2]] to ptr
-; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A1]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
+; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
; OPT-NEXT: [[IDX_EXT:%.*]] = sext i32 [[C]] to i64
; OPT-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[A_]], i64 [[IDX_EXT]]
; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 1
@@ -74,12 +72,10 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_int(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
; OPT-NEXT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
-; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
+; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
; OPT-NEXT: ret void
%tmp = load i32, ptr %input1, align 4
%add = add i32 %tmp, %input2
@@ -105,15 +101,13 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
-; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
+; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
; OPT-NEXT: ret void
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -233,11 +227,9 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
-; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
+; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
; OPT-NEXT: ret void
store ptr %input, ptr %addr, align 8
ret void
@@ -263,14 +255,12 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; PTX-NOT .local
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
-; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
-; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
+; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT]], align 8
; OPT-NEXT: ret void
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -311,13 +301,11 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4
; OPT-NEXT: [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]]
-; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
+; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; OPT-NEXT: ret void
%val = load i32, ptr %input
@@ -361,15 +349,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4
; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
; OPT-NEXT: [[VAL2:%.*]] = load i32, ptr [[PTR2]], align 4
-; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT5]], align 8
+; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT]], align 8
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
; OPT-NEXT: ret i32 [[ADD]]
@@ -407,11 +393,9 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
@@ -423,7 +407,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; OPT: [[MERGE]]:
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
@@ -470,13 +454,11 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
@@ -488,7 +470,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; OPT: [[MERGE]]:
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
@@ -531,17 +513,15 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_select(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
; OPT-NEXT: [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
; OPT-NEXT: ret void
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index a1c0a86e9c4e4..8fa7d5c3e0cbc 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/133991
More information about the llvm-commits mailing list