[llvm-branch-commits] [llvm] [AMDGPU][GISel] Use buildObjectPtrOffset instead of buildPtrAdd (PR #150899)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jul 28 01:39:28 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Fabian Ritter (ritter-x2a)
<details>
<summary>Changes</summary>
This concerns the offset computations for kernargs and in
RegBankLegalizeHelper::splitLoad, all of which should stay within the bounds
of a memory object. See #150392 for the motivation for introducing the
buildObjectPtrOffset function.
For SWDEV-516125.
---
Patch is 113.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150899.diff
10 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+10-10)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+16-16)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+45-45)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+64-64)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (+9-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir (+26-26)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index fedfa3f9dd900..3d494374fb33b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2295,8 +2295,8 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
LLT::scalar(32), commonAlignment(Align(64), Offset));
// Pointer address
- B.buildPtrAdd(LoadAddr, KernargPtrReg,
- B.buildConstant(LLT::scalar(64), Offset).getReg(0));
+ B.buildObjectPtrOffset(LoadAddr, KernargPtrReg,
+ B.buildConstant(LLT::scalar(64), Offset).getReg(0));
// Load address
return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
}
@@ -2317,8 +2317,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
MachineMemOperand::MOInvariant,
LLT::scalar(32), commonAlignment(Align(64), StructOffset));
- B.buildPtrAdd(LoadAddr, QueuePtr,
- B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
+ B.buildObjectPtrOffset(
+ LoadAddr, QueuePtr,
+ B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
}
@@ -4500,8 +4501,7 @@ Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
llvm_unreachable("failed to find kernarg segment ptr");
auto COffset = B.buildConstant(LLT::scalar(64), Offset);
- // TODO: Should get nuw
- return B.buildPtrAdd(PtrTy, KernArgReg, COffset).getReg(0);
+ return B.buildObjectPtrOffset(PtrTy, KernArgReg, COffset).getReg(0);
}
/// Legalize a value that's loaded from kernel arguments. This is only used by
@@ -5676,8 +5676,8 @@ bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg,
AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
return false;
- // FIXME: This should be nuw
- B.buildPtrAdd(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
+ B.buildObjectPtrOffset(DstReg, KernargPtrReg,
+ B.buildConstant(IdxTy, Offset).getReg(0));
return true;
}
@@ -7019,8 +7019,8 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
// Pointer address
Register LoadAddr = MRI.createGenericVirtualRegister(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
- B.buildPtrAdd(LoadAddr, KernargPtrReg,
- B.buildConstant(LLT::scalar(64), Offset).getReg(0));
+ B.buildObjectPtrOffset(LoadAddr, KernargPtrReg,
+ B.buildConstant(LLT::scalar(64), Offset).getReg(0));
// Load address
Register Temp = B.buildLoad(S64, LoadAddr, *MMO).getReg(0);
B.buildCopy(SGPR01, Temp);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index f471881ee7693..b45627d9c1c5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -294,7 +294,8 @@ void RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
BasePlusOffset = Base;
} else {
auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
- BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0);
+ BasePlusOffset =
+ B.buildObjectPtrOffset({PtrRB, PtrTy}, Base, Offset).getReg(0);
}
auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 3e7a5671bb5de..33a9c5e258ea2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -24,7 +24,7 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], [[C]](s64)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
@@ -65,7 +65,7 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], [[C]](s64)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
@@ -105,7 +105,7 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY4]], [[C]](s64)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index 33862de91430c..57ee2c8f88073 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -31,7 +31,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -84,7 +84,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -230,7 +230,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -319,7 +319,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -668,7 +668,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -710,7 +710,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -756,7 +756,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -802,7 +802,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -852,7 +852,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -898,7 +898,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64)
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -949,7 +949,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -996,7 +996,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1047,7 +1047,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1098,7 +1098,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1153,7 +1153,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1200,7 +1200,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64)
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 559391709f41d..6f624b2536f1a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -91,7 +91,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -174,7 +174,7 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -252,7 +252,7 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -314,7 +314,7 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -376,7 +376,7 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -456,7 +456,7 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64)
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -518,7 +518,7 @@ define amdgp...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/150899
More information about the llvm-branch-commits
mailing list