[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Enable ISD::PTRADD for 64-bit AS by default (PR #146076)
Fabian Ritter via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 27 06:28:24 PDT 2025
https://github.com/ritter-x2a created https://github.com/llvm/llvm-project/pull/146076
Also removes the command line option to control this feature.
There seem to be mainly two kinds of test changes:
- Some operands of addition instructions are swapped; that is to be expected
since PTRADD is not commutative.
- Improvements in code generation, probably because the legacy lowering enabled
some transformations that were sometimes harmful.
For SWDEV-516125.
>From bef09f8e86940d327f451c7b0c2639a86991037c Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Fri, 27 Jun 2025 05:38:52 -0400
Subject: [PATCH] [AMDGPU][SDAG] Enable ISD::PTRADD for 64-bit AS by default
Also removes the command line option to control this feature.
There seem to be mainly two kinds of test changes:
- Some operands of addition instructions are swapped; that is to be expected
since PTRADD is not commutative.
- Improvements in code generation, probably because the legacy lowering enabled
some transformations that were sometimes harmful.
For SWDEV-516125.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 +-
.../AMDGPU/infer-addrspace-flat-atomic.ll | 14 +--
llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll | 8 +-
.../AMDGPU/lower-module-lds-via-hybrid.ll | 4 +-
.../AMDGPU/lower-module-lds-via-table.ll | 16 ++--
.../match-perm-extract-vector-elt-bug.ll | 22 ++---
llvm/test/CodeGen/AMDGPU/memmove-var-size.ll | 16 ++--
.../AMDGPU/preload-implicit-kernargs.ll | 6 +-
.../AMDGPU/promote-constOffset-to-imm.ll | 8 +-
llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 7 +-
.../AMDGPU/ptradd-sdag-optimizations.ll | 94 ++++++-------------
.../AMDGPU/ptradd-sdag-undef-poison.ll | 6 +-
llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll | 27 +-----
13 files changed, 83 insertions(+), 155 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 71230078edc69..a3c344ecf962c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -63,14 +63,6 @@ static cl::opt<bool> UseDivergentRegisterIndexing(
cl::desc("Use indirect register addressing for divergent indexes"),
cl::init(false));
-// TODO: This option should be removed once we switch to always using PTRADD in
-// the SelectionDAG.
-static cl::opt<bool> UseSelectionDAGPTRADD(
- "amdgpu-use-sdag-ptradd", cl::Hidden,
- cl::desc("Generate ISD::PTRADD nodes for 64-bit pointer arithmetic in the "
- "SelectionDAG ISel"),
- cl::init(false));
-
static bool denormalModeIsFlushAllF32(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
return Info->getMode().FP32Denormals == DenormalMode::getPreserveSign();
@@ -10599,7 +10591,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
bool SITargetLowering::shouldPreservePtrArith(const Function &F,
EVT PtrVT) const {
- return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
+ return PtrVT == MVT::i64;
}
bool SITargetLowering::canTransformPtrArithOutOfBounds(const Function &F,
diff --git a/llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll b/llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll
index 258aa9e299c3d..ed2755ed1e38b 100644
--- a/llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll
@@ -11,8 +11,8 @@ define protected amdgpu_kernel void @InferNothing(i32 %a, ptr %b, double %c) {
; CHECK-NEXT: v_mov_b32_e32 v0, s2
; CHECK-NEXT: v_mov_b32_e32 v1, s3
; CHECK-NEXT: s_lshl_b64 s[2:3], s[6:7], 3
-; CHECK-NEXT: s_add_u32 s0, s2, s0
-; CHECK-NEXT: s_addc_u32 s1, s3, s1
+; CHECK-NEXT: s_add_u32 s0, s0, s2
+; CHECK-NEXT: s_addc_u32 s1, s1, s3
; CHECK-NEXT: v_mov_b32_e32 v3, s1
; CHECK-NEXT: v_add_co_u32_e64 v2, vcc, -8, s0
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
@@ -69,13 +69,13 @@ define protected amdgpu_kernel void @InferMixed(i32 %a, ptr addrspace(1) %b, dou
; CHECK-NEXT: s_lshl_b64 s[2:3], s[6:7], 3
; CHECK-NEXT: s_add_u32 s0, s0, s2
; CHECK-NEXT: s_addc_u32 s1, s1, s3
+; CHECK-NEXT: s_add_u32 s0, s0, -8
+; CHECK-NEXT: s_addc_u32 s1, s1, -1
; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_wbinvl1_vol
-; CHECK-NEXT: v_mov_b32_e32 v1, s1
-; CHECK-NEXT: v_add_co_u32_e64 v0, vcc, -7, s0
-; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
-; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
+; CHECK-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] offset:1
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_wbinvl1_vol
; CHECK-NEXT: s_endpgm
@@ -113,7 +113,7 @@ define protected amdgpu_kernel void @InferPHI(i32 %a, ptr addrspace(1) %b, doubl
; CHECK-NEXT: s_addc_u32 s1, s1, s5
; CHECK-NEXT: s_add_u32 s4, s0, -8
; CHECK-NEXT: s_addc_u32 s5, s1, -1
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 9
+; CHECK-NEXT: s_cmp_eq_u64 s[4:5], 1
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
index 04abb75c3f912..42ee46bd2c110 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
@@ -46,8 +46,8 @@ define void @use_extern_normal() #0 {
; CHECK-NEXT: s_ashr_i32 s5, s15, 31
; CHECK-NEXT: v_mov_b32_e32 v0, 0x4048f5c3
; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
-; CHECK-NEXT: s_add_u32 s4, s4, s6
-; CHECK-NEXT: s_addc_u32 s5, s5, s7
+; CHECK-NEXT: s_add_u32 s4, s6, s4
+; CHECK-NEXT: s_addc_u32 s5, s7, s5
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s4
@@ -70,8 +70,8 @@ define void @use_extern_overalign() #0 {
; CHECK-NEXT: s_ashr_i32 s5, s15, 31
; CHECK-NEXT: v_mov_b32_e32 v0, 0x42280000
; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
-; CHECK-NEXT: s_add_u32 s4, s4, s6
-; CHECK-NEXT: s_addc_u32 s5, s5, s7
+; CHECK-NEXT: s_add_u32 s4, s6, s4
+; CHECK-NEXT: s_addc_u32 s5, s7, s5
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s4
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
index 2a7553ae5d92b..0b5ba81b3c24f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
@@ -84,8 +84,8 @@ define void @f2() {
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table at rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table at rel32@hi+12
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
-; GCN-NEXT: s_add_u32 s4, s4, s6
-; GCN-NEXT: s_addc_u32 s5, s5, s7
+; GCN-NEXT: s_add_u32 s4, s6, s4
+; GCN-NEXT: s_addc_u32 s5, s7, s5
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s4
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
index dca9b71a757af..882e05cf9efda 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
@@ -49,8 +49,8 @@ define void @f0() {
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table at rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table at rel32@hi+12
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
-; GCN-NEXT: s_add_u32 s4, s4, s6
-; GCN-NEXT: s_addc_u32 s5, s5, s7
+; GCN-NEXT: s_add_u32 s4, s6, s4
+; GCN-NEXT: s_addc_u32 s5, s7, s5
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
@@ -90,8 +90,8 @@ define void @f1() {
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table at rel32@lo+8
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table at rel32@hi+16
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
-; GCN-NEXT: s_add_u32 s4, s4, s6
-; GCN-NEXT: s_addc_u32 s5, s5, s7
+; GCN-NEXT: s_add_u32 s4, s6, s4
+; GCN-NEXT: s_addc_u32 s5, s7, s5
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
@@ -131,8 +131,8 @@ define void @f2() {
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table at rel32@lo+12
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table at rel32@hi+20
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
-; GCN-NEXT: s_add_u32 s4, s4, s6
-; GCN-NEXT: s_addc_u32 s5, s5, s7
+; GCN-NEXT: s_add_u32 s4, s6, s4
+; GCN-NEXT: s_addc_u32 s5, s7, s5
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s4
@@ -172,8 +172,8 @@ define void @f3() {
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table at rel32@lo+16
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table at rel32@hi+24
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
-; GCN-NEXT: s_add_u32 s4, s4, s6
-; GCN-NEXT: s_addc_u32 s5, s5, s7
+; GCN-NEXT: s_add_u32 s4, s6, s4
+; GCN-NEXT: s_addc_u32 s5, s7, s5
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/match-perm-extract-vector-elt-bug.ll b/llvm/test/CodeGen/AMDGPU/match-perm-extract-vector-elt-bug.ll
index 4896e504cfdf4..229b3ece6e5ea 100644
--- a/llvm/test/CodeGen/AMDGPU/match-perm-extract-vector-elt-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/match-perm-extract-vector-elt-bug.ll
@@ -13,9 +13,9 @@ define amdgpu_kernel void @test(ptr addrspace(1) %src, ptr addrspace(1) %dst) {
; GFX9-NEXT: s_and_b32 s4, s4, 0xffff
; GFX9-NEXT: s_mul_i32 s14, s14, s4
; GFX9-NEXT: s_add_i32 s5, s5, s14
-; GFX9-NEXT: v_add_u32_e32 v0, s5, v0
-; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX9-NEXT: v_lshlrev_b64 v[4:5], 4, v[0:1]
+; GFX9-NEXT: v_add_u32_e32 v1, s5, v0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_ashrrev_i64 v[4:5], 28, v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v4
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
@@ -37,12 +37,12 @@ define amdgpu_kernel void @test(ptr addrspace(1) %src, ptr addrspace(1) %dst) {
; GFX10-NEXT: s_load_dword s4, s[8:9], 0x1c
; GFX10-NEXT: s_load_dword s5, s[8:9], 0x38
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_and_b32 s4, s4, 0xffff
; GFX10-NEXT: s_mul_i32 s14, s14, s4
-; GFX10-NEXT: v_add3_u32 v0, s5, s14, v0
-; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX10-NEXT: v_lshlrev_b64 v[4:5], 4, v[0:1]
+; GFX10-NEXT: v_add3_u32 v2, s5, s14, v0
+; GFX10-NEXT: v_ashrrev_i64 v[4:5], 28, v[1:2]
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v4
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v5, vcc_lo
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, s2, v4
@@ -62,21 +62,19 @@ define amdgpu_kernel void @test(ptr addrspace(1) %src, ptr addrspace(1) %dst) {
; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x1c
; GFX11-NEXT: s_load_b32 s7, s[4:5], 0x38
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_and_b32 v1, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s4, s6, 0xffff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mul_i32 s13, s13, s4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: v_add3_u32 v0, s7, s13, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX11-NEXT: v_lshlrev_b64 v[4:5], 4, v[0:1]
+; GFX11-NEXT: v_add3_u32 v1, s7, s13, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_ashrrev_i64 v[4:5], 28, v[0:1]
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, s0, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v5, vcc_lo
; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, s2, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_co_ci_u32_e64 v5, null, s3, v5, vcc_lo
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll b/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll
index 272daa9dd0b59..7187ece89ae04 100644
--- a/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll
@@ -388,8 +388,8 @@ define void @memmove_p0_p3(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align
; CHECK-NEXT: s_and_saveexec_b32 s7, s4
; CHECK-NEXT: s_cbranch_execz .LBB2_13
; CHECK-NEXT: ; %bb.11: ; %memmove_bwd_residual_loop.preheader
-; CHECK-NEXT: v_add_co_u32 v9, s4, v3, v0
-; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v4, v1, s4
+; CHECK-NEXT: v_add_co_u32 v9, s4, v0, v3
+; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v1, v4, s4
; CHECK-NEXT: v_add3_u32 v4, v3, v2, -1
; CHECK-NEXT: v_add_co_u32 v9, s4, v9, -1
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, -1, v10, s4
@@ -684,8 +684,8 @@ define void @memmove_p0_p5(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align
; CHECK-NEXT: s_and_saveexec_b32 s7, s4
; CHECK-NEXT: s_cbranch_execz .LBB4_13
; CHECK-NEXT: ; %bb.11: ; %memmove_bwd_residual_loop.preheader
-; CHECK-NEXT: v_add_co_u32 v9, s4, v3, v0
-; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v4, v1, s4
+; CHECK-NEXT: v_add_co_u32 v9, s4, v0, v3
+; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v1, v4, s4
; CHECK-NEXT: v_add3_u32 v4, v3, v2, -1
; CHECK-NEXT: v_add_co_u32 v9, s4, v9, -1
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, -1, v10, s4
@@ -1411,8 +1411,8 @@ define void @memmove_p3_p0(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align
; CHECK-NEXT: s_and_saveexec_b32 s7, s4
; CHECK-NEXT: s_cbranch_execz .LBB10_13
; CHECK-NEXT: ; %bb.11: ; %memmove_bwd_residual_loop.preheader
-; CHECK-NEXT: v_add_co_u32 v9, s4, v3, v1
-; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v4, v2, s4
+; CHECK-NEXT: v_add_co_u32 v9, s4, v1, v3
+; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v2, v4, s4
; CHECK-NEXT: v_add3_u32 v4, v3, v0, -1
; CHECK-NEXT: v_add_co_u32 v9, s4, v9, -1
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, -1, v10, s4
@@ -1889,8 +1889,8 @@ define void @memmove_p5_p0(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align
; CHECK-NEXT: s_and_saveexec_b32 s7, s4
; CHECK-NEXT: s_cbranch_execz .LBB15_13
; CHECK-NEXT: ; %bb.11: ; %memmove_bwd_residual_loop.preheader
-; CHECK-NEXT: v_add_co_u32 v9, s4, v3, v1
-; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v4, v2, s4
+; CHECK-NEXT: v_add_co_u32 v9, s4, v1, v3
+; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, v2, v4, s4
; CHECK-NEXT: v_add3_u32 v4, v3, v0, -1
; CHECK-NEXT: v_add_co_u32 v9, s4, v9, -1
; CHECK-NEXT: v_add_co_ci_u32_e64 v10, null, -1, v10, s4
diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll
index 79b531e3ce785..615740a2d0730 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll
@@ -277,8 +277,7 @@ define amdgpu_kernel void @random_incorrect_offset(ptr addrspace(1) inreg %out)
; GFX942-NEXT: .p2align 8
; GFX942-NEXT: ; %bb.2:
; GFX942-NEXT: .LBB8_0:
-; GFX942-NEXT: s_mov_b32 s4, 8
-; GFX942-NEXT: s_load_dword s0, s[0:1], s4 offset:0x2
+; GFX942-NEXT: s_load_dword s0, s[0:1], 0xa
; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v1, s0
@@ -293,8 +292,7 @@ define amdgpu_kernel void @random_incorrect_offset(ptr addrspace(1) inreg %out)
; GFX90a-NEXT: .p2align 8
; GFX90a-NEXT: ; %bb.2:
; GFX90a-NEXT: .LBB8_0:
-; GFX90a-NEXT: s_mov_b32 s0, 8
-; GFX90a-NEXT: s_load_dword s0, s[4:5], s0 offset:0x2
+; GFX90a-NEXT: s_load_dword s0, s[4:5], 0xa
; GFX90a-NEXT: v_mov_b32_e32 v0, 0
; GFX90a-NEXT: s_waitcnt lgkmcnt(0)
; GFX90a-NEXT: v_mov_b32_e32 v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index c4842c1f4f523..a78c3e854b011 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -612,8 +612,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_movk_i32 s1, 0x7f
; GFX10-NEXT: v_and_b32_e32 v6, 0xfe000000, v1
; GFX10-NEXT: v_lshl_or_b32 v0, v0, 3, v6
-; GFX10-NEXT: v_add_co_u32 v0, s0, v0, s34
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s35, s0
+; GFX10-NEXT: v_add_co_u32 v0, s0, s34, v0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s35, 0, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x5000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: .LBB1_1: ; %for.cond.preheader
@@ -830,8 +830,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX11-NEXT: v_and_b32_e32 v6, 0xfe000000, v1
; GFX11-NEXT: v_lshl_or_b32 v0, v0, 3, v6
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_add_co_u32 v0, s0, v0, s34
-; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s35, s0
+; GFX11-NEXT: v_add_co_u32 v0, s0, s34, v0
+; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s35, 0, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x5000, v0
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll
index ff90f1f175c3c..40f39a24d7a99 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX6,GFX6_PTRADD %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX6,GFX6_LEGACY %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=GFX6 %s
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF.
@@ -34,7 +33,3 @@ define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in
store i32 %result, ptr addrspace(1) %out
ret void
}
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX6_LEGACY: {{.*}}
-; GFX6_PTRADD: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 64e041103a563..6e552bd1317b8 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -disable-separate-const-offset-from-gep=1 -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX942,GFX942_PTRADD %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -disable-separate-const-offset-from-gep=1 -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX942,GFX942_LEGACY %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -disable-separate-const-offset-from-gep=1 < %s | FileCheck --check-prefixes=GFX942 %s
; Tests for DAG combines and folds related to the ISD::PTRADD SelectionDAG
; opcode. The RUN lines uses -disable-separate-const-offset-from-gep to disable
@@ -24,21 +23,13 @@ define i64 @global_load_ZTwoUses(ptr addrspace(1) %base, i64 %voffset) {
}
define i64 @global_load_gep_add_reassoc(ptr addrspace(1) %base, i64 %voffset) {
-; GFX942_PTRADD-LABEL: global_load_gep_add_reassoc:
-; GFX942_PTRADD: ; %bb.0:
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
-; GFX942_PTRADD-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:24
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942_LEGACY-LABEL: global_load_gep_add_reassoc:
-; GFX942_LEGACY: ; %bb.0:
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
-; GFX942_LEGACY-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:24
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0)
-; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_load_gep_add_reassoc:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:24
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%add0 = add nuw nsw i64 %voffset, 24
%gep0 = getelementptr nuw inbounds i8, ptr addrspace(1) %base, i64 %add0
%l = load i64, ptr addrspace(1) %gep0, align 8
@@ -222,23 +213,14 @@ define ptr addrspace(1) @shl_neg_offset(ptr addrspace(1) %p, i64 %noffset, i64 %
; Check that offsets are folded into global addresses if possible. For example,
; this is relevant when using --amdgpu-lower-module-lds-strategy=table.
define ptr addrspace(1) @complextype_global_gep(i64 %offset) {
-; GFX942_PTRADD-LABEL: complextype_global_gep:
-; GFX942_PTRADD: ; %bb.0:
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1]
-; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, v0 at rel32@lo+14
-; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, v0 at rel32@hi+22
-; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
-; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942_LEGACY-LABEL: complextype_global_gep:
-; GFX942_LEGACY: ; %bb.0:
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT: s_getpc_b64 s[0:1]
-; GFX942_LEGACY-NEXT: s_add_u32 s0, s0, v0 at rel32@lo+14
-; GFX942_LEGACY-NEXT: s_addc_u32 s1, s1, v0 at rel32@hi+22
-; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
-; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: complextype_global_gep:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_getpc_b64 s[0:1]
+; GFX942-NEXT: s_add_u32 s0, s0, v0 at rel32@lo+14
+; GFX942-NEXT: s_addc_u32 s1, s1, v0 at rel32@hi+22
+; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%gep0 = getelementptr inbounds %complextype, ptr addrspace(1) @v0, i64 0, i32 1, i64 %offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2
ret ptr addrspace(1) %gep1
@@ -431,36 +413,20 @@ define ptr @gep_disjoint_or(ptr %base) {
; Check that AssertAlign nodes between ptradd nodes don't block offset folding,
; taken from preload-implicit-kernargs.ll
define amdgpu_kernel void @random_incorrect_offset(ptr addrspace(1) inreg %out) #0 {
-; GFX942_PTRADD-LABEL: random_incorrect_offset:
-; GFX942_PTRADD: ; %bb.1:
-; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_PTRADD-NEXT: s_branch .LBB21_0
-; GFX942_PTRADD-NEXT: .p2align 8
-; GFX942_PTRADD-NEXT: ; %bb.2:
-; GFX942_PTRADD-NEXT: .LBB21_0:
-; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0xa
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, 0
-; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, s0
-; GFX942_PTRADD-NEXT: global_store_dword v0, v1, s[2:3]
-; GFX942_PTRADD-NEXT: s_endpgm
-;
-; GFX942_LEGACY-LABEL: random_incorrect_offset:
-; GFX942_LEGACY: ; %bb.1:
-; GFX942_LEGACY-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_LEGACY-NEXT: s_branch .LBB21_0
-; GFX942_LEGACY-NEXT: .p2align 8
-; GFX942_LEGACY-NEXT: ; %bb.2:
-; GFX942_LEGACY-NEXT: .LBB21_0:
-; GFX942_LEGACY-NEXT: s_mov_b32 s4, 8
-; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], s4 offset:0x2
-; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, 0
-; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, s0
-; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[2:3]
-; GFX942_LEGACY-NEXT: s_endpgm
+; GFX942-LABEL: random_incorrect_offset:
+; GFX942: ; %bb.1:
+; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_branch .LBB21_0
+; GFX942-NEXT: .p2align 8
+; GFX942-NEXT: ; %bb.2:
+; GFX942-NEXT: .LBB21_0:
+; GFX942-NEXT: s_load_dword s0, s[0:1], 0xa
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v1, s0
+; GFX942-NEXT: global_store_dword v0, v1, s[2:3]
+; GFX942-NEXT: s_endpgm
%imp_arg_ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr i8, ptr addrspace(4) %imp_arg_ptr, i32 2
%load = load i32, ptr addrspace(4) %gep
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-undef-poison.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-undef-poison.ll
index 1934ce395e63d..e7c715f0a38bf 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-undef-poison.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-undef-poison.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=amdgpu-isel -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX942,GFX942_PTRADD %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=amdgpu-isel -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX942,GFX942_LEGACY %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=amdgpu-isel < %s | FileCheck --check-prefixes=GFX942 %s
; Tests for undef and poison DAG folds for the ISD::PTRADD SelectionDAG opcode.
; If any additions are generated for these tests, the folds don't work.
@@ -44,6 +43,3 @@ define ptr @undef_base(ptr %p, i64 %offset) {
%gep1 = getelementptr i8, ptr undef, i64 %offset
ret ptr %gep1
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX942_LEGACY: {{.*}}
-; GFX942_PTRADD: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll
index 1c4a9547ed189..42158f18da525 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll
@@ -1,14 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-use-sdag-ptradd=1 < %s | FileCheck %s -check-prefixes=GFX8,GFX8_PTRADD
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-use-sdag-ptradd=0 < %s | FileCheck %s -check-prefixes=GFX8,GFX8_LEGACY
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-use-sdag-ptradd=1 < %s | FileCheck %s -check-prefixes=GFX942,GFX942_PTRADD
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-use-sdag-ptradd=0 < %s | FileCheck %s -check-prefixes=GFX942,GFX942_LEGACY
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -amdgpu-use-sdag-ptradd=1 < %s | FileCheck %s -check-prefixes=GFX10,GFX10_PTRADD
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -amdgpu-use-sdag-ptradd=0 < %s | FileCheck %s -check-prefixes=GFX10,GFX10_LEGACY
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-use-sdag-ptradd=1 < %s | FileCheck %s -check-prefixes=GFX11,GFX11_PTRADD
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-use-sdag-ptradd=0 < %s | FileCheck %s -check-prefixes=GFX11,GFX11_LEGACY
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-use-sdag-ptradd=1 < %s | FileCheck %s -check-prefixes=GFX12,GFX12_PTRADD
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-use-sdag-ptradd=0 < %s | FileCheck %s -check-prefixes=GFX12,GFX12_LEGACY
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck %s -check-prefixes=GFX8
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck %s -check-prefixes=GFX942
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck %s -check-prefixes=GFX10
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12
; Tests for the ISD::PTRADD SelectionDAG opcode. This only tests 64-bit address
; spaces since PTRADD is currently only used for these.
@@ -509,15 +504,3 @@ entry:
store i32 %val, ptr addrspace(1) %gep.to, align 4
ret void
}
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX10_LEGACY: {{.*}}
-; GFX10_PTRADD: {{.*}}
-; GFX11_LEGACY: {{.*}}
-; GFX11_PTRADD: {{.*}}
-; GFX12_LEGACY: {{.*}}
-; GFX12_PTRADD: {{.*}}
-; GFX8_LEGACY: {{.*}}
-; GFX8_PTRADD: {{.*}}
-; GFX942_LEGACY: {{.*}}
-; GFX942_PTRADD: {{.*}}
More information about the llvm-branch-commits
mailing list