[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in various special cases (PR #145330)
Fabian Ritter via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 27 06:27:56 PDT 2025
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/145330
>From 083ff661ca6f83339902ebb603a38d53a6f0a695 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Tue, 17 Jun 2025 04:03:53 -0400
Subject: [PATCH 1/2] [AMDGPU][SDAG] Handle ISD::PTRADD in various special
cases
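This patch teaches several checks that previously matched only ISD::ADD
(in SelectionDAG.cpp, TargetLowering.cpp, AMDGPUISelDAGToDAG.cpp, and
SIISelLowering.cpp) to also handle ISD::PTRADD.
There are more places in SIISelLowering.cpp and AMDGPUISelDAGToDAG.cpp
that check for ISD::ADD in a pointer context, but as far as I can tell
those are only relevant for 32-bit pointer arithmetic (like frame
indices/scratch addresses and LDS), for which we don't enable PTRADD
generation yet.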
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 21 +-
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 +-
llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 67 ++----
.../AMDGPU/ptradd-sdag-optimizations.ll | 196 ++++++------------
6 files changed, 105 insertions(+), 194 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45edcf9992706..efe4639535536 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8219,7 +8219,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
GlobalAddressSDNode *G = nullptr;
if (Src.getOpcode() == ISD::GlobalAddress)
G = cast<GlobalAddressSDNode>(Src);
- else if (Src.getOpcode() == ISD::ADD &&
+ else if (Src->isAnyAdd() &&
Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
Src.getOperand(1).getOpcode() == ISD::Constant) {
G = cast<GlobalAddressSDNode>(Src.getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 977579e851e33..81286c66e8ffc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -617,8 +617,14 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
// operands on the new node are also disjoint.
SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
: SDNodeFlags::None);
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::PTRADD) {
+ // It isn't a ptradd anymore if it doesn't operate on the entire
+ // pointer.
+ Opcode = ISD::ADD;
+ }
SDValue X = DAG.getNode(
- Op.getOpcode(), dl, SmallVT,
+ Opcode, dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
@@ -2853,6 +2859,11 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, And1);
}
[[fallthrough]];
+ case ISD::PTRADD:
+ if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
+ break;
+ // PTRADD behaves like ADD if pointers are represented as integers.
+ [[fallthrough]];
case ISD::ADD:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
@@ -2962,10 +2973,10 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.getOpcode() == ISD::MUL) {
Known = KnownBits::mul(KnownOp0, KnownOp1);
- } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
+ } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
Known = KnownBits::computeForAddSub(
- Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
- Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
+ Op->isAnyAdd(), Flags.hasNoSignedWrap(), Flags.hasNoUnsignedWrap(),
+ KnownOp0, KnownOp1);
}
break;
}
@@ -5608,7 +5619,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
return true;
}
- if (N->getOpcode() == ISD::ADD) {
+ if (N->isAnyAdd()) {
SDValue N1 = N->getOperand(0);
SDValue N2 = N->getOperand(1);
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 6e990cb2e160c..ee73ad5dda945 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1449,7 +1449,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
C1 = nullptr;
}
- if (N0.getOpcode() == ISD::ADD) {
+ if (N0->isAnyAdd()) {
// (add N2, N3) -> addr64, or
// (add (add N2, N3), C1) -> addr64
SDValue N2 = N0.getOperand(0);
@@ -1899,7 +1899,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
}
// Match the variable offset.
- if (Addr.getOpcode() == ISD::ADD) {
+ if (Addr->isAnyAdd()) {
LHS = Addr.getOperand(0);
RHS = Addr.getOperand(1);
@@ -2230,7 +2230,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
SDValue N0, N1;
// Extract the base and offset if possible.
- if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
+ if (CurDAG->isBaseWithConstantOffset(Addr) || Addr->isAnyAdd()) {
N0 = Addr.getOperand(0);
N1 = Addr.getOperand(1);
} else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a51dca006253e..52c811d27e804 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10477,7 +10477,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue VOffset;
// Try to split SAddr and VOffset. Global and LDS pointers share the same
// immediate offset, so we cannot use a regular SelectGlobalSAddr().
- if (Addr->isDivergent() && Addr.getOpcode() == ISD::ADD) {
+ if (Addr->isDivergent() && Addr->isAnyAdd()) {
SDValue LHS = Addr.getOperand(0);
SDValue RHS = Addr.getOperand(1);
@@ -12027,8 +12027,7 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, unsigned AddrSpace,
// We only do this to handle cases where it's profitable when there are
// multiple uses of the add, so defer to the standard combine.
- if ((N0.getOpcode() != ISD::ADD && N0.getOpcode() != ISD::OR) ||
- N0->hasOneUse())
+ if ((!N0->isAnyAdd() && N0.getOpcode() != ISD::OR) || N0->hasOneUse())
return SDValue();
const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
@@ -12067,6 +12066,8 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, unsigned AddrSpace,
N->getFlags().hasNoUnsignedWrap() &&
(N0.getOpcode() == ISD::OR || N0->getFlags().hasNoUnsignedWrap()));
+ // Use ISD::ADD even if the original operation was ISD::PTRADD, since we can't
+ // be sure that the new left operand is a proper base pointer.
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset, Flags);
}
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll
index fab56383ffa8a..ff90f1f175c3c 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll
@@ -5,50 +5,26 @@
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF.
define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
-; GFX6_PTRADD-LABEL: v_add_i32:
-; GFX6_PTRADD: ; %bb.0:
-; GFX6_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX6_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX6_PTRADD-NEXT: s_mov_b32 s7, 0x100f000
-; GFX6_PTRADD-NEXT: s_mov_b32 s10, 0
-; GFX6_PTRADD-NEXT: s_mov_b32 s11, s7
-; GFX6_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6_PTRADD-NEXT: v_mov_b32_e32 v1, s3
-; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, s2, v0
-; GFX6_PTRADD-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GFX6_PTRADD-NEXT: s_mov_b32 s8, s10
-; GFX6_PTRADD-NEXT: s_mov_b32 s9, s10
-; GFX6_PTRADD-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
-; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
-; GFX6_PTRADD-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
-; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
-; GFX6_PTRADD-NEXT: s_mov_b32 s6, -1
-; GFX6_PTRADD-NEXT: s_mov_b32 s4, s0
-; GFX6_PTRADD-NEXT: s_mov_b32 s5, s1
-; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, v2, v0
-; GFX6_PTRADD-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GFX6_PTRADD-NEXT: s_endpgm
-;
-; GFX6_LEGACY-LABEL: v_add_i32:
-; GFX6_LEGACY: ; %bb.0:
-; GFX6_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX6_LEGACY-NEXT: s_mov_b32 s7, 0x100f000
-; GFX6_LEGACY-NEXT: s_mov_b32 s10, 0
-; GFX6_LEGACY-NEXT: s_mov_b32 s11, s7
-; GFX6_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX6_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6_LEGACY-NEXT: s_mov_b64 s[8:9], s[2:3]
-; GFX6_LEGACY-NEXT: v_mov_b32_e32 v1, 0
-; GFX6_LEGACY-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
-; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
-; GFX6_LEGACY-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
-; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
-; GFX6_LEGACY-NEXT: s_mov_b32 s6, -1
-; GFX6_LEGACY-NEXT: s_mov_b32 s4, s0
-; GFX6_LEGACY-NEXT: s_mov_b32 s5, s1
-; GFX6_LEGACY-NEXT: v_add_i32_e32 v0, vcc, v2, v0
-; GFX6_LEGACY-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GFX6_LEGACY-NEXT: s_endpgm
+; GFX6-LABEL: v_add_i32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX6-NEXT: s_mov_b32 s7, 0x100f000
+; GFX6-NEXT: s_mov_b32 s10, 0
+; GFX6-NEXT: s_mov_b32 s11, s7
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX6-NEXT: v_mov_b32_e32 v1, 0
+; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: s_mov_b32 s6, -1
+; GFX6-NEXT: s_mov_b32 s4, s0
+; GFX6-NEXT: s_mov_b32 s5, s1
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0
+; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
%b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
@@ -60,4 +36,5 @@ define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX6: {{.*}}
+; GFX6_LEGACY: {{.*}}
+; GFX6_PTRADD: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 0cd920616c515..893deb35fe822 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -294,27 +294,15 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr.
define amdgpu_kernel void @uniform_base_varying_offset_imm(ptr addrspace(1) %p) {
-; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm:
-; GFX942_PTRADD: ; %bb.0: ; %entry
-; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0
-; GFX942_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1
-; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
-; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off offset:16
-; GFX942_PTRADD-NEXT: s_endpgm
-;
-; GFX942_LEGACY-LABEL: uniform_base_varying_offset_imm:
-; GFX942_LEGACY: ; %bb.0: ; %entry
-; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX942_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 1
-; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[0:1] offset:16
-; GFX942_LEGACY-NEXT: s_endpgm
+; GFX942-LABEL: uniform_base_varying_offset_imm:
+; GFX942: ; %bb.0: ; %entry
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT: v_mov_b32_e32 v1, 1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: global_store_dword v0, v1, s[0:1] offset:16
+; GFX942-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%shift = shl i32 %tid, 2
@@ -328,33 +316,18 @@ entry:
; Adjusted from global-saddr-load.ll. Tests PTRADD handling in
; AMDGPUDAGToDAGISel::SelectSMRDBaseOffset.
define amdgpu_kernel void @global_load_saddr_i32_uniform_offset(ptr addrspace(1) %sbase, i32 %soffset, ptr addrspace(1) %r) {
-; GFX942_PTRADD-LABEL: global_load_saddr_i32_uniform_offset:
-; GFX942_PTRADD: ; %bb.0:
-; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX942_PTRADD-NEXT: s_load_dword s6, s[4:5], 0x8
-; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, 0
-; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, s6
-; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, 0
-; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
-; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, s0
-; GFX942_PTRADD-NEXT: global_store_dword v0, v1, s[2:3]
-; GFX942_PTRADD-NEXT: s_endpgm
-;
-; GFX942_LEGACY-LABEL: global_load_saddr_i32_uniform_offset:
-; GFX942_LEGACY: ; %bb.0:
-; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX942_LEGACY-NEXT: s_load_dword s6, s[4:5], 0x8
-; GFX942_LEGACY-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
-; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, 0
-; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0
-; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, s0
-; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[2:3]
-; GFX942_LEGACY-NEXT: s_endpgm
+; GFX942-LABEL: global_load_saddr_i32_uniform_offset:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v1, s0
+; GFX942-NEXT: global_store_dword v0, v1, s[2:3]
+; GFX942-NEXT: s_endpgm
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%load = load i32, ptr addrspace(1) %gep0
@@ -366,28 +339,15 @@ define amdgpu_kernel void @global_load_saddr_i32_uniform_offset(ptr addrspace(1)
; Adjusted from llvm.amdgcn.global.load.lds.ll, tests the offset lowering for
; Intrinsic::amdgcn_global_load_lds.
define void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) {
-; GFX942_PTRADD-LABEL: global_load_lds_dword_saddr_and_vaddr:
-; GFX942_PTRADD: ; %bb.0: ; %main_body
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, v1
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v3, 0
-; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[0:1], 0, v[2:3]
-; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
-; GFX942_PTRADD-NEXT: s_mov_b32 m0, s0
-; GFX942_PTRADD-NEXT: s_nop 0
-; GFX942_PTRADD-NEXT: global_load_lds_dword v[2:3], off offset:48 sc1
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942_LEGACY-LABEL: global_load_lds_dword_saddr_and_vaddr:
-; GFX942_LEGACY: ; %bb.0: ; %main_body
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s2, v0
-; GFX942_LEGACY-NEXT: s_mov_b32 m0, s2
-; GFX942_LEGACY-NEXT: s_nop 0
-; GFX942_LEGACY-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: global_load_lds_dword_saddr_and_vaddr:
+; GFX942: ; %bb.0: ; %main_body
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_readfirstlane_b32 s2, v0
+; GFX942-NEXT: s_mov_b32 m0, s2
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
main_body:
%voffset.64 = zext i32 %voffset to i64
%gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64
@@ -398,29 +358,17 @@ main_body:
; Taken from shl_add_ptr_global.ll, tests PTRADD handling in
; SITargetLowering::performSHLPtrCombine.
define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) {
-; GFX942_PTRADD-LABEL: shl_base_global_ptr_global_atomic_fadd:
-; GFX942_PTRADD: ; %bb.0:
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT: s_mov_b64 s[0:1], 0x80
-; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
-; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v6, 0x42c80000
-; GFX942_PTRADD-NEXT: global_atomic_add_f32 v[4:5], v6, off
-; GFX942_PTRADD-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0)
-; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942_LEGACY-LABEL: shl_base_global_ptr_global_atomic_fadd:
-; GFX942_LEGACY: ; %bb.0:
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5]
-; GFX942_LEGACY-NEXT: v_mov_b32_e32 v6, 0x42c80000
-; GFX942_LEGACY-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512
-; GFX942_LEGACY-NEXT: s_mov_b64 s[0:1], 0x80
-; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
-; GFX942_LEGACY-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0)
-; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: shl_base_global_ptr_global_atomic_fadd:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0x42c80000
+; GFX942-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512
+; GFX942-NEXT: s_mov_b64 s[0:1], 0x80
+; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
%cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
%shl = shl i64 %cast, 2
@@ -433,27 +381,16 @@ define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr a
; Test PTRADD handling in TargetLowering::SimplifyDemandedBits and
; TargetLowering::ShrinkDemandedOp.
define i32 @gep_in_const_as_cast_to_const32_as(ptr addrspace(4) %src, i64 %offset) {
-; GFX942_PTRADD-LABEL: gep_in_const_as_cast_to_const32_as:
-; GFX942_PTRADD: ; %bb.0: ; %entry
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
-; GFX942_PTRADD-NEXT: s_mov_b32 s1, 0
-; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
-; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
-; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, s0
-; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942_LEGACY-LABEL: gep_in_const_as_cast_to_const32_as:
-; GFX942_LEGACY: ; %bb.0: ; %entry
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT: v_add_u32_e32 v0, v0, v2
-; GFX942_LEGACY-NEXT: s_mov_b32 s1, 0
-; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s0, v0
-; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], 0x0
-; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, s0
-; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: gep_in_const_as_cast_to_const32_as:
+; GFX942: ; %bb.0: ; %entry
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_add_u32_e32 v0, v0, v2
+; GFX942-NEXT: s_mov_b32 s1, 0
+; GFX942-NEXT: v_readfirstlane_b32 s0, v0
+; GFX942-NEXT: s_load_dword s0, s[0:1], 0x0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, s0
+; GFX942-NEXT: s_setpc_b64 s[30:31]
entry:
%gep = getelementptr i8, ptr addrspace(4) %src, i64 %offset
%gep.cast = addrspacecast ptr addrspace(4) %gep to ptr addrspace(6)
@@ -465,29 +402,14 @@ entry:
; Test PTRADD handling in isMemSrcFromConstant.
define void @replace_const0_memcpy_by_memset(ptr align 4 %dst) {
-; GFX942_PTRADD-LABEL: replace_const0_memcpy_by_memset:
-; GFX942_PTRADD: ; %bb.0: ; %entry
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1]
-; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, CG@gotpcrel32@lo+4
-; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, CG@gotpcrel32@hi+12
-; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4
-; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942_PTRADD-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
-; GFX942_PTRADD-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942_LEGACY-LABEL: replace_const0_memcpy_by_memset:
-; GFX942_LEGACY: ; %bb.0: ; %entry
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT: v_mov_b32_e32 v2, 0
-; GFX942_LEGACY-NEXT: v_mov_b32_e32 v3, v2
-; GFX942_LEGACY-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
+; GFX942-LABEL: replace_const0_memcpy_by_memset:
+; GFX942: ; %bb.0: ; %entry
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
entry:
%gep = getelementptr i8, ptr addrspace(4) @CG, i64 4
tail call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 4 %dst, ptr addrspace(4) noundef nonnull align 4 %gep, i64 8, i1 false)
>From bd98fa825e4b3c2ad27b230c15558c5f56c9dbbd Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Fri, 27 Jun 2025 07:50:57 -0400
Subject: [PATCH 2/2] Replace isBaseWithConstantOffset(Addr) with
isADDLike(Addr) in SelectSMRDBaseOffset, and Op->isAnyAdd() with
Op.getOpcode() != ISD::SUB in SimplifyDemandedBits
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++--
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 81286c66e8ffc..967d65f41f983 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2975,8 +2975,8 @@ bool TargetLowering::SimplifyDemandedBits(
Known = KnownBits::mul(KnownOp0, KnownOp1);
} else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
Known = KnownBits::computeForAddSub(
- Op->isAnyAdd(), Flags.hasNoSignedWrap(), Flags.hasNoUnsignedWrap(),
- KnownOp0, KnownOp1);
+ Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
+ Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
}
break;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index ee73ad5dda945..ee456ea1289e1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2230,7 +2230,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
SDValue N0, N1;
// Extract the base and offset if possible.
- if (CurDAG->isBaseWithConstantOffset(Addr) || Addr->isAnyAdd()) {
+ if (Addr->isAnyAdd() || CurDAG->isADDLike(Addr)) {
N0 = Addr.getOperand(0);
N1 = Addr.getOperand(1);
} else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
More information about the llvm-branch-commits
mailing list