[llvm] [AMDGPU] Fix negative immediate offset for unbuffered smem loads (PR #89165)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 12:23:25 PDT 2024
https://github.com/vangthao95 updated https://github.com/llvm/llvm-project/pull/89165
>From 15a34d5ae6aace9b6e68d857596099915363985d Mon Sep 17 00:00:00 2001
From: Vang Thao <Vang.Thao at amd.com>
Date: Wed, 17 Apr 2024 21:06:43 -0400
Subject: [PATCH 1/3] [AMDGPU] Fix negative immediate offset for unbuffered
smem loads
For unbuffered smem loads, a negative immediate offset is illegal if the resulting total offset, IOFFSET + (SGPR[Offset] or M0 or zero), is negative.
This is a new PR for the change previously submitted as https://github.com/llvm/llvm-project/pull/79553.
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 55 ++++-
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 12 +-
.../AMDGPU/AMDGPUInstructionSelector.cpp | 19 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 +
.../GlobalISel/inst-select-load-constant.mir | 20 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll | 6 +-
.../AMDGPU/cgp-addressing-modes-smem.ll | 12 +-
.../AMDGPU/gfx12_scalar_subword_loads.ll | 68 ++++--
llvm/test/CodeGen/AMDGPU/global-saddr-load.ll | 204 +++++++++++++-----
llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll | 38 +++-
llvm/test/CodeGen/AMDGPU/smrd.ll | 6 +-
11 files changed, 345 insertions(+), 99 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index bba7682cd7a0d..bf65244255f34 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1984,8 +1984,10 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
// not null) offset. If Imm32Only is true, match only 32-bit immediate
// offsets available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
- SDValue *SOffset, SDValue *Offset,
- bool Imm32Only, bool IsBuffer) const {
+ SDValue *SBase, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only,
+ bool IsBuffer,
+ bool HasSOffset) const {
assert((!SOffset || !Offset) &&
"Cannot match both soffset and offset at the same time!");
@@ -2016,7 +2018,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer);
if (EncodedOffset && Offset && !Imm32Only) {
*Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
- return true;
+ if (EncodedOffset >= 0 || IsBuffer || HasSOffset ||
+ !Subtarget->hasSignedSMRDImmOffset())
+ return true;
+ // For unbuffered smem loads, it is illegal for the Immediate Offset to be
+ // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
+ // Handle the case where the Immediate Offset is negative and there is no
+ // SOffset.
+ return false;
}
// SGPR and literal offsets are unsigned.
@@ -2072,13 +2081,34 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
// true, match only 32-bit immediate offsets available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
SDValue *SOffset, SDValue *Offset,
- bool Imm32Only,
- bool IsBuffer) const {
+ bool Imm32Only, bool IsBuffer,
+ bool HasSOffset) const {
if (SOffset && Offset) {
assert(!Imm32Only && !IsBuffer);
SDValue B;
- return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
- SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
+ if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
+ return false;
+
+ if (!SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true))
+ return false;
+
+ if (IsBuffer || Imm32Only || !Subtarget->hasSignedSMRDImmOffset())
+ return true;
+
+ // For unbuffered smem loads, it is illegal for the Immediate Offset to be
+ // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
+ // Handle the case where the Immediate Offset + SOffset is negative.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset)) {
+ int64_t ByteOffset = C->getSExtValue();
+ if (ByteOffset >= 0)
+ return true;
+
+ KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
+ if (ByteOffset + SKnown.getMinValue().getSExtValue() < 0)
+ return false;
+ }
+
+ return true;
}
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
@@ -2097,11 +2127,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
}
if (!N0 || !N1)
return false;
- if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
+
+ if (SelectSMRDOffset(N1, &N0, SOffset, Offset, Imm32Only, IsBuffer,
+ HasSOffset)) {
SBase = N0;
return true;
}
- if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
+ if (SelectSMRDOffset(N0, &N1, SOffset, Offset, Imm32Only, IsBuffer,
+ HasSOffset)) {
SBase = N1;
return true;
}
@@ -2149,14 +2182,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
}
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
- return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
+ return SelectSMRDOffset(N, /*SBase=*/nullptr, /* SOffset */ nullptr, &Offset,
/* Imm32Only */ false, /* IsBuffer */ true);
}
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
SDValue &Offset) const {
assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
- return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
+ return SelectSMRDOffset(N, /*SBase=*/nullptr, /* SOffset */ nullptr, &Offset,
/* Imm32Only */ true, /* IsBuffer */ true);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index f987b747c0e21..b1ad16af3c35a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -183,13 +183,15 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &SAddr, SDValue &Offset) const;
- bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
- SDValue *Offset, bool Imm32Only = false,
- bool IsBuffer = false) const;
+ bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SBase,
+ SDValue *SOffset, SDValue *Offset,
+ bool Imm32Only = false, bool IsBuffer = false,
+ bool HasSOffset = false) const;
SDValue Expand32BitAddress(SDValue Addr) const;
bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false,
- bool IsBuffer = false) const;
+ bool IsBuffer = false,
+ bool HasSOffset = false) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
@@ -201,6 +203,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
SDValue &Offset) const;
+ bool SelectSMRDPrefetchImm(SDValue Addr, SDValue &SBase,
+ SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index e13c13913d4e8..10dda8a9e1eaa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4211,6 +4211,17 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
Base = GEPI2.SgprParts[0];
*SOffset = OffsetReg;
*Offset = *EncodedImm;
+ if (*Offset >= 0 || !STI.hasSignedSMRDImmOffset())
+ return true;
+
+ // For unbuffered smem loads, it is illegal for the Immediate Offset
+ // to be negative if the resulting (Offset + (M0 or SOffset or zero)
+ // is negative. Handle the case where the Immediate Offset + SOffset
+ // is negative.
+ auto SKnown = KB->getKnownBits(*SOffset);
+ if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
+ return false;
+
return true;
}
}
@@ -4221,7 +4232,13 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
Base = GEPI.SgprParts[0];
*Offset = *EncodedImm;
- return true;
+ if (*Offset >= 0 || !STI.hasSignedSMRDImmOffset())
+ return true;
+ // For unbuffered smem loads, it is illegal for the Immediate Offset to be
+ // negative if the resulting (Offset + (M0 or SOffset or zero is negative.
+ // Handle the case where the Immediate Offset is negative and there is no
+ // SOffset.
+ return false;
}
// SGPR offset is unsigned.
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 8a4a46ce50d1d..25c24c924f0a2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1315,6 +1315,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// of sign-extending.
bool hasGetPCZeroExtension() const { return GFX12Insts; }
+ // \returns true if the target supports signed immediate offset for SMRD
+ // instructions.
+ bool hasSignedSMRDImmOffset() const { return getGeneration() >= GFX9; }
+
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
index c44477273dad0..504f7697a0fcc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
@@ -1234,7 +1234,15 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load (s32), addrspace 4)
+ ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
+ ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
+ ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -1
@@ -1304,7 +1312,15 @@ body: |
; GFX10: liveins: $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load (s32), addrspace 4)
+ ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -524288
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1
+ ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
+ ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -524288
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
index 139f82b3dc9f7..9ee0acf2aa2db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
@@ -88,11 +88,13 @@ entry:
ret void
}
-; GFX9_10 can use a signed immediate byte offset
+; GFX9+ can use a signed immediate byte offset but not without sgpr[offset]
; GCN-LABEL: {{^}}smrd6:
; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4
; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0
-; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], -0x4
+; GFX9_10: s_add_u32 s2, s2, -4
+; GFX9_10: s_addc_u32 s3, s3, -1
+; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0
define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 -1
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
index 54dc5b8b9d3dd..41d2360dd5e1e 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
@@ -297,9 +297,11 @@ define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr,
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: .LBB5_1: ; %loop
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dword s3, s[0:1], -0x190
; GFX9-NEXT: s_add_i32 s2, s2, -1
+; GFX9-NEXT: s_add_u32 s4, s0, 0xfffffe70
+; GFX9-NEXT: s_addc_u32 s5, s1, -1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_load_dword s3, s[4:5], 0x0
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
; GFX9-NEXT: s_cbranch_scc1 .LBB5_1
; GFX9-NEXT: ; %bb.2: ; %end
@@ -307,10 +309,14 @@ define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr,
;
; GFX12-LABEL: test_sink_smem_offset_neg400:
; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_movk_i32 s4, 0xfe70
+; GFX12-NEXT: s_mov_b32 s5, -1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: .LBB5_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_load_b32 s3, s[0:1], -0x190
+; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT: s_add_co_i32 s2, s2, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
index c69207c0472e7..08da89ec0fb22 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
@@ -19,15 +19,31 @@ define amdgpu_ps void @test_s_load_i8(ptr addrspace(4) inreg %in, ptr addrspace(
}
define amdgpu_ps void @test_s_load_i8_imm(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
-; GCN-LABEL: test_s_load_i8_imm:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_load_i8 s0, s[0:1], -0x64
-; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_mov_b32_e32 v2, s0
-; GCN-NEXT: global_store_b32 v[0:1], v2, off
-; GCN-NEXT: s_nop 0
-; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GCN-NEXT: s_endpgm
+; DAG-LABEL: test_s_load_i8_imm:
+; DAG: ; %bb.0:
+; DAG-NEXT: s_movk_i32 s2, 0xff9c
+; DAG-NEXT: s_mov_b32 s3, -1
+; DAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; DAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; DAG-NEXT: s_load_i8 s0, s[0:1], 0x0
+; DAG-NEXT: s_wait_kmcnt 0x0
+; DAG-NEXT: v_mov_b32_e32 v2, s0
+; DAG-NEXT: global_store_b32 v[0:1], v2, off
+; DAG-NEXT: s_nop 0
+; DAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; DAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_s_load_i8_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_add_co_u32 s0, s0, 0xffffff9c
+; GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
+; GISEL-NEXT: s_load_i8 s0, s[0:1], 0x0
+; GISEL-NEXT: s_wait_kmcnt 0x0
+; GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
%gep = getelementptr i8, ptr addrspace(4) %in, i64 -100
%ld = load i8, ptr addrspace(4) %gep
%sext = sext i8 %ld to i32
@@ -195,15 +211,31 @@ define amdgpu_ps void @test_s_load_i16(ptr addrspace(4) inreg %in, ptr addrspace
}
define amdgpu_ps void @test_s_load_i16_imm(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
-; GCN-LABEL: test_s_load_i16_imm:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_load_i16 s0, s[0:1], -0xc8
-; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_mov_b32_e32 v2, s0
-; GCN-NEXT: global_store_b32 v[0:1], v2, off
-; GCN-NEXT: s_nop 0
-; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GCN-NEXT: s_endpgm
+; DAG-LABEL: test_s_load_i16_imm:
+; DAG: ; %bb.0:
+; DAG-NEXT: s_movk_i32 s2, 0xff38
+; DAG-NEXT: s_mov_b32 s3, -1
+; DAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; DAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; DAG-NEXT: s_load_i16 s0, s[0:1], 0x0
+; DAG-NEXT: s_wait_kmcnt 0x0
+; DAG-NEXT: v_mov_b32_e32 v2, s0
+; DAG-NEXT: global_store_b32 v[0:1], v2, off
+; DAG-NEXT: s_nop 0
+; DAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; DAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_s_load_i16_imm:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_add_co_u32 s0, s0, 0xffffff38
+; GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
+; GISEL-NEXT: s_load_i16 s0, s[0:1], 0x0
+; GISEL-NEXT: s_wait_kmcnt 0x0
+; GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GISEL-NEXT: global_store_b32 v[0:1], v2, off
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GISEL-NEXT: s_endpgm
%gep = getelementptr i16, ptr addrspace(4) %in, i64 -100
%ld = load i16, ptr addrspace(4) %gep
%sext = sext i16 %ld to i32
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
index d9cbbc11f9a73..2f7e91faa4184 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
@@ -157,12 +157,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4096(ptr addrspace(1) inr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
-; GFX12-LABEL: global_load_saddr_i8_offset_neg4096:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x1000
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xf000
+; GFX12-SDAG-NEXT: s_mov_b32 s1, -1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
+; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg4096:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xfffff000
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1
+; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096
%load = load i8, ptr addrspace(1) %gep0
%zext = zext i8 %load to i32
@@ -198,12 +211,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4097(ptr addrspace(1) inr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
-; GFX12-LABEL: global_load_saddr_i8_offset_neg4097:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x1001
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xefff
+; GFX12-SDAG-NEXT: s_mov_b32 s1, -1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
+; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg4097:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xffffefff
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1
+; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097
%load = load i8, ptr addrspace(1) %gep0
%zext = zext i8 %load to i32
@@ -239,12 +265,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4098(ptr addrspace(1) inr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
-; GFX12-LABEL: global_load_saddr_i8_offset_neg4098:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x1002
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xeffe
+; GFX12-SDAG-NEXT: s_mov_b32 s1, -1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
+; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg4098:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xffffeffe
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1
+; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098
%load = load i8, ptr addrspace(1) %gep0
%zext = zext i8 %load to i32
@@ -376,12 +415,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2048(ptr addrspace(1) inr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
-; GFX12-LABEL: global_load_saddr_i8_offset_neg2048:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x800
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xf800
+; GFX12-SDAG-NEXT: s_mov_b32 s1, -1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
+; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg2048:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xfffff800
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1
+; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048
%load = load i8, ptr addrspace(1) %gep0
%zext = zext i8 %load to i32
@@ -413,12 +465,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2049(ptr addrspace(1) inr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
-; GFX12-LABEL: global_load_saddr_i8_offset_neg2049:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x801
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xf7ff
+; GFX12-SDAG-NEXT: s_mov_b32 s1, -1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
+; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg2049:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xfffff7ff
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1
+; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049
%load = load i8, ptr addrspace(1) %gep0
%zext = zext i8 %load to i32
@@ -450,12 +515,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2050(ptr addrspace(1) inr
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
-; GFX12-LABEL: global_load_saddr_i8_offset_neg2050:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x802
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xf7fe
+; GFX12-SDAG-NEXT: s_mov_b32 s1, -1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
+; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg2050:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xfffff7fe
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1
+; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050
%load = load i8, ptr addrspace(1) %gep0
%zext = zext i8 %load to i32
@@ -525,12 +603,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_0xFFFFFF(ptr addrspace(1) in
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
-; GFX12-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x800000
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_mov_b32 s0, 0xff800000
+; GFX12-SDAG-NEXT: s_mov_b32 s1, -1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
+; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xff800000
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1
+; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608
%load = load i8, ptr addrspace(1) %gep0
%zext = zext i8 %load to i32
@@ -1721,12 +1812,29 @@ define amdgpu_ps float @global_load_saddr_i8_zext_uniform_offset_immoffset(ptr a
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
-; GFX12-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], s4 offset:-0x18
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-NEXT: ; return to shader part epilog
+; GFX12-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_mov_b32 s5, 0
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[4:5]
+; GFX12-SDAG-NEXT: s_movk_i32 s2, 0xffe8
+; GFX12-SDAG-NEXT: s_mov_b32 s3, -1
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, s4
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, 0
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xffffffe8
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
+; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -24
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
index 77fd0bc058aca..2b517736ecff3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
@@ -53,14 +53,25 @@ entry:
}
define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
-; GFX12-LABEL: prefetch_data_sgpr_min_offset:
-; GFX12: ; %bb.0: ; %entry
-; GFX12-NEXT: s_prefetch_data s[0:1], -0x800000, null, 0
-; GFX12-NEXT: s_endpgm
+; GFX12-SDAG-LABEL: prefetch_data_sgpr_min_offset:
+; GFX12-SDAG: ; %bb.0: ; %entry
+; GFX12-SDAG-NEXT: s_mov_b32 s2, 0xff800000
+; GFX12-SDAG-NEXT: s_mov_b32 s3, -1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
+; GFX12-SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_min_offset:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: prefetch_data_sgpr_min_offset:
+; GFX12-GISEL: ; %bb.0: ; %entry
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
+; GFX12-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
+; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
@@ -215,14 +226,25 @@ entry:
}
define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
-; GFX12-LABEL: prefetch_inst_sgpr_min_offset:
-; GFX12: ; %bb.0: ; %entry
-; GFX12-NEXT: s_prefetch_inst s[0:1], -0x800000, null, 0
-; GFX12-NEXT: s_endpgm
+; GFX12-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
+; GFX12-SDAG: ; %bb.0: ; %entry
+; GFX12-SDAG-NEXT: s_mov_b32 s2, 0xff800000
+; GFX12-SDAG-NEXT: s_mov_b32 s3, -1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
+; GFX12-SDAG-NEXT: s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr_min_offset:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
+; GFX12-GISEL: ; %bb.0: ; %entry
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
+; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
+; GFX12-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
+; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index 4ce9260b8d53d..52db7fea08e05 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -88,11 +88,13 @@ entry:
ret void
}
-; GFX9_10 can use a signed immediate byte offset
+; GFX9+ can use a signed immediate byte offset but not without sgpr[offset]
; GCN-LABEL: {{^}}smrd6:
; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4
; SICIVI: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0
-; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, -0x4
+; GFX9_10: s_add_u32 s2, s2, -4
+; GFX9_10: s_addc_u32 s3, s3, -1
+; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0
define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 -1
>From c369787b0890965c023955f8f28333f3d3c8dd2e Mon Sep 17 00:00:00 2001
From: Vang Thao <Vang.Thao at amd.com>
Date: Wed, 8 May 2024 15:27:13 -0400
Subject: [PATCH 2/3] Restructured some parts of the changes
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 81 +++++++++----------
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 12 +--
.../AMDGPU/AMDGPUInstructionSelector.cpp | 17 ++--
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 19 +++--
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 4 +-
5 files changed, 66 insertions(+), 67 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index bf65244255f34..8310b034ec429 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1984,10 +1984,10 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
// not null) offset. If Imm32Only is true, match only 32-bit immediate
// offsets available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
- SDValue *SBase, SDValue *SOffset,
- SDValue *Offset, bool Imm32Only,
- bool IsBuffer,
- bool HasSOffset) const {
+ SDValue *SOffset, SDValue *Offset,
+ bool Imm32Only, bool IsBuffer,
+ bool HasSOffset,
+ int64_t ImmOffset) const {
assert((!SOffset || !Offset) &&
"Cannot match both soffset and offset at the same time!");
@@ -1995,18 +1995,28 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
if (!C) {
if (!SOffset)
return false;
+ bool Changed = false;
if (ByteOffsetNode.getValueType().isScalarInteger() &&
ByteOffsetNode.getValueType().getSizeInBits() == 32) {
*SOffset = ByteOffsetNode;
- return true;
- }
- if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
+ Changed = true;
+ } else if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
*SOffset = ByteOffsetNode.getOperand(0);
- return true;
+ Changed = true;
}
}
- return false;
+ // For unbuffered smem loads, it is illegal for the Immediate Offset to be
+ // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
+ // Handle the case where the Immediate Offset + SOffset is negative.
+ if (AMDGPU::hasSMRDSignedImmOffset(*Subtarget) && Changed &&
+ !IsBuffer & !Imm32Only && ImmOffset < 0) {
+ KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
+ if (ImmOffset + SKnown.getMinValue().getSExtValue() < 0)
+ return false;
+ }
+
+ return Changed;
}
SDLoc SL(ByteOffsetNode);
@@ -2014,18 +2024,11 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
// GFX9 and GFX10 have signed byte immediate offsets. The immediate
// offset for S_BUFFER instructions is unsigned.
int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
- std::optional<int64_t> EncodedOffset =
- AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer);
+ std::optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(
+ *Subtarget, ByteOffset, IsBuffer, HasSOffset);
if (EncodedOffset && Offset && !Imm32Only) {
*Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
- if (EncodedOffset >= 0 || IsBuffer || HasSOffset ||
- !Subtarget->hasSignedSMRDImmOffset())
- return true;
- // For unbuffered smem loads, it is illegal for the Immediate Offset to be
- // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
- // Handle the case where the Immediate Offset is negative and there is no
- // SOffset.
- return false;
+ return true;
}
// SGPR and literal offsets are unsigned.
@@ -2082,33 +2085,21 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
SDValue *SOffset, SDValue *Offset,
bool Imm32Only, bool IsBuffer,
- bool HasSOffset) const {
+ bool HasSOffset,
+ int64_t ImmOffset) const {
if (SOffset && Offset) {
assert(!Imm32Only && !IsBuffer);
SDValue B;
- if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
- return false;
- if (!SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true))
+ if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
return false;
- if (IsBuffer || Imm32Only || !Subtarget->hasSignedSMRDImmOffset())
- return true;
+ int64_t ImmOff = 0;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset))
+ ImmOff = C->getSExtValue();
- // For unbuffered smem loads, it is illegal for the Immediate Offset to be
- // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
- // Handle the case where the Immediate Offset + SOffset is negative.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset)) {
- int64_t ByteOffset = C->getSExtValue();
- if (ByteOffset >= 0)
- return true;
-
- KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
- if (ByteOffset + SKnown.getMinValue().getSExtValue() < 0)
- return false;
- }
-
- return true;
+ return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,
+ ImmOff);
}
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
@@ -2128,13 +2119,13 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
if (!N0 || !N1)
return false;
- if (SelectSMRDOffset(N1, &N0, SOffset, Offset, Imm32Only, IsBuffer,
- HasSOffset)) {
+ if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
+ ImmOffset)) {
SBase = N0;
return true;
}
- if (SelectSMRDOffset(N0, &N1, SOffset, Offset, Imm32Only, IsBuffer,
- HasSOffset)) {
+ if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
+ ImmOffset)) {
SBase = N1;
return true;
}
@@ -2182,14 +2173,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
}
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
- return SelectSMRDOffset(N, /*SBase=*/nullptr, /* SOffset */ nullptr, &Offset,
+ return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
/* Imm32Only */ false, /* IsBuffer */ true);
}
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
SDValue &Offset) const {
assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
- return SelectSMRDOffset(N, /*SBase=*/nullptr, /* SOffset */ nullptr, &Offset,
+ return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
/* Imm32Only */ true, /* IsBuffer */ true);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index b1ad16af3c35a..d8bb2a9d11e4f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -183,15 +183,15 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &SAddr, SDValue &Offset) const;
- bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SBase,
- SDValue *SOffset, SDValue *Offset,
- bool Imm32Only = false, bool IsBuffer = false,
- bool HasSOffset = false) const;
+ bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false,
+ bool IsBuffer = false, bool HasSOffset = false,
+ int64_t ImmOffset = 0) const;
SDValue Expand32BitAddress(SDValue Addr) const;
bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false,
- bool IsBuffer = false,
- bool HasSOffset = false) const;
+ bool IsBuffer = false, bool HasSOffset = false,
+ int64_t ImmOffset = 0) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 10dda8a9e1eaa..9f238795cbb7c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4198,10 +4198,11 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
return false;
const GEPInfo &GEPI = AddrInfo[0];
- std::optional<int64_t> EncodedImm =
- AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false);
+ std::optional<int64_t> EncodedImm;
if (SOffset && Offset) {
+ EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
+ /*HasSOffset=*/true);
if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
AddrInfo.size() > 1) {
const GEPInfo &GEPI2 = AddrInfo[1];
@@ -4211,7 +4212,7 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
Base = GEPI2.SgprParts[0];
*SOffset = OffsetReg;
*Offset = *EncodedImm;
- if (*Offset >= 0 || !STI.hasSignedSMRDImmOffset())
+ if (*Offset >= 0 || !AMDGPU::hasSMRDSignedImmOffset(STI))
return true;
// For unbuffered smem loads, it is illegal for the Immediate Offset
@@ -4229,16 +4230,12 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
return false;
}
+ EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
+ /*HasSOffset=*/false);
if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
Base = GEPI.SgprParts[0];
*Offset = *EncodedImm;
- if (*Offset >= 0 || !STI.hasSignedSMRDImmOffset())
- return true;
- // For unbuffered smem loads, it is illegal for the Immediate Offset to be
- // negative if the resulting (Offset + (M0 or SOffset or zero is negative.
- // Handle the case where the Immediate Offset is negative and there is no
- // SOffset.
- return false;
+ return true;
}
// SGPR offset is unsigned.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4e0074451aa58..2f10e8b6e9935 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -158,6 +158,12 @@ namespace llvm {
namespace AMDGPU {
+/// \returns true if \p ST supports a signed immediate offset for SMRD
+/// instructions, i.e. when isGFX9Plus(ST) holds.
+bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
+ return isGFX9Plus(ST);
+}
+
/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
return STI.getTargetTriple().getOS() == Triple::AMDHSA;
@@ -2874,10 +2880,6 @@ static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
return isGCN3Encoding(ST) || isGFX10Plus(ST);
}
-static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
- return isGFX9Plus(ST);
-}
-
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset) {
if (isGFX12Plus(ST))
@@ -2912,7 +2914,14 @@ uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
}
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
- int64_t ByteOffset, bool IsBuffer) {
+ int64_t ByteOffset, bool IsBuffer,
+ bool HasSOffset) {
+ // For unbuffered smem loads, it is illegal for the Immediate Offset to be
+ // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
+ // Handle case where SOffset is not present.
+ if (!IsBuffer && hasSMRDSignedImmOffset(ST) && !HasSOffset && ByteOffset < 0)
+ return std::nullopt;
+
if (isGFX12Plus(ST)) // 24 bit signed offsets
return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
: std::nullopt;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 943588fe701cc..a326ac927ef6c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1300,6 +1300,7 @@ bool hasVOPD(const MCSubtargetInfo &STI);
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
unsigned hasKernargPreload(const MCSubtargetInfo &STI);
+bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
/// Is Reg - scalar register
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
@@ -1472,7 +1473,8 @@ uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
- int64_t ByteOffset, bool IsBuffer);
+ int64_t ByteOffset, bool IsBuffer,
+ bool HasSOffset = false);
/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.s
>From 7ce94de3ba1b83fd8141bba00f4c5e597602c059 Mon Sep 17 00:00:00 2001
From: Vang Thao <Vang.Thao at amd.com>
Date: Tue, 28 May 2024 15:09:49 -0400
Subject: [PATCH 3/3] Restructure SOffset check in SelectSMRDBaseOffset
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 40 ++++++++++++-------
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 +
2 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 8310b034ec429..08804e4a86358 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1980,6 +1980,23 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return true;
}
+// For unbuffered smem loads, it is illegal for the Immediate Offset to be
+// negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
+// Reject the case where ImmOffset + the known minimum of SOffset is negative.
+bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
+                                                     bool Imm32Only,
+                                                     bool IsBuffer,
+                                                     int64_t ImmOffset) const {
+  if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
+      AMDGPU::hasSMRDSignedImmOffset(*Subtarget)) {
+    KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
+    if (ImmOffset + SKnown.getMinValue().getSExtValue() < 0)
+      return false;
+  }
+
+  return true;
+}
+
// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
// not null) offset. If Imm32Only is true, match only 32-bit immediate
// offsets available on CI.
@@ -1995,28 +2012,21 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
if (!C) {
if (!SOffset)
return false;
- bool Changed = false;
+
if (ByteOffsetNode.getValueType().isScalarInteger() &&
ByteOffsetNode.getValueType().getSizeInBits() == 32) {
*SOffset = ByteOffsetNode;
- Changed = true;
- } else if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
+ return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
+ ImmOffset);
+ }
+ if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
*SOffset = ByteOffsetNode.getOperand(0);
- Changed = true;
+ return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
+ ImmOffset);
}
}
- // For unbuffered smem loads, it is illegal for the Immediate Offset to be
- // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
- // Handle the case where the Immediate Offset + SOffset is negative.
- if (AMDGPU::hasSMRDSignedImmOffset(*Subtarget) && Changed &&
- !IsBuffer & !Imm32Only && ImmOffset < 0) {
- KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
- if (ImmOffset + SKnown.getMinValue().getSExtValue() < 0)
- return false;
- }
-
- return Changed;
+ return false;
}
SDLoc SL(ByteOffsetNode);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index d8bb2a9d11e4f..d145511ccaae4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -143,6 +143,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool isFlatScratchBaseLegal(SDValue Addr) const;
bool isFlatScratchBaseLegalSV(SDValue Addr) const;
bool isFlatScratchBaseLegalSVImm(SDValue Addr) const;
+ bool isSOffsetLegalWithImmOffset(SDValue *SOffset, bool Imm32Only,
+ bool IsBuffer, int64_t ImmOffset = 0) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
More information about the llvm-commits
mailing list