[llvm] 8cd79bc - [AMDGPU][GlobalISel] Support register offsets for SMRDs.
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 5 05:58:24 PDT 2022
Author: Ivan Kosarev
Date: 2022-07-05T13:41:06+01:00
New Revision: 8cd79bc12c366de01cedb88e2e2fc026619e64ea
URL: https://github.com/llvm/llvm-project/commit/8cd79bc12c366de01cedb88e2e2fc026619e64ea
DIFF: https://github.com/llvm/llvm-project/commit/8cd79bc12c366de01cedb88e2e2fc026619e64ea.diff
LOG: [AMDGPU][GlobalISel] Support register offsets for SMRDs.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D128836
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3f242fdb6d8e..5b6fcb78bcbe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3235,7 +3235,7 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
// Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
- return false;
+ return Register();
if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
return Def->getOperand(1).getReg();
@@ -3851,27 +3851,36 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
getAddrModeInfo(*MI, *MRI, AddrInfo);
// FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
- // then we can select all ptr + 32-bit offsets not just immediate offsets.
- if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+ // then we can select all ptr + 32-bit offsets.
+ if (AddrInfo.empty())
return None;
const GEPInfo &GEPInfo = AddrInfo[0];
+ Register PtrReg = GEPInfo.SgprParts[0];
+
// SGPR offset is unsigned.
- if (!GEPInfo.Imm || GEPInfo.Imm < 0 || !isUInt<32>(GEPInfo.Imm))
- return None;
+ if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) &&
+ GEPInfo.Imm != 0) {
+ // If we make it this far we have a load with an 32-bit immediate offset.
+ // It is OK to select this using a sgpr offset, because we have already
+ // failed trying to select this load into one of the _IMM variants since
+ // the _IMM Patterns are considered before the _SGPR patterns.
+ Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
+ .addImm(GEPInfo.Imm);
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
+ }
- // If we make it this far we have a load with an 32-bit immediate offset.
- // It is OK to select this using a sgpr offset, because we have already
- // failed trying to select this load into one of the _IMM variants since
- // the _IMM Patterns are considered before the _SGPR patterns.
- Register PtrReg = GEPInfo.SgprParts[0];
- Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
- .addImm(GEPInfo.Imm);
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
- }};
+ if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) {
+ if (Register OffsetReg =
+ matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) {
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
+ }
+ }
+
+ return None;
}
std::pair<Register, int>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
index 03a4c8a6fadc..530f42dff2aa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
@@ -6,6 +6,7 @@
define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
define amdgpu_kernel void @smrd_wide() { ret void }
define amdgpu_kernel void @constant_address_positive() { ret void }
+ define amdgpu_kernel void @smrd_sgpr() { ret void }
...
---
@@ -210,3 +211,24 @@ body: |
%3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
S_ENDPGM 0, implicit %3
...
+
+---
+
+# Test a load with a register offset.
+# GCN-LABEL: name: smrd_sgpr{{$}}
+# GCN: S_LOAD_DWORD_SGPR %0, %1, 0
+
+name: smrd_sgpr
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ %0:sgpr(p4) = COPY $sgpr0_sgpr1
+ %1:sgpr(s32) = COPY $sgpr2
+ %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
+ %4:sgpr(p4) = G_PTR_ADD %0, %2
+ %5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
+ S_ENDPGM 0, implicit %5
+...
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
index 6523621098a8..a4ff0b449dbd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
@@ -2,10 +2,14 @@
; from a register.
; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -global-isel -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GISEL %s
; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.13, addrspace 4)
+; GISEL: $[[OFFSET:.*]] = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
+; GISEL: S_LOAD_DWORDX4_SGPR killed renamable {{.*}}, killed renamable $[[OFFSET]], 0 :: (invariant load (<4 x s32>) from {{.*}}, addrspace 4)
+
define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
.entry:
%5 = call i64 @llvm.amdgcn.s.getpc() #3
More information about the llvm-commits mailing list