[PATCH] D91336: AMDGPU/GlobalISel: Fix negative offset folding for buffer_load
Petar Avramovic via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 12 05:58:21 PST 2020
Petar.Avramovic updated this revision to Diff 304806.
Petar.Avramovic added a comment.
Pre-committed the test.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D91336/new/
https://reviews.llvm.org/D91336
Files:
llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
@@ -62,8 +62,9 @@
; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
- ; FAST: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
; GREEDY-LABEL: name: s_buffer_load_negative_offset
; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
@@ -72,8 +73,9 @@
; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
- ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $vgpr0
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1346,7 +1346,7 @@
}
Register Base;
- unsigned Offset;
+ int64_t Offset;
std::tie(Base, Offset) =
AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
@@ -1373,7 +1373,8 @@
}
// Handle the variable sgpr + vgpr case.
- if (MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI)) {
+ MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI);
+ if (Add && Offset >= 0) {
Register Src0 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(1).getReg());
Register Src1 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(2).getReg());
Index: llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -20,7 +20,7 @@
namespace AMDGPU {
/// Returns base register and constant offset.
-std::pair<Register, unsigned>
+std::pair<Register, int64_t>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg);
bool isLegalVOP3PShuffleMask(ArrayRef<int> Mask);
Index: llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -13,14 +13,14 @@
using namespace llvm;
using namespace MIPatternMatch;
-std::pair<Register, unsigned>
+std::pair<Register, int64_t>
AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
if (!Def)
return std::make_pair(Reg, 0);
if (Def->getOpcode() == TargetOpcode::G_CONSTANT) {
- unsigned Offset;
+ int64_t Offset;
const MachineOperand &Op = Def->getOperand(1);
if (Op.isImm())
Offset = Op.getImm();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D91336.304806.patch
Type: text/x-patch
Size: 4171 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201112/5dea1df5/attachment.bin>
More information about the llvm-commits mailing list