[PATCH] D91336: AMDGPU/GlobalISel: Fix negative offset folding for buffer_load
Petar Avramovic via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 19 06:26:48 PST 2020
Petar.Avramovic updated this revision to Diff 306394.
Petar.Avramovic edited the summary of this revision.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D91336/new/
https://reviews.llvm.org/D91336
Files:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
@@ -62,8 +62,9 @@
; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
- ; FAST: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
; GREEDY-LABEL: name: s_buffer_load_negative_offset
; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
@@ -72,8 +73,9 @@
; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
- ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $vgpr0
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1321,6 +1321,8 @@
return Def->getOperand(0).getReg();
}
+static bool signBitNotSet(unsigned Val) { return (Val & 0x80000000) == 0; }
+
// Analyze a combined offset from an llvm.amdgcn.s.buffer intrinsic and store
// the three offsets (voffset, soffset and instoffset)
static unsigned setBufferOffsets(MachineIRBuilder &B,
@@ -1352,8 +1354,9 @@
AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
uint32_t SOffset, ImmOffset;
- if (Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
- &RBI.Subtarget, Alignment)) {
+ if (Offset != 0 && signBitNotSet(Offset) &&
+ AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset, &RBI.Subtarget,
+ Alignment)) {
if (RBI.getRegBank(Base, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
VOffsetReg = Base;
SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
@@ -1373,7 +1376,8 @@
}
// Handle the variable sgpr + vgpr case.
- if (MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI)) {
+ MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI);
+ if (Add && signBitNotSet(Offset)) {
Register Src0 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(1).getReg());
Register Src1 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(2).getReg());
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D91336.306394.patch
Type: text/x-patch
Size: 3689 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201119/eaef37db/attachment.bin>
More information about the llvm-commits mailing list