[llvm] 8110fcc - AMDGPU/GlobalISel: Fix negative offset folding for buffer_load
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 27 05:46:57 PDT 2021
Author: Petar Avramovic
Date: 2021-04-27T14:45:22+02:00
New Revision: 8110fcc8fc56cb5d2fc0cbab882ddfdae2536201
URL: https://github.com/llvm/llvm-project/commit/8110fcc8fc56cb5d2fc0cbab882ddfdae2536201
DIFF: https://github.com/llvm/llvm-project/commit/8110fcc8fc56cb5d2fc0cbab882ddfdae2536201.diff
LOG: AMDGPU/GlobalISel: Fix negative offset folding for buffer_load
Buffer_load does unsigned offset calculations. Don't fold
operands of 32-bit add that are likely to cause unsigned add
overflow (common case is when one of the operands is negative).
Differential Revision: https://reviews.llvm.org/D91336
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 88b2f40ba820..86686ce6660a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1330,8 +1330,8 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
uint32_t SOffset, ImmOffset;
- if (Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
- &RBI.Subtarget, Alignment)) {
+ if ((int)Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
+ &RBI.Subtarget, Alignment)) {
if (RBI.getRegBank(Base, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
VOffsetReg = Base;
SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
@@ -1351,7 +1351,8 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
}
// Handle the variable sgpr + vgpr case.
- if (MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI)) {
+ MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI);
+ if (Add && (int)Offset >= 0) {
Register Src0 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(1).getReg());
Register Src1 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(2).getReg());
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
index 8fa4d347da67..dba7c36d7a97 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
@@ -62,8 +62,9 @@ body: |
; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
- ; FAST: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
; GREEDY-LABEL: name: s_buffer_load_negative_offset
; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
@@ -72,8 +73,9 @@ body: |
; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
- ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+ ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $vgpr0
More information about the llvm-commits
mailing list