[llvm-branch-commits] [llvm] ae8f4b2 - [AMDGPU] Folding of FI operand with flat scratch
Stanislav Mekhanoshin via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Dec 22 10:52:20 PST 2020
Author: Stanislav Mekhanoshin
Date: 2020-12-22T10:48:04-08:00
New Revision: ae8f4b2178c46da1f10eb9279c9b44fab8b85417
URL: https://github.com/llvm/llvm-project/commit/ae8f4b2178c46da1f10eb9279c9b44fab8b85417
DIFF: https://github.com/llvm/llvm-project/commit/ae8f4b2178c46da1f10eb9279c9b44fab8b85417.diff
LOG: [AMDGPU] Folding of FI operand with flat scratch
Differential Revision: https://reviews.llvm.org/D93501
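
The new flat-scratch-fold-fi.mir test below illustrates the fold. Sketching the first test case (an editor's paraphrase of the patch, not wording from the commit): when a frame index is materialized into a VGPR and used as the vaddr of a flat scratch access, SIFoldOperands now rewrites the instruction to its SADDR form via the new getFlatScratchInstSSfromSV mapping and folds the frame index in directly:

  ; before si-fold-operands
  %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
  %1:vgpr_32 = SCRATCH_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr

  ; after: the load no longer consumes %0, which is left for dead-code elimination
  %1:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr

SIRegisterInfo::eliminateFrameIndex() later resolves %stack.0 in the saddr slot, which the new assert in SIRegisterInfo.cpp checks.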
Added:
llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index bfba432848d4..06cce54e540c 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -172,9 +172,23 @@ static bool frameIndexMayFold(const SIInstrInfo *TII,
const MachineInstr &UseMI,
int OpNo,
const MachineOperand &OpToFold) {
- return OpToFold.isFI() &&
- TII->isMUBUF(UseMI) &&
- OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr);
+ if (!OpToFold.isFI())
+ return false;
+
+ if (TII->isMUBUF(UseMI))
+ return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
+ AMDGPU::OpName::vaddr);
+ if (!TII->isFLATScratch(UseMI))
+ return false;
+
+ int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
+ AMDGPU::OpName::saddr);
+ if (OpNo == SIdx)
+ return true;
+
+ int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
+ AMDGPU::OpName::vaddr);
+ return OpNo == VIdx && SIdx == -1;
}
FunctionPass *llvm::createSIFoldOperandsPass() {
@@ -631,25 +645,36 @@ void SIFoldOperands::foldOperand(
// Sanity check that this is a stack access.
// FIXME: Should probably use stack pseudos before frame lowering.
- if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
- MFI->getScratchRSrcReg())
- return;
+ if (TII->isMUBUF(*UseMI)) {
+ if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
+ MFI->getScratchRSrcReg())
+ return;
- // Ensure this is either relative to the current frame or the current wave.
- MachineOperand &SOff =
- *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
- if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) &&
- (!SOff.isImm() || SOff.getImm() != 0))
- return;
+ // Ensure this is either relative to the current frame or the current
+ // wave.
+ MachineOperand &SOff =
+ *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
+ if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) &&
+ (!SOff.isImm() || SOff.getImm() != 0))
+ return;
+
+ // If this is relative to the current wave, update it to be relative to
+ // the current frame.
+ if (SOff.isImm())
+ SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false);
+ }
// A frame index will resolve to a positive constant, so it should always be
// safe to fold the addressing mode, even pre-GFX9.
UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
- // If this is relative to the current wave, update it to be relative to the
- // current frame.
- if (SOff.isImm())
- SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false);
+ if (TII->isFLATScratch(*UseMI) &&
+ AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
+ AMDGPU::OpName::vaddr) != -1) {
+ unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
+ UseMI->setDesc(TII->get(NewOpc));
+ }
+
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 4625cefa1e3e..75aedee1ec6b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1184,6 +1184,9 @@ namespace AMDGPU {
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);
+ LLVM_READONLY
+ int getFlatScratchInstSSfromSV(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 746d08b8ce0e..e48138e56d71 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2524,6 +2524,13 @@ def getFlatScratchInstSTfromSS : InstrMapping {
let ValueCols = [["ST"]];
}
+def getFlatScratchInstSSfromSV : InstrMapping {
+ let FilterClass = "FlatScratchInst";
+ let RowFields = ["SVOp"];
+ let ColFields = ["Mode"];
+ let KeyCol = ["SV"];
+ let ValueCols = [["SS"]];
+}
include "SIInstructions.td"
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ab203c44e022..c91a59003319 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1498,6 +1498,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int64_t Offset = FrameInfo.getObjectOffset(Index);
if (ST.enableFlatScratch()) {
if (TII->isFLATScratch(*MI)) {
+ assert((int16_t)FIOperandNum ==
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::saddr));
+
// The offset is always swizzled, just replace it
if (FrameReg)
FIOp.ChangeToRegister(FrameReg, false);
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
new file mode 100644
index 000000000000..37cec99ae0ac
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
@@ -0,0 +1,88 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-enable-flat-scratch -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: test_fold_fi_scratch_load_vgpr
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ ; GCN-LABEL: name: test_fold_fi_scratch_load_vgpr
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; GCN: S_ENDPGM 0
+ %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ %1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ S_ENDPGM 0
+
+...
+
+---
+name: test_fold_fi_scratch_load_sgpr
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ ; GCN-LABEL: name: test_fold_fi_scratch_load_sgpr
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
+ ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ ; GCN: S_ENDPGM 0
+ %0:sgpr_32 = S_MOV_B32 %stack.0
+ %1:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %0:sgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
+ S_ENDPGM 0
+
+...
+
+---
+name: test_fold_fi_scratch_store_vgpr
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ ; GCN-LABEL: name: test_fold_fi_scratch_store_vgpr
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: S_ENDPGM 0
+ %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ %1:vgpr_32 = IMPLICIT_DEF
+ SCRATCH_STORE_DWORD %1:vgpr_32, %0:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ S_ENDPGM 0
+
+...
+
+---
+name: test_no_fold_fi_scratch_store_vgpr
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ ; GCN-LABEL: name: test_no_fold_fi_scratch_store_vgpr
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: SCRATCH_STORE_DWORD [[V_MOV_B32_e32_]], [[DEF]], 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: S_ENDPGM 0
+ %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ %1:vgpr_32 = IMPLICIT_DEF
+ SCRATCH_STORE_DWORD %0:vgpr_32, %1:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ S_ENDPGM 0
+
+...
+
+---
+name: test_fold_fi_scratch_store_sgpr
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ ; GCN-LABEL: name: test_fold_fi_scratch_store_sgpr
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: S_ENDPGM 0
+ %0:sgpr_32 = S_MOV_B32 %stack.0
+ %1:vgpr_32 = IMPLICIT_DEF
+ SCRATCH_STORE_DWORD_SADDR %1:vgpr_32, %0:sgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index 2c18e724278f..fcd37840002a 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -166,7 +166,7 @@ define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* b
; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
; GFX9-MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4{{$}}
-; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, [[SP]], off offset:4{{$}}
+; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4{{$}}
; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, i32 %arg2) #0 {
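
At the assembly level, per the updated check line above, the non-entry-block access now uses the stack pointer SGPR as saddr rather than a VGPR copy of it, roughly:

  scratch_load_dword v0, off, s32 offset:4   ; was: scratch_load_dword v0, v1, off offset:4

(v0 and v1 are illustrative register numbers; the test itself only matches v{{[0-9]+}} and [[SP]].)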