[PATCH] D121411: [AMDGPU] Support v_mov_b64 in dpp combine
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 11 11:58:28 PST 2022
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG31f215ab0c9f: [AMDGPU] Support v_mov_b64 in dpp combine (authored by rampitec).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D121411/new/
https://reviews.llvm.org/D121411
Files:
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
Index: llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -1,4 +1,5 @@
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
+# RUN: llc -march=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN
---
# GCN-LABEL: name: dpp64_old_impdef
Index: llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
+++ llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A
+; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10
; GCN-LABEL: {{^}}dpp64_ceil:
@@ -50,6 +51,7 @@
; GCN-LABEL: {{^}}dpp64_div:
; GCN: global_load_dwordx2 [[V:v\[[0-9:]+\]]],
+; DPPMOV64: v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; GFX90A-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; GFX10-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; GCN: v_div_scale_f64
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2157,6 +2157,13 @@
SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
+ if (ST.hasMovB64() &&
+ AMDGPU::isLegal64BitDPPControl(
+ getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
+ MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
+ return std::make_pair(&MI, nullptr);
+ }
+
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction *MF = MBB.getParent();
Index: llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -167,7 +167,9 @@
return nullptr;
case AMDGPU::COPY:
case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B64_PSEUDO: {
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_e32:
+ case AMDGPU::V_MOV_B64_e64: {
auto &Op1 = Def->getOperand(1);
if (Op1.isImm())
return &Op1;
@@ -183,6 +185,7 @@
bool CombBCZ,
bool IsShrinkable) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
+ MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
auto OrigOp = OrigMI.getOpcode();
@@ -383,6 +386,7 @@
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
+ MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
@@ -399,7 +403,8 @@
return false;
}
- if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
+ if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
+ MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
assert(DppCtrl && DppCtrl->isImm());
if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
@@ -616,7 +621,8 @@
if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
- } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
+ } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
+ MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
if (ST->has64BitDPP() && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D121411.414720.patch
Type: text/x-patch
Size: 4370 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220311/4a624b88/attachment.bin>
More information about the llvm-commits mailing list