[llvm] 611959f - [AMDGPU] Fixed v_swap_b32 match
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 21 10:14:34 PDT 2020
Author: Stanislav Mekhanoshin
Date: 2020-10-21T10:14:24-07:00
New Revision: 611959f004d78f99e45fdc940a7a4322b85d04d9
URL: https://github.com/llvm/llvm-project/commit/611959f004d78f99e45fdc940a7a4322b85d04d9
DIFF: https://github.com/llvm/llvm-project/commit/611959f004d78f99e45fdc940a7a4322b85d04d9.diff
LOG: [AMDGPU] Fixed v_swap_b32 match
1. Fixed liveness issue with implicit kills.
2. Fixed potential problem with an indirect mov.
Fixes: SWDEV-256848
Differential Revision: https://reviews.llvm.org/D89599
Added:
Modified:
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 94ade095e7a9..29cb1001fcad 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -437,6 +437,22 @@ getSubRegForIndex(Register Reg, unsigned Sub, unsigned I,
return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}
+static void dropInstructionKeepingImpDefs(MachineInstr &MI,
+ const SIInstrInfo *TII) {
+ for (unsigned i = MI.getDesc().getNumOperands() +
+ MI.getDesc().getNumImplicitUses() +
+ MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &Op = MI.getOperand(i);
+ if (!Op.isDef())
+ continue;
+ BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
+ }
+
+ MI.eraseFromParent();
+}
+
// Match:
// mov t, x
// mov x, y
@@ -476,18 +492,25 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
if (!TRI.isVGPR(MRI, X))
return nullptr;
+ if (MovT.hasRegisterImplicitUseOperand(AMDGPU::M0))
+ return nullptr;
+
const unsigned SearchLimit = 16;
unsigned Count = 0;
+ bool KilledT = false;
for (auto Iter = std::next(MovT.getIterator()),
E = MovT.getParent()->instr_end();
- Iter != E && Count < SearchLimit; ++Iter, ++Count) {
+ Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
MachineInstr *MovY = &*Iter;
+ KilledT = MovY->killsRegister(T, &TRI);
+
if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
MovY->getOpcode() != AMDGPU::COPY) ||
!MovY->getOperand(1).isReg() ||
MovY->getOperand(1).getReg() != T ||
- MovY->getOperand(1).getSubReg() != Tsub)
+ MovY->getOperand(1).getSubReg() != Tsub ||
+ MovY->hasRegisterImplicitUseOperand(AMDGPU::M0))
continue;
Register Y = MovY->getOperand(0).getReg();
@@ -521,32 +544,53 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
MovX = nullptr;
break;
}
+ // Implicit use of M0 is an indirect move.
+ if (I->hasRegisterImplicitUseOperand(AMDGPU::M0))
+ continue;
+
+ if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0 : 1)))
+ continue;
+
MovX = &*I;
}
if (!MovX)
continue;
- LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
+ LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);
for (unsigned I = 0; I < Size; ++I) {
TargetInstrInfo::RegSubRegPair X1, Y1;
X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
- BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
- TII->get(AMDGPU::V_SWAP_B32))
+ MachineBasicBlock &MBB = *MovT.getParent();
+ auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
+ TII->get(AMDGPU::V_SWAP_B32))
.addDef(X1.Reg, 0, X1.SubReg)
.addDef(Y1.Reg, 0, Y1.SubReg)
.addReg(Y1.Reg, 0, Y1.SubReg)
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
+ if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
+ // Drop implicit EXEC.
+ MIB->RemoveOperand(MIB->getNumExplicitOperands());
+ MIB->copyImplicitOps(*MBB.getParent(), *MovX);
+ }
}
MovX->eraseFromParent();
- MovY->eraseFromParent();
+ dropInstructionKeepingImpDefs(*MovY, TII);
MachineInstr *Next = &*std::next(MovT.getIterator());
- if (MRI.use_nodbg_empty(T))
- MovT.eraseFromParent();
- else
+
+ if (MRI.use_nodbg_empty(T)) {
+ dropInstructionKeepingImpDefs(MovT, TII);
+ } else {
Xop.setIsKill(false);
+ for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
+ unsigned OpNo = MovT.getNumExplicitOperands() + I;
+ const MachineOperand &Op = MovT.getOperand(OpNo);
+ if (Op.isKill() && TRI.regsOverlap(X, Op.getReg()))
+ MovT.RemoveOperand(OpNo);
+ }
+ }
return Next;
}
diff --git a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
index 3190641ae691..d557060207df 100644
--- a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
@@ -517,7 +517,9 @@ body: |
...
# GCN-LABEL: name: swap_virt_copy_subreg_impdef_super
-# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
+# GCN: %2:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %2.sub1:vreg_64 = COPY %0.sub1
+# GCN-NEXT: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
---
name: swap_virt_copy_subreg_impdef_super
registers:
@@ -672,3 +674,222 @@ body: |
%1 = COPY %2
S_ENDPGM 0
...
+
+# GCN-LABEL: name: swap_liveness_error_mov
+# GCN: $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF
+# GCN-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec
+
+---
+name: swap_liveness_error_mov
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr5, $vgpr1_vgpr2
+
+ $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit killed $vgpr1_vgpr2
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr5, implicit $exec
+ $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7
+ $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7
+ $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: swap_liveness_error_copy
+# GCN: $vgpr6 = COPY $vgpr1
+# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF
+# GCN-NEXT: $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7
+# GCN-NEXT: $vgpr5 = COPY $vgpr6
+
+---
+name: swap_liveness_error_copy
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr5, $vgpr1_vgpr2
+
+ $vgpr6 = COPY $vgpr1, implicit killed $vgpr1_vgpr2
+ $vgpr1 = COPY killed $vgpr5
+ $vgpr5 = COPY $vgpr6, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7
+ $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7
+ $vgpr5 = COPY $vgpr6
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: swap_killed_t_early
+# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
+
+---
+name: swap_killed_t_early
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: swap_killed_t_late
+# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
+
+---
+name: swap_killed_t_late
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2
+ $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: swap_killed_x
+# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+
+---
+name: swap_killed_x
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: indirect_mov_t
+# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+
+---
+name: indirect_mov_t
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: indirect_mov_x
+# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+
+---
+name: indirect_mov_x
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: indirect_mov_y
+# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
+# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+
+---
+name: indirect_mov_y
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: implicit_ops_mov_x_swap_b32
+# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2
+
+---
+name: implicit_ops_mov_x_swap_b32
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: implict_ops_mov_x_swap_b64
+# GCN: %2:vreg_64 = COPY %0
+# GCN-NEXT: %0:vreg_64 = COPY %1, implicit $vgpr0
+# GCN-NEXT: %1:vreg_64 = COPY %2
+
+---
+name: implict_ops_mov_x_swap_b64
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: vreg_64 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %0 = COPY %1, implicit $vgpr0
+ %1 = COPY %2
+...
+
+# GCN-LABEL: implicit_ops_mov_t_swap_b32
+# GCN: $vgpr1 = IMPLICIT_DEF
+# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
+
+---
+name: implicit_ops_mov_t_swap_b32
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2, implicit-def $vgpr1
+ $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: implicit_ops_mov_y_swap_b32
+# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF
+
+---
+name: implicit_ops_mov_y_swap_b32
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec, implicit $vgpr2, implicit-def $vgpr0_vgpr1, implicit killed $vgpr3
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
More information about the llvm-commits mailing list