[llvm] r345514 - [AMDGPU] Match v_swap_b32
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 29 10:26:01 PDT 2018
Author: rampitec
Date: Mon Oct 29 10:26:01 2018
New Revision: 345514
URL: http://llvm.org/viewvc/llvm-project?rev=345514&view=rev
Log:
[AMDGPU] Match v_swap_b32
Differential Revision: https://reviews.llvm.org/D52677
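For illustration, the idiom being matched and its replacement, taken verbatim
from the swap_phys_condensed test added below:

    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
    $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec

is shrunk (when the temporary $vgpr2 is dead) to:

    $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec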
Added:
llvm/trunk/test/CodeGen/AMDGPU/v_swap_b32.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=345514&r1=345513&r2=345514&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Oct 29 10:26:01 2018
@@ -516,6 +516,10 @@ public:
return FMA;
}
+ bool hasSwap() const {
+ return GFX9Insts;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
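v_swap_b32 first appears in the GFX9 ISA, hence the GFX9Insts predicate;
SIShrinkInstructions below queries hasSwap() to gate the new matching.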
Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=345514&r1=345513&r2=345514&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Mon Oct 29 10:26:01 2018
@@ -212,6 +212,169 @@ static void shrinkScalarCompare(const SI
}
}
+// This is the same as MachineInstr::readsRegister/modifiesRegister except
+// it takes subregs into account.
+static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ for (const MachineOperand &MO : R) {
+ if (!MO.isReg())
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (TRI.regsOverlap(Reg, MO.getReg()))
+ return true;
+ } else if (MO.getReg() == Reg &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
+ TRI.getSubRegIndexLaneMask(MO.getSubReg());
+ if (Overlap.any())
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool instReadsReg(const MachineInstr *MI,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ return instAccessReg(MI->uses(), Reg, SubReg, TRI);
+}
+
+static bool instModifiesReg(const MachineInstr *MI,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ return instAccessReg(MI->defs(), Reg, SubReg, TRI);
+}
+
+static TargetInstrInfo::RegSubRegPair
+getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
+ const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
+ if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
+ } else {
+ LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
+ Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
+ }
+ }
+ return TargetInstrInfo::RegSubRegPair(Reg, Sub);
+}
+
+// Match:
+// mov t, x
+// mov x, y
+// mov y, t
+//
+// =>
+//
+// mov t, x (t is potentially dead and the move can be eliminated)
+// v_swap_b32 x, y
+//
+// Returns the next valid instruction pointer if a v_swap_b32 was created.
+//
+// This must not be done too early, so as not to prevent possible folding
+// that may remove the matched moves; it should preferably be done before RA
+// to release the saved register, and possibly also after RA, which can
+// insert copies too.
+//
+// This is really just a generic peephole, not canonical shrinking, although
+// its requirements match the pass placement and it reduces code size too.
+static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII) {
+ assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MovT.getOpcode() == AMDGPU::COPY);
+
+ unsigned T = MovT.getOperand(0).getReg();
+ unsigned Tsub = MovT.getOperand(0).getSubReg();
+ MachineOperand &Xop = MovT.getOperand(1);
+
+ if (!Xop.isReg())
+ return nullptr;
+ unsigned X = Xop.getReg();
+ unsigned Xsub = Xop.getSubReg();
+
+ unsigned Size = TII->getOpSize(MovT, 0) / 4;
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ if (!TRI.isVGPR(MRI, X))
+ return nullptr;
+
+ for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
+ if (YTop.getSubReg() != Tsub)
+ continue;
+
+ MachineInstr &MovY = *YTop.getParent();
+ if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ MovY.getOpcode() != AMDGPU::COPY) ||
+ MovY.getOperand(1).getSubReg() != Tsub)
+ continue;
+
+ unsigned Y = MovY.getOperand(0).getReg();
+ unsigned Ysub = MovY.getOperand(0).getSubReg();
+
+ if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
+ continue;
+
+ MachineInstr *MovX = nullptr;
+ auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
+ for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
+ if (instReadsReg(&*I, X, Xsub, TRI) ||
+ instModifiesReg(&*I, Y, Ysub, TRI) ||
+ instModifiesReg(&*I, T, Tsub, TRI) ||
+ (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
+ MovX = nullptr;
+ break;
+ }
+ if (!instReadsReg(&*I, Y, Ysub, TRI)) {
+ if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
+ MovX = nullptr;
+ break;
+ }
+ continue;
+ }
+ if (MovX ||
+ (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ I->getOpcode() != AMDGPU::COPY) ||
+ I->getOperand(0).getReg() != X ||
+ I->getOperand(0).getSubReg() != Xsub) {
+ MovX = nullptr;
+ break;
+ }
+ MovX = &*I;
+ }
+
+ if (!MovX || I == E)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
+
+ for (unsigned I = 0; I < Size; ++I) {
+ TargetInstrInfo::RegSubRegPair X1, Y1;
+ X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
+ Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
+ BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
+ TII->get(AMDGPU::V_SWAP_B32))
+ .addDef(X1.Reg, 0, X1.SubReg)
+ .addDef(Y1.Reg, 0, Y1.SubReg)
+ .addReg(Y1.Reg, 0, Y1.SubReg)
+ .addReg(X1.Reg, 0, X1.SubReg).getInstr();
+ }
+ MovX->eraseFromParent();
+ MovY.eraseFromParent();
+ MachineInstr *Next = &*std::next(MovT.getIterator());
+ if (MRI.use_nodbg_empty(T))
+ MovT.eraseFromParent();
+ else
+ Xop.setIsKill(false);
+
+ return Next;
+ }
+
+ return nullptr;
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -252,6 +415,14 @@ bool SIShrinkInstructions::runOnMachineF
}
}
+ if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MI.getOpcode() == AMDGPU::COPY)) {
+ if (auto *NextMI = matchSwap(MI, MRI, TII)) {
+ Next = NextMI->getIterator();
+ continue;
+ }
+ }
+
// Combine adjacent s_nops to use the immediate operand encoding how long
// to wait.
//
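The subtlety in instAccessReg above is that, for virtual registers, overlap
is decided by intersecting lane masks rather than by comparing subregister
indices. A minimal standalone sketch of that test, modeling lane masks as
plain bitmasks (the mask values here are hypothetical, not AMDGPU's actual
encoding):

    #include <cassert>
    #include <cstdint>

    // Hypothetical lane-mask encoding for a 64-bit virtual register:
    // sub0 and sub1 cover disjoint halves, the full register covers both.
    using LaneMask = std::uint64_t;
    constexpr LaneMask Sub0  = 0x3; // low 32 bits
    constexpr LaneMask Sub1  = 0xC; // high 32 bits
    constexpr LaneMask Whole = Sub0 | Sub1;

    // Two accesses to the same virtual register conflict iff their lane
    // masks intersect -- the Overlap.any() check in instAccessReg.
    constexpr bool accessesOverlap(LaneMask A, LaneMask B) {
      return (A & B) != 0;
    }

    int main() {
      assert(!accessesOverlap(Sub0, Sub1)); // disjoint subregs do not clash
      assert(accessesOverlap(Sub0, Whole)); // a full access covers its parts
      return 0;
    }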
Added: llvm/trunk/test/CodeGen/AMDGPU/v_swap_b32.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/v_swap_b32.mir?rev=345514&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/v_swap_b32.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/v_swap_b32.mir Mon Oct 29 10:26:01 2018
@@ -0,0 +1,564 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: swap_phys_condensed
+# GCN: bb.0:
+# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
+# GCN-NEXT: S_SETPC_B64_return
+---
+name: swap_phys_condensed
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: swap_phys_sparse
+# GCN: bb.0:
+# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
+# GCN-NEXT: S_SETPC_B64_return
+---
+name: swap_phys_sparse
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: swap_phys_liveout
+# GCN: bb.0:
+# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
+# GCN-NEXT: S_SETPC_B64_return
+---
+name: swap_phys_liveout
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr2, implicit $vgpr1
+...
+
+# GCN-LABEL: name: swap_phys_b64
+# GCN: bb.0:
+# GCN-NEXT: $vgpr0, $vgpr2 = V_SWAP_B32 $vgpr2, $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr1, $vgpr3 = V_SWAP_B32 $vgpr3, $vgpr1, implicit $exec
+---
+name: swap_phys_b64
+body: |
+ bb.0:
+ $vgpr4_vgpr5 = COPY killed $vgpr0_vgpr1
+ $vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3
+ $vgpr2_vgpr3 = COPY killed $vgpr4_vgpr5
+...
+
+# GCN-LABEL: name: swap_phys_overlap_x
+# GCN: bb.0:
+# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+---
+name: swap_phys_overlap_x
+body: |
+ bb.0:
+ $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+...
+
+# GCN-LABEL: name: swap_phys_clobber_y
+# GCN: bb.0:
+# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+---
+name: swap_phys_clobber_y
+body: |
+ bb.0:
+ $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+ S_ENDPGM
+...
+
+# GCN-LABEL: name: swap_virt_copy_condense
+# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
+---
+name: swap_virt_copy_condense
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %0 = COPY %1
+ %1 = COPY %2
+...
+
+# GCN-LABEL: name: swap_virt_copy_sparse
+# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
+---
+name: swap_virt_copy_sparse
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ S_NOP 0
+ %0 = COPY %1
+ S_NOP 0
+ %1 = COPY %2
+...
+
+# GCN-LABEL: name: swap_virt_copy_subreg
+# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
+---
+name: swap_virt_copy_subreg
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: vreg_64 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2.sub0 = COPY %0.sub0
+ %2.sub1 = COPY %0.sub1
+ %0.sub0 = COPY %1.sub0
+ %0.sub1 = COPY %1.sub1
+ %1.sub0 = COPY %2.sub0
+...
+
+# GCN-LABEL: name: swap_virt_mov
+# GCN: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
+---
+name: swap_virt_mov
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = V_MOV_B32_e32 %0, implicit $exec
+ %0 = V_MOV_B32_e32 %1, implicit $exec
+ %1 = V_MOV_B32_e32 %2, implicit $exec
+...
+
+# GCN-LABEL: name: swap_virt_read_x
+# GCN: bb.0:
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %2:vgpr_32 = COPY %0
+# GCN-NEXT: %3:vgpr_32 = COPY %0
+# GCN-NEXT: %0:vgpr_32 = COPY %1
+# GCN-NEXT: %1:vgpr_32 = COPY %2
+# GCN-NEXT: S_ENDPGM
+
+---
+name: swap_virt_read_x
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %3 = COPY %0
+ %0 = COPY %1
+ %1 = COPY %2
+ S_ENDPGM
+...
+
+# GCN-LABEL: name: swap_virt_read_t_twice
+# GCN: bb.0:
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %2:vgpr_32 = COPY %0
+# GCN-NEXT: %3:vgpr_32 = COPY %2
+# GCN-NEXT: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec
+# GCN-NEXT: S_ENDPGM
+
+---
+name: swap_virt_read_t_twice
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %3 = COPY %2
+ %0 = COPY %1
+ %1 = COPY %2
+ S_ENDPGM
+...
+
+# GCN-LABEL: name: swap_virt_clobber_y
+# GCN: bb.0:
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %2:vgpr_32 = COPY %0
+# GCN-NEXT: %0:vgpr_32 = COPY %1
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = COPY %2
+# GCN-NEXT: S_ENDPGM
+
+---
+name: swap_virt_clobber_y
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %0 = COPY %1
+ %1 = IMPLICIT_DEF
+ %1 = COPY %2
+ S_ENDPGM
+...
+
+# GCN-LABEL: name: swap_virt_clobber_x1
+# GCN: bb.0:
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %2:vgpr_32 = COPY %0
+# GCN-NEXT: %0:vgpr_32 = COPY %1
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = COPY %2
+# GCN-NEXT: S_ENDPGM
+
+---
+name: swap_virt_clobber_x1
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %0 = COPY %1
+ %0 = IMPLICIT_DEF
+ %1 = COPY %2
+ S_ENDPGM
+...
+
+# GCN-LABEL: name: swap_virt_clobber_x2
+# GCN: bb.0:
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %2:vgpr_32 = COPY %0
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %0:vgpr_32 = COPY %1
+# GCN-NEXT: %1:vgpr_32 = COPY %2
+# GCN-NEXT: S_ENDPGM
+
+---
+name: swap_virt_clobber_x2
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %0 = IMPLICIT_DEF
+ %0 = COPY %1
+ %1 = COPY %2
+ S_ENDPGM
+...
+
+# GCN-LABEL: name: swap_virt_clobber_t
+# GCN: bb.0:
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %2:vgpr_32 = COPY %0
+# GCN-NEXT: %0:vgpr_32 = COPY %1
+# GCN-NEXT: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = COPY %2
+# GCN-NEXT: S_ENDPGM
+
+---
+name: swap_virt_clobber_t
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %0 = COPY %1
+ %2 = IMPLICIT_DEF
+ %1 = COPY %2
+ S_ENDPGM
+...
+
+# GCN-LABEL: name: swap_virt_copy_subreg_overlap_x_full
+# GCN: bb.0:
+# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
+# GCN-NEXT: %3:vreg_64 = COPY %0
+# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0
+# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0
+---
+name: swap_virt_copy_subreg_overlap_x_full
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: vreg_64 }
+ - { id: 3, class: vreg_64 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2.sub0 = COPY %0.sub0
+ %3 = COPY %0
+ %0.sub0 = COPY %1.sub0
+ %1.sub0 = COPY %2.sub0
+...
+
+# GCN-LABEL: name: swap_virt_copy_subreg_overlap_x_part
+# GCN: bb.0:
+# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
+# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
+# GCN-NEXT: %3:vreg_64 = COPY %0.sub0_sub1
+# GCN-NEXT: %0.sub0:vreg_128 = COPY %1.sub0
+# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0
+---
+name: swap_virt_copy_subreg_overlap_x_part
+registers:
+ - { id: 0, class: vreg_128 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: vreg_64 }
+ - { id: 3, class: vreg_64 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2.sub0 = COPY %0.sub0
+ %3 = COPY %0.sub0_sub1
+ %0.sub0 = COPY %1.sub0
+ %1.sub0 = COPY %2.sub0
+...
+
+# GCN-LABEL: name: swap_virt_copy_subreg_wide_y
+# GCN: bb.0:
+# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
+# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0
+# GCN-NEXT: %1:vreg_64 = COPY %2
+---
+name: swap_virt_copy_subreg_wide_y
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: vreg_64 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2.sub0 = COPY %0.sub0
+ %0.sub0 = COPY %1.sub0
+ %1 = COPY %2
+...
+
+# GCN-LABEL: name: swap_virt_b64
+# GCN: bb.0:
+# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
+# GCN-NEXT: %0.sub1:vreg_64, %1.sub1:vreg_64 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec
+---
+name: swap_virt_b64
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: vreg_64 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %0 = COPY %1
+ %1 = COPY %2
+...
+
+# GCN-LABEL: name: swap_virt_b128
+# GCN: bb.0:
+# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
+# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF
+# GCN-NEXT: %0.sub0:vreg_128, %1.sub0:vreg_128 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
+# GCN-NEXT: %0.sub1:vreg_128, %1.sub1:vreg_128 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec
+# GCN-NEXT: %0.sub2:vreg_128, %1.sub2:vreg_128 = V_SWAP_B32 %1.sub2, %0.sub2, implicit $exec
+# GCN-NEXT: %0.sub3:vreg_128, %1.sub3:vreg_128 = V_SWAP_B32 %1.sub3, %0.sub3, implicit $exec
+---
+name: swap_virt_b128
+registers:
+ - { id: 0, class: vreg_128 }
+ - { id: 1, class: vreg_128 }
+ - { id: 2, class: vreg_128 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %0 = COPY %1
+ %1 = COPY %2
+...
+
+# GCN-LABEL: name: swap_virt_b128_sub0_1
+# GCN: bb.0:
+# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
+# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF
+# GCN-NEXT: %0.sub0:vreg_128, %1.sub0:vreg_128 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
+# GCN-NEXT: %0.sub1:vreg_128, %1.sub1:vreg_128 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec
+# GCN-NEXT: S_ENDPGM
+---
+name: swap_virt_b128_sub0_1
+registers:
+ - { id: 0, class: vreg_128 }
+ - { id: 1, class: vreg_128 }
+ - { id: 2, class: vreg_128 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2.sub0_sub1 = COPY %0.sub0_sub1
+ %0.sub0_sub1 = COPY %1.sub0_sub1
+ %1.sub0_sub1 = COPY %2.sub0_sub1
+ S_ENDPGM
+...
+
+# GCN-LABEL: name: swap_virt_b128_sub2_3
+# GCN: bb.0:
+# GCN-NEXT: %0:vreg_128 = IMPLICIT_DEF
+# GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF
+# GCN-NEXT: %0.sub2:vreg_128, %1.sub2:vreg_128 = V_SWAP_B32 %1.sub2, %0.sub2, implicit $exec
+# GCN-NEXT: %0.sub3:vreg_128, %1.sub3:vreg_128 = V_SWAP_B32 %1.sub3, %0.sub3, implicit $exec
+# GCN-NEXT: S_ENDPGM
+---
+name: swap_virt_b128_sub2_3
+registers:
+ - { id: 0, class: vreg_128 }
+ - { id: 1, class: vreg_128 }
+ - { id: 2, class: vreg_128 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2.sub2_sub3 = COPY %0.sub2_sub3
+ %0.sub2_sub3 = COPY %1.sub2_sub3
+ %1.sub2_sub3 = COPY %2.sub2_sub3
+ S_ENDPGM
+...
+
+# GCN-LABEL: name: swap_virt_s_to_s
+# GCN: bb.0:
+# GCN-NEXT: %0:sgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %1:sgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %2:sgpr_32 = COPY %0
+# GCN-NEXT: %0:sgpr_32 = COPY %1
+# GCN-NEXT: %1:sgpr_32 = COPY %2
+---
+name: swap_virt_s_to_s
+registers:
+ - { id: 0, class: sgpr_32 }
+ - { id: 1, class: sgpr_32 }
+ - { id: 2, class: sgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = COPY %0
+ %0 = COPY %1
+ %1 = COPY %2
+...
+
+# GCN-LABEL: name: swap_virt_copy_subreg_impdef_super
+# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
+---
+name: swap_virt_copy_subreg_impdef_super
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: vreg_64 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2.sub0 = COPY %0.sub0, implicit-def %2, implicit $exec
+ %2.sub1 = COPY %0.sub1
+ %0.sub0 = COPY %1.sub0
+ %0.sub1 = COPY %1.sub1
+ %1.sub0 = COPY %2.sub0
+...
+
+# GCN-LABEL: name: swap_virt_copy_subreg_impuse_x
+# GCN: bb.0:
+# GCN-NEXT: %0:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %1:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %2.sub0:vreg_64 = COPY %0.sub0
+# GCN-NEXT: %2.sub1:vreg_64 = COPY %0.sub1
+# GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0, implicit %0
+# GCN-NEXT: %0.sub1:vreg_64 = COPY %1.sub1
+# GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0
+# GCN-NEXT: S_ENDPGM
+---
+name: swap_virt_copy_subreg_impuse_x
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: vreg_64 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2.sub0 = COPY %0.sub0
+ %2.sub1 = COPY %0.sub1
+ %0.sub0 = COPY %1.sub0, implicit %0
+ %0.sub1 = COPY %1.sub1
+ %1.sub0 = COPY %2.sub0
+ S_ENDPGM
+...