[llvm] 4c0251d - [AMDGPU] Enable SGPR copy folding
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 25 15:09:05 PDT 2019
Author: Stanislav Mekhanoshin
Date: 2019-10-25T15:08:30-07:00
New Revision: 4c0251da149c99f49550d6c938e6e7f45075194d
URL: https://github.com/llvm/llvm-project/commit/4c0251da149c99f49550d6c938e6e7f45075194d
DIFF: https://github.com/llvm/llvm-project/commit/4c0251da149c99f49550d6c938e6e7f45075194d.diff
LOG: [AMDGPU] Enable SGPR copy folding
SGPR copy folding used to fail in the last testcase function because, after
%0:sreg_64.sub0 was folded into the %3:sreg_32_xm0_xexec COPY, it
was further folded into S_STORE_DWORD_IMM. Its legal effective
subreg class is SReg_32, while the instruction expects the more restricted
SReg_32_XM0_XEXEC. However, SIInstrInfo::isLegalRegOperand()
passed the legality check, and the problem was only caught in the verifier.
Borrowed code from the verifier to check for RC legality.
Differential Revision: https://reviews.llvm.org/D69445
Added:
llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index f24510b18665..c28ff5ef6a0f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -668,7 +668,6 @@ void SIFoldOperands::foldOperand(
} else {
if (UseMI->isCopy() && OpToFold.isReg() &&
UseMI->getOperand(0).getReg().isVirtual() &&
- TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
!UseMI->getOperand(1).getSubReg()) {
LLVM_DEBUG(dbgs() << "Folding " << OpToFold
<< "\n into " << *UseMI << '\n');
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6e9143305426..272a7fc442d8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3921,20 +3921,18 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
? MRI.getRegClass(Reg)
: RI.getPhysRegClass(Reg);
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
- RC = TRI->getSubRegClass(RC, MO.getSubReg());
-
- // In order to be legal, the common sub-class must be equal to the
- // class of the current operand. For example:
- //
- // v_mov_b32 s0 ; Operand defined as vsrc_b32
- // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
- //
- // s_sendmsg 0, s0 ; Operand defined as m0reg
- // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+ const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass);
+ if (MO.getSubReg()) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+ const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF);
+ if (!SuperRC)
+ return false;
- return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());
+ if (!DRC)
+ return false;
+ }
+ return RC->hasSuperClassEq(DRC);
}
bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
new file mode 100644
index 000000000000..a33c97c4f5bb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
@@ -0,0 +1,48 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands,dead-mi-elimination -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+
+# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_full
+# GCN: %0:sgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: S_STORE_DWORD_IMM %0, undef $sgpr10_sgpr11, 0, 0, 0
+
+name: fold_sgpr_to_sgpr_copy_full
+body: |
+ bb.0:
+
+ %0:sgpr_32 = IMPLICIT_DEF
+ %1:sgpr_32 = COPY %0
+ %2:sgpr_32 = COPY %1
+ S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+...
+
+# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg
+# GCN: %0:sreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %2:sgpr_32 = COPY %0.sub0
+# GCN-NEXT: S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+
+name: fold_sgpr_to_sgpr_copy_subreg
+body: |
+ bb.0:
+
+ %0:sreg_64 = IMPLICIT_DEF
+ %1:sgpr_32 = COPY %0.sub0
+ %2:sgpr_32 = COPY %1
+ S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+...
+
+# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg2
+# GCN: %0:sreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %3:sreg_32_xm0_xexec = COPY %0.sub0
+# GCN-NEXT: S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0
+
+name: fold_sgpr_to_sgpr_copy_subreg2
+body: |
+ bb.0:
+
+ %0:sreg_64 = IMPLICIT_DEF
+ %1:sgpr_32 = COPY %0.sub0
+ %2:sgpr_32 = COPY %1
+ %3:sreg_32_xm0_xexec = COPY %2
+ S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0
+...
More information about the llvm-commits
mailing list