[llvm] 4c0251d - [AMDGPU] Enable SGPR copy folding

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 25 15:09:05 PDT 2019


Author: Stanislav Mekhanoshin
Date: 2019-10-25T15:08:30-07:00
New Revision: 4c0251da149c99f49550d6c938e6e7f45075194d

URL: https://github.com/llvm/llvm-project/commit/4c0251da149c99f49550d6c938e6e7f45075194d
DIFF: https://github.com/llvm/llvm-project/commit/4c0251da149c99f49550d6c938e6e7f45075194d.diff

LOG: [AMDGPU] Enable SGPR copy folding

This used to fail in the last testcase function because, after
%0:sreg_64.sub0 was folded into the %3:sreg_32_xm0_xexec COPY, it
was further folded into S_STORE_DWORD_IMM. The legal effective
subreg class is SReg_32, while the instruction expects the more
restricted SReg_32_XM0_XEXEC. However, SIInstrInfo::isLegalRegOperand()
passed the legality check anyway, and the error was only caught by the verifier.

The fix borrows code from the verifier to check register class (RC) legality.

Differential Revision: https://reviews.llvm.org/D69445

Added: 
    llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index f24510b18665..c28ff5ef6a0f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -668,7 +668,6 @@ void SIFoldOperands::foldOperand(
   } else {
     if (UseMI->isCopy() && OpToFold.isReg() &&
         UseMI->getOperand(0).getReg().isVirtual() &&
-        TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
         !UseMI->getOperand(1).getSubReg()) {
       LLVM_DEBUG(dbgs() << "Folding " << OpToFold
                         << "\n into " << *UseMI << '\n');

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6e9143305426..272a7fc442d8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3921,20 +3921,18 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
                                       ? MRI.getRegClass(Reg)
                                       : RI.getPhysRegClass(Reg);
 
-  const SIRegisterInfo *TRI =
-      static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
-  RC = TRI->getSubRegClass(RC, MO.getSubReg());
-
-  // In order to be legal, the common sub-class must be equal to the
-  // class of the current operand.  For example:
-  //
-  // v_mov_b32 s0 ; Operand defined as vsrc_b32
-  //              ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
-  //
-  // s_sendmsg 0, s0 ; Operand defined as m0reg
-  //                 ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+  const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass);
+  if (MO.getSubReg()) {
+    const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+    const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF);
+    if (!SuperRC)
+      return false;
 
-  return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+    DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());
+    if (!DRC)
+      return false;
+  }
+  return RC->hasSuperClassEq(DRC);
 }
 
 bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,

diff  --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
new file mode 100644
index 000000000000..a33c97c4f5bb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
@@ -0,0 +1,48 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands,dead-mi-elimination -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+
+# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_full
+# GCN:      %0:sgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: S_STORE_DWORD_IMM %0, undef $sgpr10_sgpr11, 0, 0, 0
+
+name:            fold_sgpr_to_sgpr_copy_full
+body:             |
+  bb.0:
+
+    %0:sgpr_32 = IMPLICIT_DEF
+    %1:sgpr_32 = COPY %0
+    %2:sgpr_32 = COPY %1
+    S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+...
+
+# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg
+# GCN:      %0:sreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %2:sgpr_32 = COPY %0.sub0
+# GCN-NEXT: S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+
+name:            fold_sgpr_to_sgpr_copy_subreg
+body:             |
+  bb.0:
+
+    %0:sreg_64 = IMPLICIT_DEF
+    %1:sgpr_32 = COPY %0.sub0
+    %2:sgpr_32 = COPY %1
+    S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+...
+
+# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg2
+# GCN:      %0:sreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %3:sreg_32_xm0_xexec = COPY %0.sub0
+# GCN-NEXT: S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0
+
+name:            fold_sgpr_to_sgpr_copy_subreg2
+body:             |
+  bb.0:
+
+    %0:sreg_64 = IMPLICIT_DEF
+    %1:sgpr_32 = COPY %0.sub0
+    %2:sgpr_32 = COPY %1
+    %3:sreg_32_xm0_xexec = COPY %2
+    S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0
+...


        


More information about the llvm-commits mailing list