[llvm] r338368 - [SystemZ] Improve decoding in case of instructions with four register operands.

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 31 06:00:43 PDT 2018


Author: jonpa
Date: Tue Jul 31 06:00:42 2018
New Revision: 338368

URL: http://llvm.org/viewvc/llvm-project?rev=338368&view=rev
Log:
[SystemZ] Improve decoding in case of instructions with four register operands.

Since z13, the max group size will be 2 if any μop has more than 3 register
sources.

This has been ignored so far in the SystemZHazardRecognizer, but is now
handled by recognizing those instructions and adjusting the tracking of
decoding and the cost heuristic for grouping.

Review: Ulrich Weigand
https://reviews.llvm.org/D49847

Modified:
    llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.h
    llvm/trunk/lib/Target/SystemZ/SystemZMachineScheduler.cpp
    llvm/trunk/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll

Modified: llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.cpp?rev=338368&r1=338367&r2=338368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.cpp Tue Jul 31 06:00:42 2018
@@ -81,6 +81,7 @@ getHazardType(SUnit *m, int Stalls) {
 
 void SystemZHazardRecognizer::Reset() {
   CurrGroupSize = 0;
+  CurrGroupHas4RegOps = false;
   clearProcResCounters();
   GrpCount = 0;
   LastFPdOpCycleIdx = UINT_MAX;
@@ -99,6 +100,12 @@ SystemZHazardRecognizer::fitsIntoCurrent
   if (SC->BeginGroup)
     return (CurrGroupSize == 0);
 
+  // An instruction with 4 register operands will not fit in last slot.
+  assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) ||
+          "Current decoder group is already full!");
+  if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+    return false;
+
   // Since a full group is handled immediately in EmitInstruction(),
   // SU should fit into current group. NumSlots should be 1 or 0,
   // since it is not a cracked or expanded instruction.
@@ -108,6 +115,23 @@ SystemZHazardRecognizer::fitsIntoCurrent
   return true;
 }
 
+bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
+  const MachineFunction &MF = *MI->getParent()->getParent();
+  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+  const MCInstrDesc &MID = MI->getDesc();
+  unsigned Count = 0;
+  for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
+    const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
+    if (RC == nullptr)
+      continue;
+    if (OpIdx >= MID.getNumDefs() &&
+        MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+      continue;
+    Count++;
+  }
+  return Count >= 4;
+}
+
 void SystemZHazardRecognizer::nextGroup() {
   if (CurrGroupSize == 0)
     return;
@@ -119,6 +143,7 @@ void SystemZHazardRecognizer::nextGroup(
 
   // Reset counter for next group.
   CurrGroupSize = 0;
+  CurrGroupHas4RegOps = false;
 
   // Decrease counters for execution units by one.
   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
@@ -172,6 +197,8 @@ void SystemZHazardRecognizer::dumpSU(SUn
     OS << "/EndsGroup";
   if (SU->isUnbuffered)
     OS << "/Unbuffered";
+  if (has4RegOps(SU->getInstr()))
+    OS << "/4RegOps";
 }
 
 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
@@ -184,6 +211,7 @@ void SystemZHazardRecognizer::dumpCurrGr
     dbgs() << "{ " << CurGroupDbg << " }";
     dbgs() << " (" << CurrGroupSize << " decoder slot"
            << (CurrGroupSize > 1 ? "s":"")
+           << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
            << ")\n";
   }
 }
@@ -294,11 +322,14 @@ EmitInstruction(SUnit *SU) {
   // Insert SU into current group by increasing number of slots used
   // in current group.
   CurrGroupSize += getNumDecoderSlots(SU);
-  assert (CurrGroupSize <= 3);
+  CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
+  unsigned GroupLim =
+    ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
+  assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
 
   // Check if current group is now full/ended. If so, move on to next
   // group to be ready to evaluate more candidates.
-  if (CurrGroupSize == 3 || SC->EndGroup)
+  if (CurrGroupSize == GroupLim || SC->EndGroup)
     nextGroup();
 }
 
@@ -325,6 +356,10 @@ int SystemZHazardRecognizer::groupingCos
     return -1;
   }
 
+  // An instruction with 4 register operands will not fit in last slot.
+  if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+    return 1;
+
   // Most instructions can be placed in any decoder slot.
   return 0;
 }

Modified: llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.h?rev=338368&r1=338367&r2=338368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZHazardRecognizer.h Tue Jul 31 06:00:42 2018
@@ -45,15 +45,17 @@ namespace llvm {
 /// SystemZHazardRecognizer maintains the state for one MBB during scheduling.
 class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
 
-#ifndef NDEBUG
   const SystemZInstrInfo *TII;
-#endif
   const TargetSchedModel *SchedModel;
 
   /// Keep track of the number of decoder slots used in the current
   /// decoder group.
   unsigned CurrGroupSize;
 
+  /// True if an instruction with four reg operands has been scheduled into
+  /// the current decoder group.
+  bool CurrGroupHas4RegOps;
+
   /// The tracking of resources here are quite similar to the common
   /// code use of a critical resource. However, z13 differs in the way
   /// that it has two processor sides which may be interesting to
@@ -73,6 +75,9 @@ class SystemZHazardRecognizer : public S
   /// Return true if MI fits into current decoder group.
   bool fitsIntoCurrentGroup(SUnit *SU) const;
 
+  /// Return true if this instruction has four register operands.
+  bool has4RegOps(const MachineInstr *MI) const;
+
   /// Two decoder groups per cycle are formed (for z13), meaning 2x3
   /// instructions. This function returns a number between 0 and 5,
   /// representing the current decoder slot of the current cycle.  If an SU
@@ -105,11 +110,7 @@ class SystemZHazardRecognizer : public S
 public:
   SystemZHazardRecognizer(const SystemZInstrInfo *tii,
                           const TargetSchedModel *SM)
-      :
-#ifndef NDEBUG
-        TII(tii),
-#endif
-        SchedModel(SM) {
+      : TII(tii), SchedModel(SM) {
     Reset();
   }
 

Modified: llvm/trunk/lib/Target/SystemZ/SystemZMachineScheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZMachineScheduler.cpp?rev=338368&r1=338367&r2=338368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZMachineScheduler.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZMachineScheduler.cpp Tue Jul 31 06:00:42 2018
@@ -169,8 +169,7 @@ SUnit *SystemZPostRASchedStrategy::pickN
     return *Available.begin();
   }
 
-  // All nodes that are possible to schedule are stored by in the
-  // Available set.
+  // All nodes that are possible to schedule are stored in the Available set.
   LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
 
   Candidate Best;

Modified: llvm/trunk/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll?rev=338368&r1=338367&r2=338368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll Tue Jul 31 06:00:42 2018
@@ -688,8 +688,8 @@ define <8 x float> @fun30(<8 x float> %v
 ; CHECK-NEXT:    vpkg %v6, %v6, %v7
 ; CHECK-NEXT:    vpkg %v4, %v4, %v5
 ; CHECK-NEXT:    vn %v5, %v16, %v6
-; CHECK-NEXT:    vsel %v24, %v3, %v2, %v5
-; CHECK-NEXT:    vldeb %v17, %v17
+; CHECK-DAG:     vsel %v24, %v3, %v2, %v5
+; CHECK-DAG:     vldeb %v17, %v17
 ; CHECK-NEXT:    vldeb %v18, %v18
 ; CHECK-NEXT:    vfchdb %v17, %v18, %v17
 ; CHECK-NEXT:    vmrhf %v18, %v30, %v30




More information about the llvm-commits mailing list