[llvm] r347503 - [llvm-mca] Refactor some of the logic in InstrBuilder, and add a verifyOperands method.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 23 12:26:57 PST 2018


Author: adibiagio
Date: Fri Nov 23 12:26:57 2018
New Revision: 347503

URL: http://llvm.org/viewvc/llvm-project?rev=347503&view=rev
Log:
[llvm-mca] Refactor some of the logic in InstrBuilder, and add a verifyOperands method.

With this change, InstrBuilder emits an error if the MCInst sequence contains an
instruction with a variadic opcode, and a non-zero number of variadic operands.

Currently we don't know how to correctly analyze variadic opcodes. The problem
with variadic operands is that there is no information for them in the opcode
descriptor (i.e. MCInstrDesc). That means, we don't know which variadic operands
are defs, and which are uses.

In future, we could try to conservatively assume that any extra register
operands is both a register use and a register definition.

This patch fixes a subtle bug in the evaluation of read/write operands for ARM
VLD1 with implicit index update. Added test vld1-index-update.s

Added:
    llvm/trunk/test/tools/llvm-mca/ARM/vld1-index-update.s
Modified:
    llvm/trunk/tools/llvm-mca/include/InstrBuilder.h
    llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp

Added: llvm/trunk/test/tools/llvm-mca/ARM/vld1-index-update.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/ARM/vld1-index-update.s?rev=347503&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/ARM/vld1-index-update.s (added)
+++ llvm/trunk/test/tools/llvm-mca/ARM/vld1-index-update.s Fri Nov 23 12:26:57 2018
@@ -0,0 +1,72 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=armv7-unknown-unknown -mcpu=swift -timeline -iterations=5 < %s | FileCheck %s
+
+# Register r1 is updated in one cycle by instruction vld1.32, so the add.w can
+# start one cycle later.
+
+add.w	r1, r1, r12
+vld1.32	{d16, d17}, [r1]!
+
+# CHECK:      Iterations:        5
+# CHECK-NEXT: Instructions:      10
+# CHECK-NEXT: Total Cycles:      16
+# CHECK-NEXT: Total uOps:        15
+
+# CHECK:      Dispatch Width:    3
+# CHECK-NEXT: uOps Per Cycle:    0.94
+# CHECK-NEXT: IPC:               0.63
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.50                        add	r1, r1, r12
+# CHECK-NEXT:  2      4     1.00    *                   vld1.32	{d16, d17}, [r1]!
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - SwiftUnitDiv
+# CHECK-NEXT: [1]   - SwiftUnitP0
+# CHECK-NEXT: [2]   - SwiftUnitP1
+# CHECK-NEXT: [3]   - SwiftUnitP2
+# CHECK-NEXT: [4.0] - SwiftUnitP01
+# CHECK-NEXT: [4.1] - SwiftUnitP01
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4.0]  [4.1]
+# CHECK-NEXT:  -      -      -     1.00   1.00   1.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4.0]  [4.1]  Instructions:
+# CHECK-NEXT:  -      -      -      -      -     1.00   add	r1, r1, r12
+# CHECK-NEXT:  -      -      -     1.00   1.00    -     vld1.32	{d16, d17}, [r1]!
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     012345
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeER .    .    .   add	r1, r1, r12
+# CHECK-NEXT: [0,1]     D=eeeeER  .    .   vld1.32	{d16, d17}, [r1]!
+# CHECK-NEXT: [1,0]     .D=eE--R  .    .   add	r1, r1, r12
+# CHECK-NEXT: [1,1]     .D==eeeeER.    .   vld1.32	{d16, d17}, [r1]!
+# CHECK-NEXT: [2,0]     . D==eE--R.    .   add	r1, r1, r12
+# CHECK-NEXT: [2,1]     . D===eeeeER   .   vld1.32	{d16, d17}, [r1]!
+# CHECK-NEXT: [3,0]     .  D===eE--R   .   add	r1, r1, r12
+# CHECK-NEXT: [3,1]     .  D====eeeeER .   vld1.32	{d16, d17}, [r1]!
+# CHECK-NEXT: [4,0]     .   D====eE--R .   add	r1, r1, r12
+# CHECK-NEXT: [4,1]     .   D=====eeeeER   vld1.32	{d16, d17}, [r1]!
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     5     3.0    0.2    1.6       add	r1, r1, r12
+# CHECK-NEXT: 1.     5     4.0    0.0    0.0       vld1.32	{d16, d17}, [r1]!

Modified: llvm/trunk/tools/llvm-mca/include/InstrBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/include/InstrBuilder.h?rev=347503&r1=347502&r2=347503&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/include/InstrBuilder.h (original)
+++ llvm/trunk/tools/llvm-mca/include/InstrBuilder.h Fri Nov 23 12:26:57 2018
@@ -52,8 +52,8 @@ class InstrBuilder {
   InstrBuilder(const InstrBuilder &) = delete;
   InstrBuilder &operator=(const InstrBuilder &) = delete;
 
-  Error populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
-  Error populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+  void populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+  void populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
   Error verifyInstrDesc(const InstrDesc &ID, const MCInst &MCI) const;
 
 public:

Modified: llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp?rev=347503&r1=347502&r2=347503&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp (original)
+++ llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp Fri Nov 23 12:26:57 2018
@@ -188,32 +188,98 @@ static void computeMaxLatency(InstrDesc
   ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
 }
 
-Error InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
-                                   unsigned SchedClassID) {
+static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
+  // Variadic opcodes are not correctly supported.
+  if (MCDesc.isVariadic()) {
+    if (MCI.getNumOperands() - MCDesc.getNumOperands()) {
+      return make_error<InstructionError<MCInst>>(
+          "Don't know how to process this variadic opcode.", MCI);
+    }
+  }
+
+  // Count register definitions, and skip non register operands in the process.
+  unsigned I, E;
+  unsigned NumExplicitDefs = MCDesc.getNumDefs();
+  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
+    const MCOperand &Op = MCI.getOperand(I);
+    if (Op.isReg())
+      --NumExplicitDefs;
+  }
+
+  if (NumExplicitDefs) {
+    return make_error<InstructionError<MCInst>>(
+        "Expected more register operand definitions.", MCI);
+  }
+
+  if (MCDesc.hasOptionalDef()) {
+    // Always assume that the optional definition is the last operand.
+    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
+    if (I == MCI.getNumOperands() || !Op.isReg()) {
+      std::string Message =
+          "expected a register operand for an optional definition. Instruction "
+          "has not been correctly analyzed.";
+      return make_error<InstructionError<MCInst>>(Message, MCI);
+    }
+  }
+
+  return ErrorSuccess();
+}
+
+void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
+                                  unsigned SchedClassID) {
   const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
   const MCSchedModel &SM = STI.getSchedModel();
   const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
 
-  // These are for now the (strong) assumptions made by this algorithm:
-  //  * The number of explicit and implicit register definitions in a MCInst
-  //    matches the number of explicit and implicit definitions according to
-  //    the opcode descriptor (MCInstrDesc).
-  //  * Register definitions take precedence over register uses in the operands
-  //    list.
-  //  * If an opcode specifies an optional definition, then the optional
-  //    definition is always the last operand in the sequence, and it can be
-  //    set to zero (i.e. "no register").
+  // Assumptions made by this algorithm:
+  //  1. The number of explicit and implicit register definitions in a MCInst
+  //     matches the number of explicit and implicit definitions according to
+  //     the opcode descriptor (MCInstrDesc).
+  //  2. Uses start at index #(MCDesc.getNumDefs()).
+  //  3. There can only be a single optional register definition, an it is
+  //     always the last operand of the sequence (excluding extra operands
+  //     contributed by variadic opcodes).
   //
   // These assumptions work quite well for most out-of-order in-tree targets
   // like x86. This is mainly because the vast majority of instructions is
   // expanded to MCInst using a straightforward lowering logic that preserves
   // the ordering of the operands.
+  //
+  // About assumption 1.
+  // The algorithm allows non-register operands between register operand
+  // definitions. This helps to handle some special ARM instructions with
+  // implicit operand increment (-mtriple=armv7):
+  //
+  // vld1.32  {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
+  //                             @  <MCOperand Reg:59>
+  //                             @  <MCOperand Imm:0>     (!!)
+  //                             @  <MCOperand Reg:67>
+  //                             @  <MCOperand Imm:0>
+  //                             @  <MCOperand Imm:14>
+  //                             @  <MCOperand Reg:0>>
+  //
+  // MCDesc reports:
+  //  6 explicit operands.
+  //  1 optional definition
+  //  2 explicit definitions (!!)
+  //
+  // The presence of an 'Imm' operand between the two register definitions
+  // breaks the assumption that "register definitions are always at the
+  // beginning of the operand sequence".
+  //
+  // To workaround this issue, this algorithm ignores (i.e. skips) any
+  // non-register operands between register definitions.  The optional
+  // definition is still at index #(NumOperands-1).
+  //
+  // According to assumption 2. register reads start at #(NumExplicitDefs-1).
+  // That means, register R1 from the example is both read and written.
   unsigned NumExplicitDefs = MCDesc.getNumDefs();
   unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
   unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
   unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
   if (MCDesc.hasOptionalDef())
     TotalDefs++;
+
   ID.Writes.resize(TotalDefs);
   // Iterate over the operands list, and skip non-register operands.
   // The first NumExplictDefs register operands are expected to be register
@@ -241,19 +307,15 @@ Error InstrBuilder::populateWrites(Instr
     }
     Write.IsOptionalDef = false;
     LLVM_DEBUG({
-      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+      dbgs() << "\t\t[Def]    OpIdx=" << Write.OpIndex
              << ", Latency=" << Write.Latency
              << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
     });
     CurrentDef++;
   }
 
-  if (CurrentDef != NumExplicitDefs) {
-    return make_error<InstructionError<MCInst>>(
-        "Expected more register operand definitions.", MCI);
-  }
-
-  CurrentDef = 0;
+  assert(CurrentDef == NumExplicitDefs &&
+         "Expected more register operand definitions.");
   for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
     unsigned Index = NumExplicitDefs + CurrentDef;
     WriteDescriptor &Write = ID.Writes[Index];
@@ -275,7 +337,7 @@ Error InstrBuilder::populateWrites(Instr
     Write.IsOptionalDef = false;
     assert(Write.RegisterID != 0 && "Expected a valid phys register!");
     LLVM_DEBUG({
-      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
              << ", PhysReg=" << MRI.getName(Write.RegisterID)
              << ", Latency=" << Write.Latency
              << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
@@ -283,75 +345,52 @@ Error InstrBuilder::populateWrites(Instr
   }
 
   if (MCDesc.hasOptionalDef()) {
-    // Always assume that the optional definition is the last operand of the
-    // MCInst sequence.
-    const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1);
-    if (i == MCI.getNumOperands() || !Op.isReg()) {
-      std::string Message =
-          "expected a register operand for an optional definition. Instruction "
-          "has not been correctly analyzed.";
-      return make_error<InstructionError<MCInst>>(Message, MCI);
-    }
-
-    WriteDescriptor &Write = ID.Writes[TotalDefs - 1];
-    Write.OpIndex = MCI.getNumOperands() - 1;
+    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
+    Write.OpIndex = MCDesc.getNumOperands() - 1;
     // Assign a default latency for this write.
     Write.Latency = ID.MaxLatency;
     Write.SClassOrWriteResourceID = 0;
     Write.IsOptionalDef = true;
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
   }
-
-  return ErrorSuccess();
 }
 
-Error InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
-                                  unsigned SchedClassID) {
+void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
+                                 unsigned SchedClassID) {
   const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
-  unsigned NumExplicitDefs = MCDesc.getNumDefs();
-
-  // Skip explicit definitions.
-  unsigned i = 0;
-  for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) {
-    const MCOperand &Op = MCI.getOperand(i);
-    if (Op.isReg())
-      NumExplicitDefs--;
-  }
-
-  if (NumExplicitDefs) {
-    return make_error<InstructionError<MCInst>>(
-        "Expected more register operand definitions.", MCI);
-  }
-
-  unsigned NumExplicitUses = MCI.getNumOperands() - i;
+  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
   unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
-  if (MCDesc.hasOptionalDef()) {
-    assert(NumExplicitUses);
-    NumExplicitUses--;
-  }
+  // Remove the optional definition.
+  if (MCDesc.hasOptionalDef())
+    --NumExplicitUses;
   unsigned TotalUses = NumExplicitUses + NumImplicitUses;
-  if (!TotalUses)
-    return ErrorSuccess();
 
   ID.Reads.resize(TotalUses);
-  for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
-    ReadDescriptor &Read = ID.Reads[CurrentUse];
-    Read.OpIndex = i + CurrentUse;
-    Read.UseIndex = CurrentUse;
+  for (unsigned I = 0; I < NumExplicitUses; ++I) {
+    ReadDescriptor &Read = ID.Reads[I];
+    Read.OpIndex = MCDesc.getNumDefs() + I;
+    Read.UseIndex = I;
     Read.SchedClassID = SchedClassID;
-    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
+    LLVM_DEBUG(dbgs() << "\t\t[Use]    OpIdx=" << Read.OpIndex
                       << ", UseIndex=" << Read.UseIndex << '\n');
   }
 
-  for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
-    ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
-    Read.OpIndex = ~CurrentUse;
-    Read.UseIndex = NumExplicitUses + CurrentUse;
-    Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
+  // For the purpose of ReadAdvance, implicit uses come directly after explicit
+  // uses. The "UseIndex" must be updated according to that implicit layout.
+  for (unsigned I = 0; I < NumImplicitUses; ++I) {
+    ReadDescriptor &Read = ID.Reads[NumExplicitUses + I];
+    Read.OpIndex = ~I;
+    Read.UseIndex = NumExplicitUses + I;
+    Read.RegisterID = MCDesc.getImplicitUses()[I];
     Read.SchedClassID = SchedClassID;
-    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID="
+    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
+                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                       << MRI.getName(Read.RegisterID) << '\n');
   }
-  return ErrorSuccess();
 }
 
 Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
@@ -435,11 +474,13 @@ InstrBuilder::createInstrDescImpl(const
 
   initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
   computeMaxLatency(*ID, MCDesc, SCDesc, STI);
-  if (auto Err = populateWrites(*ID, MCI, SchedClassID))
-    return std::move(Err);
-  if (auto Err = populateReads(*ID, MCI, SchedClassID))
+
+  if (Error Err = verifyOperands(MCDesc, MCI))
     return std::move(Err);
 
+  populateWrites(*ID, MCI, SchedClassID);
+  populateReads(*ID, MCI, SchedClassID);
+
   LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
   LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
 
@@ -556,9 +597,9 @@ InstrBuilder::createInstruction(const MC
     }
 
     assert(RegID && "Expected a valid register ID!");
-    NewIS->getDefs().emplace_back(
-        WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex],
-        /* WritesZero */ IsZeroIdiom);
+    NewIS->getDefs().emplace_back(WD, RegID,
+                                  /* ClearsSuperRegs */ WriteMask[WriteIndex],
+                                  /* WritesZero */ IsZeroIdiom);
     ++WriteIndex;
   }
 




More information about the llvm-commits mailing list