[llvm] r242723 - MachineScheduler: Restrict macroop fusion to data-dependent instructions.

Charlie Turner charlesturner7c5 at gmail.com
Wed Jul 22 09:55:59 PDT 2015


Hi Matthias,

Sorry, but this appears to cause a regression in
SingleSource/Benchmarks/Dhrystone/fldry , and more significant
regressions in some of our internal benchmarks.  I see about a 3.84%
execution time regression with this commit. Could you take a look?

Regards,
-- Charlie

On 20 July 2015 at 23:34, Matthias Braun <matze at braunis.de> wrote:
> Author: matze
> Date: Mon Jul 20 17:34:44 2015
> New Revision: 242723
>
> URL: http://llvm.org/viewvc/llvm-project?rev=242723&view=rev
> Log:
> MachineScheduler: Restrict macroop fusion to data-dependent instructions.
>
> Before creating a schedule edge to encourage MacroOpFusion check that:
> - The predecessor actually writes a register that the branch reads.
> - The predecessor has no successors in the ScheduleDAG so we can
>   schedule it in front of the branch.
>
> This avoids skewing the scheduling heuristic in cases where macroop
> fusion cannot happen.
>
> Differential Revision: http://reviews.llvm.org/D10745
>
> Modified:
>     llvm/trunk/lib/CodeGen/MachineScheduler.cpp
>     llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll
>
> Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=242723&r1=242722&r2=242723&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Mon Jul 20 17:34:44 2015
> @@ -1349,25 +1349,49 @@ namespace {
>  /// \brief Post-process the DAG to create cluster edges between instructions
>  /// that may be fused by the processor into a single operation.
>  class MacroFusion : public ScheduleDAGMutation {
> -  const TargetInstrInfo *TII;
> +  const TargetInstrInfo &TII;
> +  const TargetRegisterInfo &TRI;
>  public:
> -  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
> +  MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
> +    : TII(TII), TRI(TRI) {}
>
>    void apply(ScheduleDAGMI *DAG) override;
>  };
>  } // anonymous
>
> +/// Returns true if \p MI reads a register written by \p Other.
> +static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
> +                       const MachineInstr &Other) {
> +  for (const MachineOperand &MO : MI.uses()) {
> +    if (!MO.isReg() || !MO.readsReg())
> +      continue;
> +
> +    unsigned Reg = MO.getReg();
> +    if (Other.modifiesRegister(Reg, &TRI))
> +      return true;
> +  }
> +  return false;
> +}
> +
>  /// \brief Callback from DAG postProcessing to create cluster edges to encourage
>  /// fused operations.
>  void MacroFusion::apply(ScheduleDAGMI *DAG) {
>    // For now, assume targets can only fuse with the branch.
> -  MachineInstr *Branch = DAG->ExitSU.getInstr();
> +  SUnit &ExitSU = DAG->ExitSU;
> +  MachineInstr *Branch = ExitSU.getInstr();
>    if (!Branch)
>      return;
>
> -  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
> -    SUnit *SU = &DAG->SUnits[--Idx];
> -    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
> +  for (SUnit &SU : DAG->SUnits) {
> +    // SUnits with successors can't be schedule in front of the ExitSU.
> +    if (!SU.Succs.empty())
> +      continue;
> +    // We only care if the node writes to a register that the branch reads.
> +    MachineInstr *Pred = SU.getInstr();
> +    if (!HasDataDep(TRI, *Branch, *Pred))
> +      continue;
> +
> +    if (!TII.shouldScheduleAdjacent(Pred, Branch))
>        continue;
>
>      // Create a single weak edge from SU to ExitSU. The only effect is to cause
> @@ -1376,11 +1400,11 @@ void MacroFusion::apply(ScheduleDAGMI *D
>      // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
>      // of SU, we could create an artificial edge from the deepest root, but it
>      // hasn't been needed yet.
> -    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
> +    bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
>      (void)Success;
>      assert(Success && "No DAG nodes should be reachable from ExitSU");
>
> -    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
> +    DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
>      break;
>    }
>  }
> @@ -2887,7 +2911,7 @@ static ScheduleDAGInstrs *createGenericS
>    if (EnableLoadCluster && DAG->TII->enableClusterLoads())
>      DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
>    if (EnableMacroFusion)
> -    DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
> +    DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
>    return DAG;
>  }
>
>
> Modified: llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll?rev=242723&r1=242722&r2=242723&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll Mon Jul 20 17:34:44 2015
> @@ -104,11 +104,14 @@ if.end:
>  ; Speculatively execute division by zero.
>  ; The sdiv/udiv instructions do not trap when the divisor is zero, so they are
>  ; safe to speculate.
> -; CHECK: speculate_division
> -; CHECK-NOT: cmp
> -; CHECK: sdiv
> -; CHECK: cmp
> -; CHECK-NEXT: ccmp
> +; CHECK-LABEL: speculate_division:
> +; CHECK: cmp w0, #1
> +; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0
> +; CHECK: ccmp [[DIVRES]], #16, #0, ge
> +; CHECK: b.gt [[BLOCK:LBB[0-9_]+]]
> +; CHECK: bl _foo
> +; CHECK: [[BLOCK]]:
> +; CHECK: orr w0, wzr, #0x7
>  define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
>  entry:
>    %cmp = icmp sgt i32 %a, 0
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list