[llvm] r242723 - MachineScheduler: Restrict macroop fusion to data-dependent instructions.
Charlie Turner
charlesturner7c5 at gmail.com
Wed Jul 22 09:55:59 PDT 2015
Hi Matthias,
Sorry, but this appears to cause a regression in
SingleSource/Benchmarks/Dhrystone/fldry , and more significant
regressions in some of our internal benchmarks. I see about a 3.84%
execution time regression with this commit. Could you take a look?
Regards,
-- Charlie
On 20 July 2015 at 23:34, Matthias Braun <matze at braunis.de> wrote:
> Author: matze
> Date: Mon Jul 20 17:34:44 2015
> New Revision: 242723
>
> URL: http://llvm.org/viewvc/llvm-project?rev=242723&view=rev
> Log:
> MachineScheduler: Restrict macroop fusion to data-dependent instructions.
>
> Before creating a schedule edge to encourage MacroOpFusion check that:
> - The predecessor actually writes a register that the branch reads.
> - The predecessor has no successors in the ScheduleDAG so we can
> schedule it in front of the branch.
>
> This avoids skewing the scheduling heuristic in cases where macroop
> fusion cannot happen.
>
> Differential Revision: http://reviews.llvm.org/D10745
>
> Modified:
> llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll
>
> Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=242723&r1=242722&r2=242723&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Mon Jul 20 17:34:44 2015
> @@ -1349,25 +1349,49 @@ namespace {
> /// \brief Post-process the DAG to create cluster edges between instructions
> /// that may be fused by the processor into a single operation.
> class MacroFusion : public ScheduleDAGMutation {
> - const TargetInstrInfo *TII;
> + const TargetInstrInfo &TII;
> + const TargetRegisterInfo &TRI;
> public:
> - MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
> + MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
> + : TII(TII), TRI(TRI) {}
>
> void apply(ScheduleDAGMI *DAG) override;
> };
> } // anonymous
>
> +/// Returns true if \p MI reads a register written by \p Other.
> +static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
> + const MachineInstr &Other) {
> + for (const MachineOperand &MO : MI.uses()) {
> + if (!MO.isReg() || !MO.readsReg())
> + continue;
> +
> + unsigned Reg = MO.getReg();
> + if (Other.modifiesRegister(Reg, &TRI))
> + return true;
> + }
> + return false;
> +}
> +
> /// \brief Callback from DAG postProcessing to create cluster edges to encourage
> /// fused operations.
> void MacroFusion::apply(ScheduleDAGMI *DAG) {
> // For now, assume targets can only fuse with the branch.
> - MachineInstr *Branch = DAG->ExitSU.getInstr();
> + SUnit &ExitSU = DAG->ExitSU;
> + MachineInstr *Branch = ExitSU.getInstr();
> if (!Branch)
> return;
>
> - for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
> - SUnit *SU = &DAG->SUnits[--Idx];
> - if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
> + for (SUnit &SU : DAG->SUnits) {
> + // SUnits with successors can't be schedule in front of the ExitSU.
> + if (!SU.Succs.empty())
> + continue;
> + // We only care if the node writes to a register that the branch reads.
> + MachineInstr *Pred = SU.getInstr();
> + if (!HasDataDep(TRI, *Branch, *Pred))
> + continue;
> +
> + if (!TII.shouldScheduleAdjacent(Pred, Branch))
> continue;
>
> // Create a single weak edge from SU to ExitSU. The only effect is to cause
> @@ -1376,11 +1400,11 @@ void MacroFusion::apply(ScheduleDAGMI *D
> // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
> // of SU, we could create an artificial edge from the deepest root, but it
> // hasn't been needed yet.
> - bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
> + bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
> (void)Success;
> assert(Success && "No DAG nodes should be reachable from ExitSU");
>
> - DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
> + DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
> break;
> }
> }
> @@ -2887,7 +2911,7 @@ static ScheduleDAGInstrs *createGenericS
> if (EnableLoadCluster && DAG->TII->enableClusterLoads())
> DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
> if (EnableMacroFusion)
> - DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
> + DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
> return DAG;
> }
>
>
> Modified: llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll?rev=242723&r1=242722&r2=242723&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-ccmp.ll Mon Jul 20 17:34:44 2015
> @@ -104,11 +104,14 @@ if.end:
> ; Speculatively execute division by zero.
> ; The sdiv/udiv instructions do not trap when the divisor is zero, so they are
> ; safe to speculate.
> -; CHECK: speculate_division
> -; CHECK-NOT: cmp
> -; CHECK: sdiv
> -; CHECK: cmp
> -; CHECK-NEXT: ccmp
> +; CHECK-LABEL: speculate_division:
> +; CHECK: cmp w0, #1
> +; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0
> +; CHECK: ccmp [[DIVRES]], #16, #0, ge
> +; CHECK: b.gt [[BLOCK:LBB[0-9_]+]]
> +; CHECK: bl _foo
> +; CHECK: [[BLOCK]]:
> +; CHECK: orr w0, wzr, #0x7
> define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
> entry:
> %cmp = icmp sgt i32 %a, 0
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list