[llvm] r266437 - [MachineScheduler]Add support for store clustering
Jun Bum Lim via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 15 07:58:39 PDT 2016
Author: junbuml
Date: Fri Apr 15 09:58:38 2016
New Revision: 266437
URL: http://llvm.org/viewvc/llvm-project?rev=266437&view=rev
Log:
[MachineScheduler]Add support for store clustering
Perform store clustering just like load clustering. This change adds
StoreClusterMutation to the machine scheduler. To control StoreClusterMutation,
enableClusterStores() has been added to TargetInstrInfo.h. This is enabled only
on AArch64 for now.
This change also adds support for unscaled stores, which were not handled in
getMemOpBaseRegImmOfs().
Added:
llvm/trunk/test/CodeGen/AArch64/aarch64-stp-cluster.ll
Modified:
llvm/trunk/include/llvm/Target/TargetInstrInfo.h
llvm/trunk/lib/CodeGen/MachineScheduler.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll
llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll
llvm/trunk/test/CodeGen/AArch64/global-merge-group-by-use.ll
Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=266437&r1=266436&r2=266437&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
+++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Fri Apr 15 09:58:38 2016
@@ -997,9 +997,11 @@ public:
virtual bool enableClusterLoads() const { return false; }
- virtual bool shouldClusterLoads(MachineInstr *FirstLdSt,
- MachineInstr *SecondLdSt,
- unsigned NumLoads) const {
+ virtual bool enableClusterStores() const { return false; }
+
+ virtual bool shouldClusterMemOps(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const {
return false;
}
Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=266437&r1=266436&r2=266437&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Fri Apr 15 09:58:38 2016
@@ -71,8 +71,9 @@ static cl::opt<bool> EnableRegPressure("
static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
cl::desc("Enable cyclic critical path analysis."), cl::init(true));
-static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
- cl::desc("Enable load clustering."), cl::init(true));
+static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable memop clustering."),
+ cl::init(true));
// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
@@ -1351,64 +1352,80 @@ void ScheduleDAGMILive::scheduleMI(SUnit
}
//===----------------------------------------------------------------------===//
-// LoadClusterMutation - DAG post-processing to cluster loads.
+// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
//===----------------------------------------------------------------------===//
namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
-/// loads.
-class LoadClusterMutation : public ScheduleDAGMutation {
- struct LoadInfo {
+/// loads or between neighboring stores.
+class BaseMemOpClusterMutation : public ScheduleDAGMutation {
+ struct MemOpInfo {
SUnit *SU;
unsigned BaseReg;
int64_t Offset;
- LoadInfo(SUnit *su, unsigned reg, int64_t ofs)
- : SU(su), BaseReg(reg), Offset(ofs) {}
+ MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
- bool operator<(const LoadInfo &RHS) const {
+ bool operator<(const MemOpInfo&RHS) const {
return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
}
};
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ bool IsLoad;
+
public:
- LoadClusterMutation(const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri)
- : TII(tii), TRI(tri) {}
+ BaseMemOpClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri, bool IsLoad)
+ : TII(tii), TRI(tri), IsLoad(IsLoad) {}
void apply(ScheduleDAGInstrs *DAGInstrs) override;
+
protected:
- void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+ void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
+};
+
+class StoreClusterMutation : public BaseMemOpClusterMutation {
+public:
+ StoreClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, false) {}
+};
+
+class LoadClusterMutation : public BaseMemOpClusterMutation {
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, true) {}
};
} // anonymous
-void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
- ScheduleDAGMI *DAG) {
- SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
- for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
- SUnit *SU = Loads[Idx];
+void BaseMemOpClusterMutation::clusterNeighboringMemOps(
+ ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
+ SmallVector<MemOpInfo, 32> MemOpRecords;
+ for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
+ SUnit *SU = MemOps[Idx];
unsigned BaseReg;
int64_t Offset;
if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
- LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+ MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
}
- if (LoadRecords.size() < 2)
+ if (MemOpRecords.size() < 2)
return;
- std::sort(LoadRecords.begin(), LoadRecords.end());
+
+ std::sort(MemOpRecords.begin(), MemOpRecords.end());
unsigned ClusterLength = 1;
- for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
- if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
+ if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
ClusterLength = 1;
continue;
}
- SUnit *SUa = LoadRecords[Idx].SU;
- SUnit *SUb = LoadRecords[Idx+1].SU;
- if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
+ SUnit *SUa = MemOpRecords[Idx].SU;
+ SUnit *SUb = MemOpRecords[Idx+1].SU;
+ if (TII->shouldClusterMemOps(SUa->getInstr(), SUb->getInstr(), ClusterLength)
&& DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
-
- DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+ DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
<< SUb->NodeNum << ")\n");
// Copy successor edges from SUa to SUb. Interleaving computation
// dependent on SUa can prevent load combining due to register reuse.
@@ -1429,17 +1446,20 @@ void LoadClusterMutation::clusterNeighbo
}
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
-void LoadClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
// Map DAG NodeNum to store chain ID.
DenseMap<unsigned, unsigned> StoreChainIDs;
- // Map each store chain to a set of dependent loads.
+ // Map each store chain to a set of dependent MemOps.
SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
SUnit *SU = &DAG->SUnits[Idx];
- if (!SU->getInstr()->mayLoad())
+ if ((IsLoad && !SU->getInstr()->mayLoad()) ||
+ (!IsLoad && !SU->getInstr()->mayStore()))
continue;
+
unsigned ChainPredID = DAG->SUnits.size();
for (SUnit::const_pred_iterator
PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
@@ -1449,7 +1469,7 @@ void LoadClusterMutation::apply(Schedule
}
}
// Check if this chain-like pred has been seen
- // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+ // before. ChainPredID==MaxNodeID at the top of the schedule.
unsigned NumChains = StoreChainDependents.size();
std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
@@ -1457,9 +1477,10 @@ void LoadClusterMutation::apply(Schedule
StoreChainDependents.resize(NumChains + 1);
StoreChainDependents[Result.first->second].push_back(SU);
}
+
// Iterate over the store chains.
for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
- clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+ clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
}
//===----------------------------------------------------------------------===//
@@ -3054,8 +3075,12 @@ static ScheduleDAGInstrs *createGenericS
// data and pass it to later mutations. Have a single mutation that gathers
// the interesting nodes in one pass.
DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
- if (EnableLoadCluster && DAG->TII->enableClusterLoads())
- DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+ if (EnableMemOpCluster) {
+ if (DAG->TII->enableClusterLoads())
+ DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+ if (DAG->TII->enableClusterStores())
+ DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
+ }
if (EnableMacroFusion)
DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
return DAG;
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=266437&r1=266436&r2=266437&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Fri Apr 15 09:58:38 2016
@@ -1445,6 +1445,11 @@ bool AArch64InstrInfo::getMemOpBaseRegIm
case AArch64::LDRWui:
case AArch64::LDRSWui:
// Unscaled instructions.
+ case AArch64::STURSi:
+ case AArch64::STURDi:
+ case AArch64::STURQi:
+ case AArch64::STURXi:
+ case AArch64::STURWi:
case AArch64::LDURSi:
case AArch64::LDURDi:
case AArch64::LDURQi:
@@ -1554,15 +1559,20 @@ static bool scaleOffset(unsigned Opc, in
default:
return false;
case AArch64::LDURQi:
+ case AArch64::STURQi:
OffsetStride = 16;
break;
case AArch64::LDURXi:
case AArch64::LDURDi:
+ case AArch64::STURXi:
+ case AArch64::STURDi:
OffsetStride = 8;
break;
case AArch64::LDURWi:
case AArch64::LDURSi:
case AArch64::LDURSWi:
+ case AArch64::STURWi:
+ case AArch64::STURSi:
OffsetStride = 4;
break;
}
@@ -1598,9 +1608,9 @@ static bool canPairLdStOpc(unsigned Firs
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
-bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
- MachineInstr *SecondLdSt,
- unsigned NumLoads) const {
+bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const {
// Only cluster up to a single pair.
if (NumLoads > 1)
return false;
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=266437&r1=266436&r2=266437&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Fri Apr 15 09:58:38 2016
@@ -109,7 +109,9 @@ public:
bool enableClusterLoads() const override { return true; }
- bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
+ bool enableClusterStores() const override { return true; }
+
+ bool shouldClusterMemOps(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
unsigned NumLoads) const override;
bool shouldScheduleAdjacent(MachineInstr *First,
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=266437&r1=266436&r2=266437&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Fri Apr 15 09:58:38 2016
@@ -292,9 +292,9 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(
return false;
}
-bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
- MachineInstr *SecondLdSt,
- unsigned NumLoads) const {
+bool SIInstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const {
const MachineOperand *FirstDst = nullptr;
const MachineOperand *SecondDst = nullptr;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=266437&r1=266436&r2=266437&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Fri Apr 15 09:58:38 2016
@@ -94,9 +94,9 @@ public:
int64_t &Offset,
const TargetRegisterInfo *TRI) const final;
- bool shouldClusterLoads(MachineInstr *FirstLdSt,
- MachineInstr *SecondLdSt,
- unsigned NumLoads) const final;
+ bool shouldClusterMemOps(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const final;
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
Added: llvm/trunk/test/CodeGen/AArch64/aarch64-stp-cluster.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-stp-cluster.ll?rev=266437&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-stp-cluster.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-stp-cluster.ll Fri Apr 15 09:58:38 2016
@@ -0,0 +1,149 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -aarch64-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s
+
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_i64_scale:BB#0
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(4): STRXui %vreg1, %vreg0, 1
+; CHECK:SU(3): STRXui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRXui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRXui %vreg1, %vreg0, 4
+define i64 @stp_i64_scale(i64* nocapture %P, i64 %v) {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
+ store i64 %v, i64* %arrayidx
+ %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
+ store i64 %v, i64* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
+ store i64 %v, i64* %arrayidx3
+ ret i64 %v
+}
+
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_i32_scale:BB#0
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(4): STRWui %vreg1, %vreg0, 1
+; CHECK:SU(3): STRWui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRWui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRWui %vreg1, %vreg0, 4
+define i32 @stp_i32_scale(i32* nocapture %P, i32 %v) {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %P, i32 3
+ store i32 %v, i32* %arrayidx
+ %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 2
+ store i32 %v, i32* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 1
+ store i32 %v, i32* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 4
+ store i32 %v, i32* %arrayidx3
+ ret i32 %v
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_i64_unscale:BB#0 entry
+; CHECK:Cluster ld/st SU(5) - SU(2)
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:SU(5): STURXi %vreg1, %vreg0, -32
+; CHECK:SU(2): STURXi %vreg1, %vreg0, -24
+; CHECK:SU(4): STURXi %vreg1, %vreg0, -16
+; CHECK:SU(3): STURXi %vreg1, %vreg0, -8
+define void @stp_i64_unscale(i64* nocapture %P, i64 %v) #0 {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3
+ store i64 %v, i64* %arrayidx
+ %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2
+ store i64 %v, i64* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4
+ store i64 %v, i64* %arrayidx3
+ ret void
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_i32_unscale:BB#0 entry
+; CHECK:Cluster ld/st SU(5) - SU(2)
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:SU(5): STURWi %vreg1, %vreg0, -16
+; CHECK:SU(2): STURWi %vreg1, %vreg0, -12
+; CHECK:SU(4): STURWi %vreg1, %vreg0, -8
+; CHECK:SU(3): STURWi %vreg1, %vreg0, -4
+define void @stp_i32_unscale(i32* nocapture %P, i32 %v) #0 {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3
+ store i32 %v, i32* %arrayidx
+ %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1
+ store i32 %v, i32* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2
+ store i32 %v, i32* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4
+ store i32 %v, i32* %arrayidx3
+ ret void
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_double:BB#0
+; CHECK:Cluster ld/st SU(3) - SU(4)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(3): STRDui %vreg1, %vreg0, 1
+; CHECK:SU(4): STRDui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRDui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRDui %vreg1, %vreg0, 4
+define void @stp_double(double* nocapture %P, double %v) {
+entry:
+ %arrayidx = getelementptr inbounds double, double* %P, i64 3
+ store double %v, double* %arrayidx
+ %arrayidx1 = getelementptr inbounds double, double* %P, i64 1
+ store double %v, double* %arrayidx1
+ %arrayidx2 = getelementptr inbounds double, double* %P, i64 2
+ store double %v, double* %arrayidx2
+ %arrayidx3 = getelementptr inbounds double, double* %P, i64 4
+ store double %v, double* %arrayidx3
+ ret void
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_float:BB#0
+; CHECK:Cluster ld/st SU(3) - SU(4)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(3): STRSui %vreg1, %vreg0, 1
+; CHECK:SU(4): STRSui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRSui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRSui %vreg1, %vreg0, 4
+define void @stp_float(float* nocapture %P, float %v) {
+entry:
+ %arrayidx = getelementptr inbounds float, float* %P, i64 3
+ store float %v, float* %arrayidx
+ %arrayidx1 = getelementptr inbounds float, float* %P, i64 1
+ store float %v, float* %arrayidx1
+ %arrayidx2 = getelementptr inbounds float, float* %P, i64 2
+ store float %v, float* %arrayidx2
+ %arrayidx3 = getelementptr inbounds float, float* %P, i64 4
+ store float %v, float* %arrayidx3
+ ret void
+}
+
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_volatile:BB#0
+; CHECK-NOT: Cluster ld/st
+; CHECK:SU(2): STRXui %vreg1, %vreg0, 3; mem:Volatile
+; CHECK:SU(3): STRXui %vreg1, %vreg0, 2; mem:Volatile
+; CHECK:SU(4): STRXui %vreg1, %vreg0, 1; mem:Volatile
+; CHECK:SU(5): STRXui %vreg1, %vreg0, 4; mem:Volatile
+define i64 @stp_volatile(i64* nocapture %P, i64 %v) {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
+ store volatile i64 %v, i64* %arrayidx
+ %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
+ store volatile i64 %v, i64* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
+ store volatile i64 %v, i64* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
+ store volatile i64 %v, i64* %arrayidx3
+ ret i64 %v
+}
+
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll?rev=266437&r1=266436&r2=266437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll Fri Apr 15 09:58:38 2016
@@ -5,12 +5,12 @@
; Test ldr clustering.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldr_int:BB#0
-; CHECK: Cluster loads SU(1) - SU(2)
+; CHECK: Cluster ld/st SU(1) - SU(2)
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldr_int:BB#0
-; EXYNOS: Cluster loads SU(1) - SU(2)
+; EXYNOS: Cluster ld/st SU(1) - SU(2)
; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
define i32 @ldr_int(i32* %a) nounwind {
@@ -25,12 +25,12 @@ define i32 @ldr_int(i32* %a) nounwind {
; Test ldpsw clustering
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_sext_int:BB#0
-; CHECK: Cluster loads SU(1) - SU(2)
+; CHECK: Cluster ld/st SU(1) - SU(2)
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRSWui
; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldp_sext_int:BB#0
-; EXYNOS: Cluster loads SU(1) - SU(2)
+; EXYNOS: Cluster ld/st SU(1) - SU(2)
; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRSWui
define i64 @ldp_sext_int(i32* %p) nounwind {
@@ -46,12 +46,12 @@ define i64 @ldp_sext_int(i32* %p) nounwi
; Test ldur clustering.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldur_int:BB#0
-; CHECK: Cluster loads SU(2) - SU(1)
+; CHECK: Cluster ld/st SU(2) - SU(1)
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDURWi
; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDURWi
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldur_int:BB#0
-; EXYNOS: Cluster loads SU(2) - SU(1)
+; EXYNOS: Cluster ld/st SU(2) - SU(1)
; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDURWi
; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDURWi
define i32 @ldur_int(i32* %a) nounwind {
@@ -66,12 +66,12 @@ define i32 @ldur_int(i32* %a) nounwind {
; Test sext + zext clustering.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_half_sext_zext_int:BB#0
-; CHECK: Cluster loads SU(3) - SU(4)
+; CHECK: Cluster ld/st SU(3) - SU(4)
; CHECK: SU(3): %vreg{{[0-9]+}}<def> = LDRSWui
; CHECK: SU(4): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldp_half_sext_zext_int:BB#0
-; EXYNOS: Cluster loads SU(3) - SU(4)
+; EXYNOS: Cluster ld/st SU(3) - SU(4)
; EXYNOS: SU(3): %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: SU(4): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind {
@@ -89,12 +89,12 @@ define i64 @ldp_half_sext_zext_int(i64*
; Test zext + sext clustering.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_half_zext_sext_int:BB#0
-; CHECK: Cluster loads SU(3) - SU(4)
+; CHECK: Cluster ld/st SU(3) - SU(4)
; CHECK: SU(3): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
; CHECK: SU(4): %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldp_half_zext_sext_int:BB#0
-; EXYNOS: Cluster loads SU(3) - SU(4)
+; EXYNOS: Cluster ld/st SU(3) - SU(4)
; EXYNOS: SU(3): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
; EXYNOS: SU(4): %vreg{{[0-9]+}}<def> = LDRSWui
define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind {
@@ -112,12 +112,12 @@ define i64 @ldp_half_zext_sext_int(i64*
; Verify we don't cluster volatile loads.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldr_int_volatile:BB#0
-; CHECK-NOT: Cluster loads
+; CHECK-NOT: Cluster ld/st
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldr_int_volatile:BB#0
-; EXYNOS-NOT: Cluster loads
+; EXYNOS-NOT: Cluster ld/st
; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
define i32 @ldr_int_volatile(i32* %a) nounwind {
@@ -132,12 +132,12 @@ define i32 @ldr_int_volatile(i32* %a) no
; Test ldq clustering (no clustering for Exynos).
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldq_cluster:BB#0
-; CHECK: Cluster loads SU(1) - SU(3)
+; CHECK: Cluster ld/st SU(1) - SU(3)
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRQui
; CHECK: SU(3): %vreg{{[0-9]+}}<def> = LDRQui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldq_cluster:BB#0
-; EXYNOS-NOT: Cluster loads
+; EXYNOS-NOT: Cluster ld/st
define <2 x i64> @ldq_cluster(i64* %p) {
%a1 = bitcast i64* %p to <2 x i64>*
%tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll?rev=266437&r1=266436&r2=266437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-stp.ll Fri Apr 15 09:58:38 2016
@@ -100,9 +100,9 @@ entry:
; Read of %b to compute %tmp2 shouldn't prevent formation of stp
; CHECK-LABEL: stp_int_rar_hazard
-; CHECK: stp w0, w1, [x2]
; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
-; CHECK: add w0, [[REG]], w1
+; CHECK: add w8, [[REG]], w1
+; CHECK: stp w0, w1, [x2]
; CHECK: ret
define i32 @stp_int_rar_hazard(i32 %a, i32 %b, i32* nocapture %p) nounwind {
store i32 %a, i32* %p, align 4
Modified: llvm/trunk/test/CodeGen/AArch64/global-merge-group-by-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/global-merge-group-by-use.ll?rev=266437&r1=266436&r2=266437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/global-merge-group-by-use.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/global-merge-group-by-use.ll Fri Apr 15 09:58:38 2016
@@ -64,8 +64,8 @@ define void @f3(i32 %a1, i32 %a2) #0 {
define void @f4(i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-NEXT: adrp x8, [[SET3]]@PAGE
; CHECK-NEXT: add x8, x8, [[SET3]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #4]
-; CHECK-NEXT: str w2, [x8]
+; CHECK-NEXT: stp w2, w0, [x8]
+; CHECK-NEXT: str w1, [x8, #8]
; CHECK-NEXT: ret
store i32 %a1, i32* @m4, align 4
store i32 %a2, i32* @n4, align 4
More information about the llvm-commits
mailing list