[llvm] r308059 - [AArch64][Falkor] Avoid HW prefetcher tag collisions (step 1)
Geoff Berry via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 14 14:44:13 PDT 2017
Author: gberry
Date: Fri Jul 14 14:44:12 2017
New Revision: 308059
URL: http://llvm.org/viewvc/llvm-project?rev=308059&view=rev
Log:
[AArch64][Falkor] Avoid HW prefetcher tag collisions (step 1)
Summary:
This patch is the first step in reducing HW prefetcher instruction tag
collisions in inner loops for Falkor. It adds a pass that annotates IR
loads with metadata to indicate that they are known to be strided loads,
and adds a target lowering hook that translates this metadata to a
target-specific MachineMemOperand flag.
A follow on change will use this MachineMemOperand flag to re-write
instructions to reduce tag collisions.
Reviewers: mcrosier, t.p.northover
Subscribers: aemerson, rengolin, mgorny, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D34963
Added:
llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
llvm/trunk/test/CodeGen/AArch64/falkor-hwpf.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64.h
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/trunk/lib/Target/AArch64/CMakeLists.txt
llvm/trunk/test/CodeGen/MIR/AArch64/target-memoperands.mir
Modified: llvm/trunk/lib/Target/AArch64/AArch64.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.h?rev=308059&r1=308058&r2=308059&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.h Fri Jul 14 14:44:12 2017
@@ -44,6 +44,7 @@ ModulePass *createAArch64PromoteConstant
FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
+FunctionPass *createFalkorMarkStridedAccessesPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -66,6 +67,7 @@ void initializeAArch64VectorByElementOpt
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);
+void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
void initializeLDTLSCleanupPass(PassRegistry&);
} // end namespace llvm
Added: llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp?rev=308059&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp (added)
+++ llvm/trunk/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp Fri Jul 14 14:44:12 2017
@@ -0,0 +1,147 @@
+//===-- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// For Falkor, we want to avoid HW prefetcher instruction tag collisions that
+// may inhibit the HW prefetching. This is done in two steps. Before ISel, we
+// mark strided loads (i.e. those that will likely benefit from prefetching)
+// with metadata. Then, after opcodes have been finalized, we insert MOVs and
+// re-write loads to prevent unintnentional tag collisions.
+// ===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "falkor-hwpf-fix"
+
+STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
+
+namespace {
+
+class FalkorMarkStridedAccesses {
+public:
+ FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE)
+ : LI(LI), SE(SE) {}
+
+ bool run();
+
+private:
+ bool runOnLoop(Loop *L);
+
+ LoopInfo &LI;
+ ScalarEvolution &SE;
+};
+
+class FalkorMarkStridedAccessesLegacy : public FunctionPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
+ initializeFalkorMarkStridedAccessesLegacyPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ // FIXME: For some reason, preserving SE here breaks LSR (even if
+ // this pass changes nothing).
+ // AU.addPreserved<ScalarEvolutionWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+char FalkorMarkStridedAccessesLegacy::ID = 0;
+INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
+ "Falkor HW Prefetch Fix", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
+ "Falkor HW Prefetch Fix", false, false)
+
+FunctionPass *llvm::createFalkorMarkStridedAccessesPass() {
+ return new FalkorMarkStridedAccessesLegacy();
+}
+
+bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) {
+ TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const AArch64Subtarget *ST =
+ TPC.getTM<AArch64TargetMachine>().getSubtargetImpl(F);
+ if (ST->getProcFamily() != AArch64Subtarget::Falkor)
+ return false;
+
+ if (skipFunction(F))
+ return false;
+
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ FalkorMarkStridedAccesses LDP(LI, SE);
+ return LDP.run();
+}
+
+bool FalkorMarkStridedAccesses::run() {
+ bool MadeChange = false;
+
+ for (Loop *I : LI)
+ for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
+
+ return MadeChange;
+}
+
+bool FalkorMarkStridedAccesses::runOnLoop(Loop *L) {
+ // Only mark strided loads in the inner-most loop
+ if (!L->empty())
+ return false;
+
+ bool MadeChange = false;
+
+ for (const auto BB : L->blocks()) {
+ for (auto &I : *BB) {
+ LoadInst *LoadI = dyn_cast<LoadInst>(&I);
+ if (!LoadI)
+ continue;
+
+ Value *PtrValue = LoadI->getPointerOperand();
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE.getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
+ continue;
+
+ LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD,
+ MDNode::get(LoadI->getContext(), {}));
+ ++NumStridedLoadsMarked;
+ DEBUG(dbgs() << "Load: " << I << " marked as strided\n");
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=308059&r1=308058&r2=308059&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Fri Jul 14 14:44:12 2017
@@ -7482,6 +7482,14 @@ AArch64TargetLowering::getNumInterleaved
return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
}
+MachineMemOperand::Flags
+AArch64TargetLowering::getMMOFlags(const Instruction &I) const {
+ if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
+ I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
+ return MOStridedAccess;
+ return MachineMemOperand::MONone;
+}
+
bool AArch64TargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL) const {
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=308059&r1=308058&r2=308059&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Fri Jul 14 14:44:12 2017
@@ -455,6 +455,8 @@ public:
unsigned getNumInterleavedAccesses(VectorType *VecTy,
const DataLayout &DL) const;
+ MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
+
private:
bool isExtFreeImpl(const Instruction *Ext) const override;
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=308059&r1=308058&r2=308059&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Fri Jul 14 14:44:12 2017
@@ -52,9 +52,6 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
-static const MachineMemOperand::Flags MOSuppressPair =
- MachineMemOperand::MOTargetFlag1;
-
static cl::opt<unsigned>
TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
@@ -1715,6 +1712,13 @@ void AArch64InstrInfo::suppressLdStPair(
(*MI.memoperands_begin())->setFlags(MOSuppressPair);
}
+/// Check all MachineMemOperands for a hint that the load/store is strided.
+bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const {
+ return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+ return MMO->getFlags() & MOStridedAccess;
+ });
+}
+
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
switch (Opc) {
default:
@@ -4433,7 +4437,8 @@ AArch64InstrInfo::getSerializableBitmask
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
- {{MOSuppressPair, "aarch64-suppress-pair"}};
+ {{MOSuppressPair, "aarch64-suppress-pair"},
+ {MOStridedAccess, "aarch64-strided-access"}};
return makeArrayRef(TargetFlags);
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=308059&r1=308058&r2=308059&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Fri Jul 14 14:44:12 2017
@@ -27,6 +27,13 @@ namespace llvm {
class AArch64Subtarget;
class AArch64TargetMachine;
+static const MachineMemOperand::Flags MOSuppressPair =
+ MachineMemOperand::MOTargetFlag1;
+static const MachineMemOperand::Flags MOStridedAccess =
+ MachineMemOperand::MOTargetFlag2;
+
+#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
+
class AArch64InstrInfo final : public AArch64GenInstrInfo {
const AArch64RegisterInfo RI;
const AArch64Subtarget &Subtarget;
@@ -81,6 +88,9 @@ public:
/// unprofitable.
bool isLdStPairSuppressed(const MachineInstr &MI) const;
+ /// Return true if the given load or store is a strided memory access.
+ bool isStridedAccess(const MachineInstr &MI) const;
+
/// Return true if this is an unscaled load/store.
bool isUnscaledLdSt(unsigned Opc) const;
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=308059&r1=308058&r2=308059&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Fri Jul 14 14:44:12 2017
@@ -138,6 +138,9 @@ static cl::opt<int> EnableGlobalISelAtO(
cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
cl::init(-1));
+static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
+ cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -158,6 +161,7 @@ extern "C" void LLVMInitializeAArch64Tar
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
+ initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);
}
@@ -346,8 +350,12 @@ void AArch64PassConfig::addIRPasses() {
//
// Run this before LSR to remove the multiplies involved in computing the
// pointer values N iterations ahead.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch)
- addPass(createLoopDataPrefetchPass());
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (EnableLoopDataPrefetch)
+ addPass(createLoopDataPrefetchPass());
+ if (EnableFalkorHWPFFix)
+ addPass(createFalkorMarkStridedAccessesPass());
+ }
TargetPassConfig::addIRPasses();
Modified: llvm/trunk/lib/Target/AArch64/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/CMakeLists.txt?rev=308059&r1=308058&r2=308059&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AArch64/CMakeLists.txt Fri Jul 14 14:44:12 2017
@@ -47,6 +47,7 @@ add_llvm_target(AArch64CodeGen
AArch64ConditionalCompares.cpp
AArch64DeadRegisterDefinitionsPass.cpp
AArch64ExpandPseudoInsts.cpp
+ AArch64FalkorHWPFFix.cpp
AArch64FastISel.cpp
AArch64A53Fix835769.cpp
AArch64FrameLowering.cpp
Added: llvm/trunk/test/CodeGen/AArch64/falkor-hwpf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/falkor-hwpf.ll?rev=308059&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/falkor-hwpf.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/falkor-hwpf.ll Fri Jul 14 14:44:12 2017
@@ -0,0 +1,106 @@
+; RUN: opt < %s -S -falkor-hwpf-fix -mtriple aarch64 -mcpu=falkor | FileCheck %s
+; RUN: opt < %s -S -falkor-hwpf-fix -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s --check-prefix=NOHWPF
+
+; Check that strided access metadata is added to loads in inner loops when compiling for Falkor.
+
+; CHECK-LABEL: @hwpf1(
+; CHECK: load i32, i32* %gep, !falkor.strided.access !0
+; CHECK: load i32, i32* %gep2, !falkor.strided.access !0
+
+; NOHWPF-LABEL: @hwpf1(
+; NOHWPF: load i32, i32* %gep{{$}}
+; NOHWPF: load i32, i32* %gep2{{$}}
+define void @hwpf1(i32* %p, i32* %p2) {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+
+ %gep = getelementptr inbounds i32, i32* %p, i32 %iv
+ %load = load i32, i32* %gep
+
+ %gep2 = getelementptr inbounds i32, i32* %p2, i32 %iv
+ %load2 = load i32, i32* %gep2
+
+ %inc = add i32 %iv, 1
+ %exitcnd = icmp uge i32 %inc, 1024
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Check that outer loop strided load isn't marked.
+; CHECK-LABEL: @hwpf2(
+; CHECK: load i32, i32* %gep, !falkor.strided.access !0
+; CHECK: load i32, i32* %gep2{{$}}
+
+; NOHWPF-LABEL: @hwpf2(
+; NOHWPF: load i32, i32* %gep{{$}}
+; NOHWPF: load i32, i32* %gep2{{$}}
+define void @hwpf2(i32* %p) {
+entry:
+ br label %loop1
+
+loop1:
+ %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+ %outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ]
+ br label %loop2.header
+
+loop2.header:
+ br label %loop2
+
+loop2:
+ %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+ %sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ]
+ %gep = getelementptr inbounds i32, i32* %p, i32 %iv2
+ %load = load i32, i32* %gep
+ %sum.inc = add i32 %sum, %load
+ %inc2 = add i32 %iv2, 1
+ %exitcnd2 = icmp uge i32 %inc2, 1024
+ br i1 %exitcnd2, label %exit2, label %loop2
+
+exit2:
+ %gep2 = getelementptr inbounds i32, i32* %p, i32 %iv1
+ %load2 = load i32, i32* %gep2
+ br label %loop1.latch
+
+loop1.latch:
+ %inc1 = add i32 %iv1, 1
+ %exitcnd1 = icmp uge i32 %inc1, 1024
+ br i1 %exitcnd2, label %exit, label %loop1
+
+exit:
+ ret void
+}
+
+
+; Check that non-strided load isn't marked.
+; CHECK-LABEL: @hwpf3(
+; CHECK: load i32, i32* %gep, !falkor.strided.access !0
+; CHECK: load i32, i32* %gep2{{$}}
+
+; NOHWPF-LABEL: @hwpf3(
+; NOHWPF: load i32, i32* %gep{{$}}
+; NOHWPF: load i32, i32* %gep2{{$}}
+define void @hwpf3(i32* %p, i32* %p2) {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+
+ %gep = getelementptr inbounds i32, i32* %p, i32 %iv
+ %load = load i32, i32* %gep
+
+ %gep2 = getelementptr inbounds i32, i32* %p2, i32 %load
+ %load2 = load i32, i32* %gep2
+
+ %inc = add i32 %iv, 1
+ %exitcnd = icmp uge i32 %inc, 1024
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/MIR/AArch64/target-memoperands.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AArch64/target-memoperands.mir?rev=308059&r1=308058&r2=308059&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AArch64/target-memoperands.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AArch64/target-memoperands.mir Fri Jul 14 14:44:12 2017
@@ -10,13 +10,17 @@
---
# CHECK-LABEL: name: target_memoperands
# CHECK: %1(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load 8)
+# CHECK: %2(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load 4)
# CHECK: G_STORE %1(s64), %0(p0) :: ("aarch64-suppress-pair" store 8)
+# CHECK: G_STORE %2(s32), %0(p0) :: ("aarch64-strided-access" store 4)
name: target_memoperands
body: |
bb.0:
%0:_(p0) = COPY %x0
%1:_(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load 8)
+ %2:_(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load 4)
G_STORE %1(s64), %0(p0) :: ("aarch64-suppress-pair" store 8)
+ G_STORE %2(s32), %0(p0) :: ("aarch64-strided-access" store 4)
RET_ReallyLR
...
More information about the llvm-commits
mailing list