[llvm] e541e1b - [NFC] Separate Peeling Properties into its own struct (re-land after minor fix)
Anh Tuyen Tran via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 10 11:39:53 PDT 2020
Author: Sidharth Baveja
Date: 2020-07-10T18:39:30Z
New Revision: e541e1b757237172c247904b670c9894d6b3759d
URL: https://github.com/llvm/llvm-project/commit/e541e1b757237172c247904b670c9894d6b3759d
DIFF: https://github.com/llvm/llvm-project/commit/e541e1b757237172c247904b670c9894d6b3759d.diff
LOG: [NFC] Separate Peeling Properties into its own struct (re-land after minor fix)
Summary:
This patch separates the peeling specific parameters from the UnrollingPreferences,
and creates a new struct called PeelingPreferences. Functions which used the
UnrollingPreferences struct for peeling have been updated to use the PeelingPreferences struct.
Author: sidbav (Sidharth Baveja)
Reviewers: Whitney (Whitney Tsang), Meinersbur (Michael Kruse), skatkov (Serguei Katkov), ashlykov (Arkady Shlykov), bogner (Justin Bogner), hfinkel (Hal Finkel), anhtuyen (Anh Tuyen Tran), nikic (Nikita Popov)
Reviewed By: Meinersbur (Michael Kruse)
Subscribers: fhahn (Florian Hahn), hiraditya (Aditya Kumar), llvm-commits, LLVM
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D80580
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/include/llvm/Transforms/Utils/UnrollLoop.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
llvm/lib/Target/ARM/ARMTargetTransformInfo.h
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 695b7d6061c0..b6698eefdb01 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -450,11 +450,6 @@ class TargetTransformInfo {
/// transformation will select an unrolling factor based on the current cost
/// threshold and other factors.
unsigned Count;
- /// A forced peeling factor (the number of bodied of the original loop
- /// that should be peeled off before the loop body). When set to 0, the
- /// unrolling transformation will select a peeling factor based on profile
- /// information and other factors.
- unsigned PeelCount;
/// Default unroll count for loops with run-time trip count.
unsigned DefaultUnrollRuntimeCount;
// Set the maximum unrolling factor. The unrolling factor may be selected
@@ -488,19 +483,10 @@ class TargetTransformInfo {
bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
- /// Allow peeling off loop iterations.
- bool AllowPeeling;
- /// Allow peeling off loop iterations for loop nests.
- bool AllowLoopNestsPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;
- /// Allow peeling basing on profile. Uses to enable peeling off all
- /// iterations basing on provided profile.
- /// If the value is true the peeling cost model can decide to peel only
- /// some iterations and in this case it will set this to false.
- bool PeelProfiledIterations;
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
/// value above is used during unroll and jam for the outer loop size.
/// This value is used in the same manner to limit the size of the inner
@@ -534,6 +520,28 @@ class TargetTransformInfo {
/// intrinsic is supported.
bool emitGetActiveLaneMask() const;
+ // Parameters that control the loop peeling transformation
+ struct PeelingPreferences {
+ /// A forced peeling factor (the number of bodied of the original loop
+ /// that should be peeled off before the loop body). When set to 0, the
+ /// a peeling factor based on profile information and other factors.
+ unsigned PeelCount;
+ /// Allow peeling off loop iterations.
+ bool AllowPeeling;
+ /// Allow peeling off loop iterations for loop nests.
+ bool AllowLoopNestsPeeling;
+ /// Allow peeling basing on profile. Uses to enable peeling off all
+ /// iterations basing on provided profile.
+ /// If the value is true the peeling cost model can decide to peel only
+ /// some iterations and in this case it will set this to false.
+ bool PeelProfiledIterations;
+ };
+
+ /// Get target-customized preferences for the generic loop peeling
+ /// transformation. The caller will initialize \p PP with the current
+ /// target-independent defaults with information from \p L and \p SE.
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ PeelingPreferences &PP) const;
/// @}
/// \name Scalar Target Information
@@ -1282,6 +1290,8 @@ class TargetTransformInfo::Concept {
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
+ virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ PeelingPreferences &PP) = 0;
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
@@ -1560,6 +1570,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
UnrollingPreferences &UP) override {
return Impl.getUnrollingPreferences(L, SE, UP);
}
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ PeelingPreferences &PP) override {
+ return Impl.getPeelingPreferences(L, SE, PP);
+ }
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index ca7106ab98aa..0ce975d6d4b5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -150,6 +150,9 @@ class TargetTransformInfoImplBase {
void getUnrollingPreferences(Loop *, ScalarEvolution &,
TTI::UnrollingPreferences &) {}
+ void getPeelingPreferences(Loop *, ScalarEvolution &,
+ TTI::PeelingPreferences &) {}
+
bool isLegalAddImmediate(int64_t Imm) { return false; }
bool isLegalICmpImmediate(int64_t Imm) { return false; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c6a9a65ae6c1..f9d32eadd23e 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -451,6 +451,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
UP.BEInsns = 2;
}
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ PP.PeelCount = 0;
+ PP.AllowPeeling = true;
+ PP.AllowLoopNestsPeeling = false;
+ PP.PeelProfiledIterations = true;
+ }
+
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 1970cefcefba..bb3d02b95956 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -94,6 +94,7 @@ bool UnrollRuntimeLoopRemainder(
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
+ TargetTransformInfo::PeelingPreferences &PP,
unsigned &TripCount, ScalarEvolution &SE);
bool canPeel(Loop *L);
@@ -119,6 +120,8 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
unsigned MaxTripCount, bool MaxOrZero,
unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
+ TargetTransformInfo::PeelingPreferences &PP,
+
bool &UseUpperBound);
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
@@ -133,9 +136,13 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
- Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
- Optional<bool> UserAllowProfileBasedPeeling,
- Optional<unsigned> UserFullUnrollMaxCount);
+ Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount);
+
+TargetTransformInfo::PeelingPreferences
+gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ Optional<bool> UserAllowPeeling,
+ Optional<bool> UserAllowProfileBasedPeeling);
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable, bool &Convergent,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 87c6f83938ed..2f051e53790b 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -327,6 +327,11 @@ void TargetTransformInfo::getUnrollingPreferences(
return TTIImpl->getUnrollingPreferences(L, SE, UP);
}
+void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ PeelingPreferences &PP) const {
+ return TTIImpl->getPeelingPreferences(L, SE, PP);
+}
+
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return TTIImpl->isLegalAddImmediate(Imm);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index be0c51b83a25..cf6de797727b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -859,6 +859,11 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
getFalkorUnrollingPreferences(L, SE, UP);
}
+void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
+
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) {
switch (Inst->getIntrinsicID()) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 6b1e5d5083e2..1f029689a60e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -153,6 +153,9 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 8783427b5002..542a5f006c0f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -236,6 +236,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
}
+void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
// The concept of vector registers doesn't really exist. Some packed vector
// operations operate on the normal 32-bit registers.
@@ -997,6 +1001,11 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
CommonTTI.getUnrollingPreferences(L, SE, UP);
}
+void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ CommonTTI.getPeelingPreferences(L, SE, PP);
+}
+
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
@@ -1103,3 +1112,8 @@ void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
CommonTTI.getUnrollingPreferences(L, SE, UP);
}
+
+void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ CommonTTI.getPeelingPreferences(L, SE, PP);
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index b8a027c79bfc..3364a9bcaccb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -61,6 +61,9 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
};
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
@@ -146,6 +149,9 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
return TTI::PSK_FastHardware;
@@ -264,6 +270,8 @@ class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
unsigned getHardwareNumberOfRegisters(bool Vec) const;
unsigned getNumberOfRegisters(bool Vec) const;
unsigned getRegisterBitWidth(bool Vector) const;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 44dfb9e8c129..74b1331216a0 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1582,6 +1582,11 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Force = true;
}
+void ARMTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
+
bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
return ST->hasMVEIntegerOps();
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 5d914227c968..537a546361ee 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -251,6 +251,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
bool emitGetActiveLaneMask() const;
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
bool shouldBuildLookupTablesForConstant(Constant *C) const {
// In the ROPI and RWPI relocation models we can't have pointers to global
// variables or functions in constant data, so don't convert switches to
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 76df4e8e1931..80c8736cb74a 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -78,12 +78,17 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Runtime = UP.Partial = true;
+}
+
+void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
// Only try to peel innermost loops with small runtime trip counts.
if (L && L->empty() && canPeel(L) &&
SE.getSmallConstantTripCount(L) == 0 &&
SE.getSmallConstantMaxTripCount(L) > 0 &&
SE.getSmallConstantMaxTripCount(L) <= 5) {
- UP.PeelCount = 2;
+ PP.PeelCount = 2;
}
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 3365c5bf1cb1..5fe397486402 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -64,6 +64,9 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
/// Bias LSR towards creating post-increment opportunities.
bool shouldFavorPostInc() const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 5c14d0f1a24d..3873c73fb2e0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -155,3 +155,8 @@ void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Partial = UP.Runtime = true;
UP.PartialThreshold = UP.Threshold / 4;
}
+
+void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 88156f687284..cb832031f1ad 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -95,6 +95,10 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
// Volatile loads/stores are only supported for shared and global address
// spaces, or for generic AS that maps to them.
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f2c746a14299..53556ffc267d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -568,6 +568,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
BaseT::getUnrollingPreferences(L, SE, UP);
}
+void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
// This function returns true to allow using coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index b831789d3e6e..d998521084e1 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -66,6 +66,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
TargetLibraryInfo *LibInfo);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 36141426e27d..864200e5f71c 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -294,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Force = true;
}
+void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) {
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index d20541774da1..7f8f7f6f923f 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -50,6 +50,9 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
/// @}
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index f0ece1faa5fd..285cba6ee205 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -158,7 +158,8 @@ static bool computeUnrollAndJamCount(
const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
- unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) {
+ unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
+ TargetTransformInfo::PeelingPreferences &PP) {
// First up use computeUnrollCount from the loop unroller to get a count
// for unrolling the outer loop, plus any loops requiring explicit
// unrolling we leave to the unroller. This uses UP.Threshold /
@@ -168,7 +169,8 @@ static bool computeUnrollAndJamCount(
bool UseUpperBound = false;
bool ExplicitUnroll = computeUnrollCount(
L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
- /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
+ /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
+ UseUpperBound);
if (ExplicitUnroll || UseUpperBound) {
// If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
// for the unroller instead.
@@ -282,7 +284,9 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
OptimizationRemarkEmitter &ORE, int OptLevel) {
TargetTransformInfo::UnrollingPreferences UP =
gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None,
- None, None, None, None, None, None, None);
+ None, None, None, None, None);
+ TargetTransformInfo::PeelingPreferences PP =
+ gatherPeelingPreferences(L, SE, TTI, None, None);
if (AllowUnrollAndJam.getNumOccurrences() > 0)
UP.UnrollAndJam = AllowUnrollAndJam;
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
@@ -367,7 +371,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// Decide if, and by how much, to unroll
bool IsCountSetExplicitly = computeUnrollAndJamCount(
L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
- OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP);
+ OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
if (UP.Count <= 1)
return LoopUnrollResult::Unmodified;
// Unroll factor (Count) must be less or equal to TripCount.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index ec56610e41e5..87f40bb7ba85 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -193,9 +193,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
- Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
- Optional<bool> UserAllowProfileBasedPeeling,
- Optional<unsigned> UserFullUnrollMaxCount) {
+ Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
@@ -206,7 +204,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.PartialThreshold = 150;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
- UP.PeelCount = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = std::numeric_limits<unsigned>::max();
UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
@@ -218,10 +215,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
- UP.AllowPeeling = true;
- UP.AllowLoopNestsPeeling = false;
UP.UnrollAndJam = false;
- UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
@@ -249,8 +243,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxCount = UnrollMaxCount;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
UP.FullUnrollMaxCount = UnrollFullMaxCount;
- if (UnrollPeelCount.getNumOccurrences() > 0)
- UP.PeelCount = UnrollPeelCount;
if (UnrollAllowPartial.getNumOccurrences() > 0)
UP.Partial = UnrollAllowPartial;
if (UnrollAllowRemainder.getNumOccurrences() > 0)
@@ -259,10 +251,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Runtime = UnrollRuntime;
if (UnrollMaxUpperBound == 0)
UP.UpperBound = false;
- if (UnrollAllowPeeling.getNumOccurrences() > 0)
- UP.AllowPeeling = UnrollAllowPeeling;
- if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
- UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
if (UnrollUnrollRemainder.getNumOccurrences() > 0)
UP.UnrollRemainder = UnrollUnrollRemainder;
if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
@@ -281,16 +269,45 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Runtime = *UserRuntime;
if (UserUpperBound.hasValue())
UP.UpperBound = *UserUpperBound;
- if (UserAllowPeeling.hasValue())
- UP.AllowPeeling = *UserAllowPeeling;
- if (UserAllowProfileBasedPeeling.hasValue())
- UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
if (UserFullUnrollMaxCount.hasValue())
UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
return UP;
}
+TargetTransformInfo::PeelingPreferences
+llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ Optional<bool> UserAllowPeeling,
+ Optional<bool> UserAllowProfileBasedPeeling) {
+ TargetTransformInfo::PeelingPreferences PP;
+
+ // Default values
+ PP.PeelCount = 0;
+ PP.AllowPeeling = true;
+ PP.AllowLoopNestsPeeling = false;
+ PP.PeelProfiledIterations = true;
+
+ // Get Target Specifc Values
+ TTI.getPeelingPreferences(L, SE, PP);
+
+ // User Specified Values using cl::opt
+ if (UnrollPeelCount.getNumOccurrences() > 0)
+ PP.PeelCount = UnrollPeelCount;
+ if (UnrollAllowPeeling.getNumOccurrences() > 0)
+ PP.AllowPeeling = UnrollAllowPeeling;
+ if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
+ PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
+
+ // User Specifed values provided by argument
+ if (UserAllowPeeling.hasValue())
+ PP.AllowPeeling = *UserAllowPeeling;
+ if (UserAllowProfileBasedPeeling.hasValue())
+ PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
+
+ return PP;
+}
+
namespace {
/// A struct to densely store the state of an instruction after unrolling at
@@ -761,7 +778,8 @@ bool llvm::computeUnrollCount(
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
+ TargetTransformInfo::UnrollingPreferences &UP,
+ TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
@@ -863,8 +881,8 @@ bool llvm::computeUnrollCount(
}
// 4th priority is loop peeling.
- computePeelCount(L, LoopSize, UP, TripCount, SE);
- if (UP.PeelCount) {
+ computePeelCount(L, LoopSize, UP, PP, TripCount, SE);
+ if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
return ExplicitUnroll;
@@ -1067,8 +1085,9 @@ static LoopUnrollResult tryToUnrollLoop(
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
- ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
ProvidedFullUnrollMaxCount);
+ TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
+ L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling);
// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
// as threshold later on.
@@ -1142,7 +1161,7 @@ static LoopUnrollResult tryToUnrollLoop(
bool UseUpperBound = false;
bool IsCountSetExplicitly = computeUnrollCount(
L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
- TripMultiple, LoopSize, UP, UseUpperBound);
+ TripMultiple, LoopSize, UP, PP, UseUpperBound);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
// Unroll factor (Count) must be less or equal to TripCount.
@@ -1157,7 +1176,7 @@ static LoopUnrollResult tryToUnrollLoop(
LoopUnrollResult UnrollResult = UnrollLoop(
L,
{UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
- UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+ UseUpperBound, MaxOrZero, TripMultiple, PP.PeelCount, UP.UnrollRemainder,
ForgetAllSCEV},
LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
if (UnrollResult == LoopUnrollResult::Unmodified)
@@ -1189,7 +1208,7 @@ static LoopUnrollResult tryToUnrollLoop(
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
- (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
+ (IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount)))
L->setLoopAlreadyUnrolled();
return UnrollResult;
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 43dfaf3e50dc..c653aacbee6c 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -279,19 +279,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
+ TargetTransformInfo::PeelingPreferences &PP,
unsigned &TripCount, ScalarEvolution &SE) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
- // Save the UP.PeelCount value set by the target in
- // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
- unsigned TargetPeelCount = UP.PeelCount;
- UP.PeelCount = 0;
+ // Save the PP.PeelCount value set by the target in
+ // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
+ unsigned TargetPeelCount = PP.PeelCount;
+ PP.PeelCount = 0;
if (!canPeel(L))
return;
// Only try to peel innermost loops by default.
// The constraint can be relaxed by the target in TTI.getUnrollingPreferences
// or by the flag -unroll-allow-loop-nests-peeling.
- if (!UP.AllowLoopNestsPeeling && !L->empty())
+ if (!PP.AllowLoopNestsPeeling && !L->empty())
return;
// If the user provided a peel count, use that.
@@ -299,13 +300,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (UserPeelCount) {
LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
<< " iterations.\n");
- UP.PeelCount = UnrollForcePeelCount;
- UP.PeelProfiledIterations = true;
+ PP.PeelCount = UnrollForcePeelCount;
+ PP.PeelProfiledIterations = true;
return;
}
// Skip peeling if it's disabled.
- if (!UP.AllowPeeling)
+ if (!PP.AllowPeeling)
return;
unsigned AlreadyPeeled = 0;
@@ -354,8 +355,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
<< " iteration(s) to turn"
<< " some Phis into invariants.\n");
- UP.PeelCount = DesiredPeelCount;
- UP.PeelProfiledIterations = false;
+ PP.PeelCount = DesiredPeelCount;
+ PP.PeelProfiledIterations = false;
return;
}
}
@@ -367,7 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
return;
// Do not apply profile base peeling if it is disabled.
- if (!UP.PeelProfiledIterations)
+ if (!PP.PeelProfiledIterations)
return;
// If we don't know the trip count, but have reason to believe the average
// trip count is low, peeling should be beneficial, since we will usually
@@ -387,7 +388,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
<< " iterations.\n");
- UP.PeelCount = *PeelCount;
+ PP.PeelCount = *PeelCount;
return;
}
LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
More information about the llvm-commits
mailing list