[llvm] r364281 - [ExpandMemCmp] Move all options to TargetTransformInfo.
Clement Courbet via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 25 01:04:13 PDT 2019
Author: courbet
Date: Tue Jun 25 01:04:13 2019
New Revision: 364281
URL: http://llvm.org/viewvc/llvm-project?rev=364281&view=rev
Log:
[ExpandMemCmp] Move all options to TargetTransformInfo.
Split off from D60318.
Modified:
llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp
llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h
llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Tue Jun 25 01:04:13 2019
@@ -630,17 +630,35 @@ public:
/// Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
- /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
- /// true if this is the expansion of memcmp(p1, p2, s) == 0.
+ /// Returns options for expansion of memcmp. IsZeroCmp is
+ /// true if this is the expansion of memcmp(p1, p2, s) == 0.
struct MemCmpExpansionOptions {
+ // Return true if memcmp expansion is enabled.
+ operator bool() const { return MaxNumLoads > 0; }
+
+ // Maximum number of load operations.
+ unsigned MaxNumLoads = 0;
+
// The list of available load sizes (in bytes), sorted in decreasing order.
SmallVector<unsigned, 8> LoadSizes;
+
+ // For memcmp expansion when the memcmp result is only compared equal or
+ // not-equal to 0, allow up to this number of load pairs per block. As an
+ // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
+ // a0 = load2bytes &a[0]
+ // b0 = load2bytes &b[0]
+ // a2 = load1byte &a[2]
+ // b2 = load1byte &b[2]
+ // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
+ unsigned NumLoadsPerBlock = 1;
+
// Set to true to allow overlapping loads. For example, 7-byte compares can
// be done with two 4-byte compares instead of 4+2+1-byte compares. This
// requires all loads in LoadSizes to be doable in an unaligned way.
bool AllowOverlappingLoads = false;
};
- const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
+ MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
/// Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
@@ -1162,8 +1180,8 @@ public:
unsigned VF) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
- virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const = 0;
+ virtual MemCmpExpansionOptions
+ enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -1464,9 +1482,9 @@ public:
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
- const MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const override {
- return Impl.enableMemCmpExpansion(IsZeroCmp);
+ MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const override {
+ return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Tue Jun 25 01:04:13 2019
@@ -296,9 +296,9 @@ public:
bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
- const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const {
- return nullptr;
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const {
+ return {};
}
bool enableInterleavedAccessVectorization() { return false; }
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Tue Jun 25 01:04:13 2019
@@ -1385,18 +1385,6 @@ public:
return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
}
- /// For memcmp expansion when the memcmp result is only compared equal or
- /// not-equal to 0, allow up to this number of load pairs per block. As an
- /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
- /// a0 = load2bytes &a[0]
- /// b0 = load2bytes &b[0]
- /// a2 = load1byte &a[2]
- /// b2 = load1byte &b[2]
- /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
- virtual unsigned getMemcmpEqZeroLoadsPerBlock() const {
- return 1;
- }
-
/// Get maximum # of store operations permitted for llvm.memmove
///
/// This function returns the maximum number of store operations permitted
Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Tue Jun 25 01:04:13 2019
@@ -374,9 +374,9 @@ bool TargetTransformInfo::enableAggressi
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
-const TargetTransformInfo::MemCmpExpansionOptions *
-TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
- return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
+TargetTransformInfo::MemCmpExpansionOptions
+TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
Modified: llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp (original)
+++ llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp Tue Jun 25 01:04:13 2019
@@ -113,8 +113,7 @@ class MemCmpExpansion {
public:
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout);
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);
unsigned getNumBlocks();
uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -203,16 +202,10 @@ MemCmpExpansion::computeOverlappingLoadS
MemCmpExpansion::MemCmpExpansion(
CallInst *const CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout)
- : CI(CI),
- Size(Size),
- MaxLoadSize(0),
- NumLoadsNonOneByte(0),
- NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp),
- IsUsedForZeroCmp(IsUsedForZeroCmp),
- DL(TheDataLayout),
- Builder(CI) {
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
+ : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
+ NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
+ IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
@@ -223,17 +216,17 @@ MemCmpExpansion::MemCmpExpansion(
MaxLoadSize = LoadSizes.front();
// Compute the decomposition.
unsigned GreedyNumLoadsNonOneByte = 0;
- LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads,
+ LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
GreedyNumLoadsNonOneByte);
NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
- assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+ assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
// If we allow overlapping loads and the load sequence is not already optimal,
// use overlapping loads.
if (Options.AllowOverlappingLoads &&
(LoadSequence.empty() || LoadSequence.size() > 2)) {
unsigned OverlappingNumLoadsNonOneByte = 0;
auto OverlappingLoads = computeOverlappingLoadSequence(
- Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte);
+ Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
if (!OverlappingLoads.empty() &&
(LoadSequence.empty() ||
OverlappingLoads.size() < LoadSequence.size())) {
@@ -241,7 +234,7 @@ MemCmpExpansion::MemCmpExpansion(
NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
}
}
- assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+ assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
}
unsigned MemCmpExpansion::getNumBlocks() {
@@ -748,23 +741,21 @@ static bool expandMemCmp(CallInst *CI, c
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
- const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp);
+ auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(),
+ IsUsedForZeroCmp);
if (!Options) return false;
- const unsigned MaxNumLoads = CI->getFunction()->hasOptSize()
- ? (MaxLoadsPerMemcmpOptSize.getNumOccurrences()
- ? MaxLoadsPerMemcmpOptSize
- : TLI->getMaxExpandSizeMemcmp(true))
- : (MaxLoadsPerMemcmp.getNumOccurrences()
- ? MaxLoadsPerMemcmp
- : TLI->getMaxExpandSizeMemcmp(false));
-
- unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()
- ? MemCmpEqZeroNumLoadsPerBlock
- : TLI->getMemcmpEqZeroLoadsPerBlock();
+ if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
+ Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
- MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
- IsUsedForZeroCmp, NumLoadsPerBlock, *DL);
+ if (CI->getFunction()->hasOptSize() &&
+ MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
+
+ if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmp;
+
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
// Don't expand if this will require more loads than desired by the target.
if (Expansion.getNumLoads() == 0) {
Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp Tue Jun 25 01:04:13 2019
@@ -582,17 +582,12 @@ bool PPCTTIImpl::enableAggressiveInterle
return LoopHasReductions;
}
-const PPCTTIImpl::TTI::MemCmpExpansionOptions *
-PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
- static const auto Options = []() {
- TTI::MemCmpExpansionOptions Options;
- Options.LoadSizes.push_back(8);
- Options.LoadSizes.push_back(4);
- Options.LoadSizes.push_back(2);
- Options.LoadSizes.push_back(1);
- return Options;
- }();
- return &Options;
+PPCTTIImpl::TTI::MemCmpExpansionOptions
+PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+ Options.LoadSizes = {8, 4, 2, 1};
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ return Options;
}
bool PPCTTIImpl::enableInterleavedAccessVectorization() {
Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.h Tue Jun 25 01:04:13 2019
@@ -66,8 +66,8 @@ public:
/// @{
bool useColdCCForColdCall(Function &F);
bool enableAggressiveInterleaving(bool LoopHasReductions);
- const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const;
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Jun 25 01:04:13 2019
@@ -879,11 +879,6 @@ namespace llvm {
/// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
MVT hasFastEqualityCompare(unsigned NumBits) const override;
- /// Allow multiple load pairs per block for smaller and faster code.
- unsigned getMemcmpEqZeroLoadsPerBlock() const override {
- return 2;
- }
-
/// Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Tue Jun 25 01:04:13 2019
@@ -3291,38 +3291,29 @@ bool X86TTIImpl::areFunctionArgsABICompa
TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs();
}
-const X86TTIImpl::TTI::MemCmpExpansionOptions *
-X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
- // Only enable vector loads for equality comparison.
- // Right now the vector version is not as fast, see #33329.
- static const auto ThreeWayOptions = [this]() {
- TTI::MemCmpExpansionOptions Options;
- if (ST->is64Bit()) {
- Options.LoadSizes.push_back(8);
- }
- Options.LoadSizes.push_back(4);
- Options.LoadSizes.push_back(2);
- Options.LoadSizes.push_back(1);
- return Options;
- }();
- static const auto EqZeroOptions = [this]() {
- TTI::MemCmpExpansionOptions Options;
+X86TTIImpl::TTI::MemCmpExpansionOptions
+X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ Options.NumLoadsPerBlock = 2;
+ if (IsZeroCmp) {
+ // Only enable vector loads for equality comparison. Right now the vector
+ // version is not as fast for three way compare (see #33329).
// TODO: enable AVX512 when the DAG is ready.
// if (ST->hasAVX512()) Options.LoadSizes.push_back(64);
if (ST->hasAVX2()) Options.LoadSizes.push_back(32);
if (ST->hasSSE2()) Options.LoadSizes.push_back(16);
- if (ST->is64Bit()) {
- Options.LoadSizes.push_back(8);
- }
- Options.LoadSizes.push_back(4);
- Options.LoadSizes.push_back(2);
- Options.LoadSizes.push_back(1);
// All GPR and vector loads can be unaligned. SIMD compare requires integer
// vectors (SSE2/AVX2).
Options.AllowOverlappingLoads = true;
- return Options;
- }();
- return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
+ }
+ if (ST->is64Bit()) {
+ Options.LoadSizes.push_back(8);
+ }
+ Options.LoadSizes.push_back(4);
+ Options.LoadSizes.push_back(2);
+ Options.LoadSizes.push_back(1);
+ return Options;
}
bool X86TTIImpl::enableInterleavedAccessVectorization() {
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h Tue Jun 25 01:04:13 2019
@@ -199,8 +199,8 @@ public:
bool areFunctionArgsABICompatible(const Function *Caller,
const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const;
- const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const;
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
private:
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
Modified: llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp?rev=364281&r1=364280&r2=364281&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp Tue Jun 25 01:04:13 2019
@@ -866,7 +866,7 @@ static bool runImpl(Function &F, const T
// We only try merging comparisons if the target wants to expand memcmp later.
// The rationale is to avoid turning small chains into memcmp calls.
- if (!TTI.enableMemCmpExpansion(true))
+ if (!TTI.enableMemCmpExpansion(F.hasOptSize(), true))
return false;
// If we don't have memcmp available we can't emit calls to it.
More information about the llvm-commits
mailing list