[llvm] r321756 - [ExpandMemcmp] rename variables and add hook to override pref for number of loads per block; NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 3 12:02:40 PST 2018
Author: spatel
Date: Wed Jan 3 12:02:39 2018
New Revision: 321756
URL: http://llvm.org/viewvc/llvm-project?rev=321756&view=rev
Log:
[ExpandMemcmp] rename variables and add hook to override pref for number of loads per block; NFC
The preference only applies to 'memcmp() == 0' expansion, so try to make that clearer.
x86 will likely benefit by increasing the default value from '1' to '2' as seen in PR33325:
https://bugs.llvm.org/show_bug.cgi?id=33325
...so that is the planned follow-up to this clean-up step.
Modified:
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=321756&r1=321755&r2=321756&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Wed Jan 3 12:02:39 2018
@@ -1202,6 +1202,18 @@ public:
return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
}
+ /// For memcmp expansion when the memcmp result is only compared equal or
+ /// not-equal to 0, allow up to this number of load pairs per block. As an
+ /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
+ /// a0 = load2bytes &a[0]
+ /// b0 = load2bytes &b[0]
+ /// a2 = load1byte &a[2]
+ /// b2 = load1byte &b[2]
+ /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
+ virtual unsigned getMemcmpEqZeroLoadsPerBlock() const {
+ return 1;
+ }
+
/// \brief Get maximum # of store operations permitted for llvm.memmove
///
/// This function returns the maximum number of store operations permitted
Modified: llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp?rev=321756&r1=321755&r2=321756&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp (original)
+++ llvm/trunk/lib/CodeGen/ExpandMemCmp.cpp Wed Jan 3 12:02:39 2018
@@ -32,7 +32,7 @@ STATISTIC(NumMemCmpGreaterThanMax,
"Number of memcmp calls with size greater than max size");
STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
-static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
+static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
"memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
cl::desc("The number of loads per basic block for inline expansion of "
"memcmp that is only being compared against zero."));
@@ -56,7 +56,7 @@ class MemCmpExpansion {
const uint64_t Size;
unsigned MaxLoadSize;
uint64_t NumLoadsNonOneByte;
- const uint64_t NumLoadsPerBlock;
+ const uint64_t NumLoadsPerBlockForZeroCmp;
std::vector<BasicBlock *> LoadCmpBlocks;
BasicBlock *EndBlock;
PHINode *PhiRes;
@@ -102,7 +102,7 @@ class MemCmpExpansion {
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- unsigned NumLoadsPerBlock, const DataLayout &DL);
+ unsigned NumLoadsPerBlockForZeroCmp, const DataLayout &DL);
unsigned getNumBlocks();
uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -122,12 +122,12 @@ MemCmpExpansion::MemCmpExpansion(
CallInst *const CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout)
+ const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout)
: CI(CI),
Size(Size),
MaxLoadSize(0),
NumLoadsNonOneByte(0),
- NumLoadsPerBlock(NumLoadsPerBlock),
+ NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp),
IsUsedForZeroCmp(IsUsedForZeroCmp),
DL(TheDataLayout),
Builder(CI) {
@@ -171,8 +171,8 @@ MemCmpExpansion::MemCmpExpansion(
unsigned MemCmpExpansion::getNumBlocks() {
if (IsUsedForZeroCmp)
- return getNumLoads() / NumLoadsPerBlock +
- (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0);
+ return getNumLoads() / NumLoadsPerBlockForZeroCmp +
+ (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
return getNumLoads();
}
@@ -249,7 +249,7 @@ Value *MemCmpExpansion::getCompareLoadPa
Value *Diff;
const unsigned NumLoads =
- std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock);
+ std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
// For a single-block expansion, start inserting before the memcmp call.
if (LoadCmpBlocks.empty())
@@ -519,8 +519,6 @@ Value *MemCmpExpansion::getMemCmpEqZeroO
/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.
Value *MemCmpExpansion::getMemCmpOneBlock() {
- assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
-
Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
Value *Source1 = CI->getArgOperand(0);
Value *Source2 = CI->getArgOperand(1);
@@ -570,7 +568,8 @@ Value *MemCmpExpansion::getMemCmpExpansi
// not need to set up any extra blocks. This case could be handled in the DAG,
// but since we have all of the machinery to flexibly expand any memcpy here,
// we choose to handle this case too to avoid fragmented lowering.
- if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) {
+ if ((!IsUsedForZeroCmp && NumLoadsPerBlockForZeroCmp != 1) ||
+ getNumBlocks() != 1) {
BasicBlock *StartBlock = CI->getParent();
EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
setupEndBlockPHINodes();
@@ -596,8 +595,8 @@ Value *MemCmpExpansion::getMemCmpExpansi
return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
: getMemCmpExpansionZeroCase();
- // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
- if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock();
+ if (getNumBlocks() == 1)
+ return getMemCmpOneBlock();
for (unsigned I = 0; I < getNumBlocks(); ++I) {
emitLoadCompareBlock(I);
@@ -709,8 +708,12 @@ static bool expandMemCmp(CallInst *CI, c
const unsigned MaxNumLoads =
TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
+ unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()
+ ? MemCmpEqZeroNumLoadsPerBlock
+ : TLI->getMemcmpEqZeroLoadsPerBlock();
+
MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
- IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL);
+ IsUsedForZeroCmp, NumLoadsPerBlock, *DL);
// Don't expand if this will require more loads than desired by the target.
if (Expansion.getNumLoads() == 0) {
More information about the llvm-commits mailing list