[llvm] [Transforms] Recognize memcmp-like loops in LoopIdiomRecognize (PR #181562)
Sayan Sivakumaran via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 23 20:03:45 PDT 2026
https://github.com/sivakusayan updated https://github.com/llvm/llvm-project/pull/181562
>From af457567bac58c250deda15d220e691f05e9366a Mon Sep 17 00:00:00 2001
From: Sayan Sivakumaran <sivakusayan at gmail.com>
Date: Tue, 20 Jan 2026 17:37:02 -0600
Subject: [PATCH] Prototype for recognizing memcmp idiom
---
.../Transforms/Scalar/LoopIdiomRecognize.h | 3 +
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 251 +++++++++++-
llvm/test/Transforms/LoopIdiom/memcmp.ll | 378 ++++++++++++++++++
3 files changed, 620 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/Transforms/LoopIdiom/memcmp.ll
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index 109b4520878cb..ddd190bb68c2f 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -43,6 +43,9 @@ struct DisableLIRP {
/// When true, HashRecognize is disabled.
static bool HashRecognize;
+
+ /// When true, Memcmp is disabled.
+ static bool Memcmp;
};
/// Performs Loop Idiom Recognize Pass.
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 105757140c758..c62ade5cb0bc2 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -20,8 +20,6 @@
//
// TODO List:
//
-// Future loop memory idioms to recognize: memcmp, etc.
-//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
@@ -40,6 +38,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/HashRecognize.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -53,6 +52,7 @@
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -100,6 +100,7 @@ using namespace SCEVPatternMatch;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
+STATISTIC(NumMemCmp, "Number of uncountable loops recognized as memcmp idiom");
STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
STATISTIC(
NumShiftUntilBitTest,
@@ -131,6 +132,14 @@ static cl::opt<bool, true>
cl::location(DisableLIRP::Memcpy), cl::init(false),
cl::ReallyHidden);
+bool DisableLIRP::Memcmp;
+static cl::opt<bool, true>
+ DisableLIRPMemcmp("disable-loop-idiom-memcmp",
+ cl::desc("Proceed with loop idiom recognize pass, but do "
+ "not convert loop(s) to memcmp."),
+ cl::location(DisableLIRP::Memcmp), cl::init(false),
+ cl::ReallyHidden);
+
bool DisableLIRP::Strlen;
static cl::opt<bool, true>
DisableLIRPStrlen("disable-loop-idiom-strlen",
@@ -182,17 +191,17 @@ class LoopIdiomRecognize {
const TargetTransformInfo *TTI;
const DataLayout *DL;
OptimizationRemarkEmitter &ORE;
+ AssumptionCache *AC;
bool ApplyCodeSizeHeuristics;
std::unique_ptr<MemorySSAUpdater> MSSAU;
public:
- explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
- LoopInfo *LI, ScalarEvolution *SE,
- TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, MemorySSA *MSSA,
- const DataLayout *DL,
- OptimizationRemarkEmitter &ORE)
- : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
+ explicit LoopIdiomRecognize(
+ AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
+ TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, MemorySSA *MSSA,
+ const DataLayout *DL, OptimizationRemarkEmitter &ORE, AssumptionCache *AC)
+ : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE),
+ AC(AC) {
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
@@ -286,6 +295,7 @@ class LoopIdiomRecognize {
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
bool recognizeAndInsertStrLen();
+ bool recognizeAndInsertMemcmp();
/// @}
};
@@ -305,7 +315,7 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
- AR.MSSA, DL, ORE);
+ AR.MSSA, DL, ORE, &AR.AC);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -336,7 +346,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
if (Name == "memset" || Name == "memcpy" || Name == "strlen" ||
- Name == "wcslen")
+ Name == "wcslen" || Name == "memcmp")
return false;
// Determine if code size heuristics need to be applied.
@@ -1698,7 +1708,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
return recognizePopcount() || recognizeAndInsertFFS() ||
recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
- recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
+ recognizeShiftUntilLessThan() || recognizeAndInsertStrLen() ||
+ recognizeAndInsertMemcmp();
}
/// Check if the given conditional branch is based on the comparison between
@@ -3577,3 +3588,219 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
++NumShiftUntilZero;
return MadeChange;
}
+
+namespace {
+/// Recognizes loops that compare two integer buffers element by element and
+/// leave on the first mismatch, i.e. loops whose result can be computed as
+/// `memcmp(lhs, rhs, len) == 0`.
+class MemcmpVerifier {
+public:
+  explicit MemcmpVerifier(Loop *CurLoop, ScalarEvolution *SE, DominatorTree *DT,
+                          AssumptionCache *AC, TargetLibraryInfo *TLI,
+                          const DataLayout *DL)
+      : CurLoop(CurLoop), SE(SE), DT(DT), AC(AC), TLI(TLI), DL(DL) {}
+
+  /// Returns true if \p LI is a suitable operand of the element comparison of
+  /// a memcmp-like loop. On success, \p Base is set to the SCEVUnknown start
+  /// of the pointer recurrence and \p Step to its constant byte stride; both
+  /// are left untouched on failure.
+  bool isLoadMemcmpOperandCandidate(LoadInst *LI, const SCEVUnknown *&Base,
+                                    const APInt *&Step) {
+    // Volatile or atomic loads must not be folded into a library call.
+    if (!LI->isSimple())
+      return false;
+
+    // Only pointers in the default address space are handled.
+    Value *LoadPointer = LI->getPointerOperand();
+    if (LoadPointer->getType()->getPointerAddressSpace() != 0)
+      return false;
+
+    // Comparisons of floats can't be transformed. For example, the bits of
+    // two NaN values might be equivalent, but NaN is never equal to itself.
+    // This means `memcmp` would be a behavior change from float equality.
+    auto *LoadType = dyn_cast<IntegerType>(LI->getType());
+    if (!LoadType)
+      return false;
+
+    // There should be no padding between consecutive members of the integer
+    // array, as `memcmp` could give a different answer from integer equality.
+    if (DL->getTypeAllocSizeInBits(LoadType) != DL->getTypeSizeInBits(LoadType))
+      return false;
+
+    // If the Load SCEV has a non-constant step, it is clearly not part of a
+    // `memcmp` idiom. Furthermore, the step must be equal to the size of
+    // the load type, as otherwise the for loop would skip equality checks
+    // for certain bytes.
+    const SCEVUse LoadSCEV = SE->getSCEVAtScope(LoadPointer, CurLoop);
+    const SCEVUnknown *LoadBase;
+    const APInt *LoadStep;
+    if (!match(LoadSCEV, m_scev_AffineAddRec(m_SCEVUnknown(LoadBase),
+                                             m_scev_APInt(LoadStep))))
+      return false;
+
+    // TODO: Possibly handle negative strides.
+    if (LoadStep->isNegative() ||
+        LoadType->getBitWidth() != *LoadStep * CHAR_BIT)
+      return false;
+
+    // Give up in the degenerate case where the stride constant is wider than
+    // the target's size_t, since the length handed to `memcmp` is a size_t.
+    unsigned SizeTBits = TLI->getSizeTSize(*CurLoop->getHeader()->getModule());
+    if (LoadStep->getBitWidth() > SizeTBits)
+      return false;
+
+    // The load must be dereferenceable no matter how many times the loop
+    // executes. Otherwise, creating a memcmp for this loop is undefined
+    // behavior.
+    if (!llvm::isDereferenceableAndAlignedInLoop(LI, CurLoop, *SE, *DT, AC))
+      return false;
+
+    Base = LoadBase;
+    Step = LoadStep;
+    return true;
+  }
+
+  /// Detects the following memcmp-like loop structure:
+  ///
+  ///   preheader:
+  ///     ...
+  ///     br label %body
+  ///
+  ///   body:
+  ///     ... ; Both loads have equal SCEV steps and satisfy certain properties
+  ///     %lhs = load i32, ptr %lhs_ptr
+  ///     %rhs = load i32, ptr %rhs_ptr
+  ///     %equal = icmp eq i32 %lhs, %rhs
+  ///     br i1 %equal, label %cond, label %exit
+  ///
+  ///   cond:
+  ///     ... ; Compute whether loop should stop using some induction variable
+  ///     br i1 %stop_loop, label %exit, label %body
+  ///
+  ///   exit:
+  ///     %buffers_equal = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ///
+  /// More specifically, both loads must have a SCEV expression of the form
+  /// {%buffer,+,c}, where c is a constant equal to the size in bytes of the
+  /// load type. Furthermore, the loaded type must have no padding, the loads
+  /// must be dereferenceable on every iteration, and the latch must leave the
+  /// loop after a constant number of iterations.
+  ///
+  /// On success, \p PhiToReplace is the exit-block PHI carrying the result,
+  /// \p Lhs and \p Rhs are the buffer bases and \p Len is the number of bytes
+  /// to compare. The out-parameters are only written on success.
+  bool detectMemcmpIdiom(PHINode *&PhiToReplace, Value *&Lhs, Value *&Rhs,
+                         const SCEV *&Len) {
+    // Step 1: Make sure we have a single header, latch and exit block, and
+    // that the loop consists of nothing but the header and the latch, since
+    // only those two blocks are inspected below. Furthermore, if the body or
+    // condition block have side effects, this is clearly not equivalent to
+    // memcmp.
+    BasicBlock *BodyBlock = CurLoop->getHeader();
+    BasicBlock *CondBlock = CurLoop->getLoopLatch();
+    BasicBlock *ExitBlock = CurLoop->getUniqueExitBlock();
+    if (!BodyBlock || !CondBlock || !ExitBlock)
+      return false;
+    if (CurLoop->getNumBlocks() != 2)
+      return false;
+
+    for (const Instruction &I : *BodyBlock)
+      if (I.mayHaveSideEffects())
+        return false;
+
+    for (const Instruction &I : *CondBlock)
+      if (I.mayHaveSideEffects())
+        return false;
+
+    // Step 2: The pattern we are looking for has precisely one LCSSAPhi.
+    // Furthermore, that LCSSAPhi is constant.
+    if (std::distance(ExitBlock->phis().begin(), ExitBlock->phis().end()) != 1)
+      return false;
+
+    PHINode &Phi = *ExitBlock->phis().begin();
+    Value *Val = Phi.hasConstantValue();
+    if (!Val)
+      return false;
+
+    // Step 3: Verify that the value being used in the LCSSAPhi is an integer
+    // equality compare, and that the header leaves the loop exactly when that
+    // compare fails. Any other predicate (e.g. `icmp ne`) describes a loop
+    // that `memcmp(...) == 0` does not model.
+    using namespace PatternMatch;
+    auto *CI = dyn_cast<ICmpInst>(Val);
+    if (!CI || CI->getPredicate() != CmpInst::ICMP_EQ)
+      return false;
+    // Match on the terminator itself; the matcher rejects anything that is
+    // not a conditional branch, so no cast (and no null) is handed to it.
+    if (!match(BodyBlock->getTerminator(),
+               m_Br(m_Specific(CI), m_SpecificBB(CondBlock),
+                    m_SpecificBB(ExitBlock))))
+      return false;
+
+    // Step 4: Verify that the compare operation is comparing two
+    // loads satisfying certain criteria. See
+    // `isLoadMemcmpOperandCandidate()` for more information.
+    auto *LoadLHS = dyn_cast<LoadInst>(CI->getOperand(0));
+    auto *LoadRHS = dyn_cast<LoadInst>(CI->getOperand(1));
+    if (!LoadLHS || !LoadRHS)
+      return false;
+
+    const SCEVUnknown *BaseLHS = nullptr;
+    const SCEVUnknown *BaseRHS = nullptr;
+    const APInt *StepLHS = nullptr;
+    const APInt *StepRHS = nullptr;
+    if (!isLoadMemcmpOperandCandidate(LoadLHS, BaseLHS, StepLHS) ||
+        !isLoadMemcmpOperandCandidate(LoadRHS, BaseRHS, StepRHS))
+      return false;
+    if (*StepLHS != *StepRHS)
+      return false;
+
+    // Step 5: The number of elements the loop compares when no mismatch is
+    // found must be a known constant. Use the exact exit count of the latch
+    // rather than an upper bound on the backedge-taken count: an upper bound
+    // that is larger than the real count would make `memcmp` look at bytes
+    // the loop never compared and could change the result.
+    const SCEV *LatchExitCount = SE->getExitCount(CurLoop, CondBlock);
+    if (!isa<SCEVConstant>(LatchExitCount))
+      return false;
+
+    // Step 6: The transform is safe, so return the information needed for
+    // the caller to emit an equivalent memcmp.
+    PhiToReplace = &Phi;
+    Lhs = BaseLHS->getValue();
+    Rhs = BaseRHS->getValue();
+    Type *IntIdxTy = DL->getIndexType(Lhs->getType());
+    Len = getNumBytes(LatchExitCount, IntIdxTy, SE->getConstant(*StepLHS),
+                      CurLoop, DL, SE);
+    return true;
+  }
+
+private:
+  Loop *CurLoop;
+  ScalarEvolution *SE;
+  DominatorTree *DT;
+  AssumptionCache *AC;
+  const TargetLibraryInfo *TLI;
+  const DataLayout *DL;
+};
+} // namespace
+
+/// Replaces the result of a memcmp-like loop (see
+/// MemcmpVerifier::detectMemcmpIdiom) with `memcmp(lhs, rhs, len) == 0`
+/// emitted in the loop's exit block. Only the users of the exit PHI are
+/// rewritten; the loop itself is left in place for a later pass to remove.
+///
+/// \returns true if the IR was changed.
+bool LoopIdiomRecognize::recognizeAndInsertMemcmp() {
+  if (DisableLIRP::Memcmp || !TLI->has(LibFunc_memcmp))
+    return false;
+
+  PHINode *PhiToReplace = nullptr;
+  Value *Lhs = nullptr;
+  Value *Rhs = nullptr;
+  const SCEV *Len = nullptr;
+  MemcmpVerifier Verifier(CurLoop, SE, DT, AC, TLI, DL);
+  if (!Verifier.detectMemcmpIdiom(PhiToReplace, Lhs, Rhs, Len))
+    return false;
+
+  SCEVExpander Expander(*SE, "loop-idiom");
+  SCEVExpanderCleaner ExpCleaner(Expander);
+  if (!Expander.isSafeToExpand(Len))
+    return false;
+
+  // Emit the replacement right after the exit block's PHIs so that it
+  // dominates every user of the PHI, including users that sit before the
+  // block's terminator.
+  BasicBlock *PhiBlock = PhiToReplace->getParent();
+  BasicBlock::iterator InsertPt = PhiBlock->getFirstInsertionPt();
+  IRBuilder<> Builder(PhiBlock, InsertPt);
+
+  Value *LenArg = Expander.expandCodeFor(Len, Len->getType(), InsertPt);
+  Value *MemCmpCall = llvm::emitMemCmp(Lhs, Rhs, LenArg, Builder, *DL, TLI);
+  // No call is produced when the library function cannot be emitted. Bail
+  // out without touching the PHI; the cleaner removes anything the expander
+  // may have inserted because the result was not marked as used.
+  if (!MemCmpCall)
+    return false;
+
+  Value *BuffersEqual = Builder.CreateICmpEQ(
+      MemCmpCall, Constant::getNullValue(MemCmpCall->getType()));
+  PhiToReplace->replaceAllUsesWith(BuffersEqual);
+  RecursivelyDeleteDeadPHINode(PhiToReplace);
+
+  ++NumMemCmp;
+  LLVM_DEBUG(dbgs() << "Formed memcmp idiom:" << *MemCmpCall << "\n");
+  ORE.emit([&]() {
+    return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertMemcmp",
+                              CurLoop->getStartLoc(), PhiBlock)
+           << "Transformed memcmp loop idiom";
+  });
+  ExpCleaner.markResultUsed();
+  // We'll let the loop-deletion pass handle deleting the now dead loop.
+  return true;
+}
diff --git a/llvm/test/Transforms/LoopIdiom/memcmp.ll b/llvm/test/Transforms/LoopIdiom/memcmp.ll
new file mode 100644
index 0000000000000..17d69f7736221
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/memcmp.ll
@@ -0,0 +1,378 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s
+
+%memcmp_idiom_arr = type { [50 x i32] }
+; Positive test: both buffers are byval arrays of 50 x i32 and the loop
+; compares 50 i32 elements, leaving early on the first mismatch. The expected
+; output replaces the exit phi with a 200-byte memcmp compared against zero;
+; the loop itself is still present.
+define i1 @memcmp_idiom(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @memcmp_idiom(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[COND:.*]]:
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT: [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT: br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT: [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT: [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT: br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[LHS]], ptr [[RHS]], i64 200)
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: ret i1 [[TMP0]]
+;
+entry:
+ br label %body
+cond:
+ %inc = add nuw nsw i64 %indvar, 1
+ %stop_loop = icmp eq i64 %inc, 50
+ br i1 %stop_loop, label %exit, label %body
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+ %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+ %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+ %lhs_val = load i32, ptr %lhs_addr, align 4
+ %rhs_val = load i32, ptr %rhs_addr, align 4
+ %equal = icmp eq i32 %lhs_val, %rhs_val
+ br i1 %equal, label %cond, label %exit
+exit:
+ %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+ ret i1 %ret
+}
+
+%too_short_arr = type { [49 x i32] }
+; Negative test: the byval buffers hold only 49 x i32 while the loop bound is
+; still 50. The expected output keeps the exit phi, i.e. no memcmp is formed.
+define i1 @no_memcmp_idiom_array_too_short(ptr byval(%too_short_arr) align 8 %lhs, ptr byval(%too_short_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_array_too_short(
+; CHECK-SAME: ptr byval([[TOO_SHORT_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[TOO_SHORT_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[COND:.*]]:
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT: [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT: br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT: [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT: [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT: br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RET:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+entry:
+ br label %body
+cond:
+ %inc = add nuw nsw i64 %indvar, 1
+ %stop_loop = icmp eq i64 %inc, 50
+ br i1 %stop_loop, label %exit, label %body
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+ %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+ %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+ %lhs_val = load i32, ptr %lhs_addr, align 4
+ %rhs_val = load i32, ptr %rhs_addr, align 4
+ %equal = icmp eq i32 %lhs_val, %rhs_val
+ br i1 %equal, label %cond, label %exit
+exit:
+ %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+ ret i1 %ret
+}
+
+%has_padding_arr = type { [50 x i36] }
+; Negative test: the compared elements are loaded as i36. The expected output
+; keeps the exit phi, i.e. no memcmp is formed.
+; NOTE(review): the addresses are still computed with an i32 element stride
+; although the buffers are declared as arrays of i36 - confirm this is the
+; intended shape for a padding test.
+define i1 @no_memcmp_idiom_array_has_padding(ptr byval(%has_padding_arr) align 8 %lhs, ptr byval(%has_padding_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_array_has_padding(
+; CHECK-SAME: ptr byval([[HAS_PADDING_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[HAS_PADDING_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[COND:.*]]:
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT: [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT: br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT: [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LHS_VAL:%.*]] = load i36, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT: [[RHS_VAL:%.*]] = load i36, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i36 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT: br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RET:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+entry:
+ br label %body
+cond:
+ %inc = add nuw nsw i64 %indvar, 1
+ %stop_loop = icmp eq i64 %inc, 50
+ br i1 %stop_loop, label %exit, label %body
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+ %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+ %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+ %lhs_val = load i36, ptr %lhs_addr, align 4
+ %rhs_val = load i36, ptr %rhs_addr, align 4
+ %equal = icmp eq i36 %lhs_val, %rhs_val
+ br i1 %equal, label %cond, label %exit
+exit:
+ %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+ ret i1 %ret
+}
+
+%mismatched_stride_arr = type { [100 x i32] }
+; Negative test: the left-hand address is indexed by 2 * %indvar while the
+; right-hand address is indexed by %indvar, so the two loads advance by
+; different strides. The expected output keeps the exit phi, i.e. no memcmp
+; is formed.
+define i1 @no_memcmp_idiom_mismatched_stride(ptr byval(%mismatched_stride_arr) align 8 %lhs, ptr byval(%mismatched_stride_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_mismatched_stride(
+; CHECK-SAME: ptr byval([[MISMATCHED_STRIDE_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MISMATCHED_STRIDE_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[COND:.*]]:
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT: [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT: br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT: [[DOUBLEINDVAR:%.*]] = mul i64 [[INDVAR]], 2
+; CHECK-NEXT: [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[DOUBLEINDVAR]]
+; CHECK-NEXT: [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT: [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT: br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT: ret i1 [[TMP0]]
+;
+entry:
+ br label %body
+cond:
+ %inc = add nuw nsw i64 %indvar, 1
+ %stop_loop = icmp eq i64 %inc, 50
+ br i1 %stop_loop, label %exit, label %body
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+ %doubleindvar = mul i64 %indvar, 2
+ %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %doubleindvar
+ %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+ %lhs_val = load i32, ptr %lhs_addr, align 4
+ %rhs_val = load i32, ptr %rhs_addr, align 4
+ %equal = icmp eq i32 %lhs_val, %rhs_val
+ br i1 %equal, label %cond, label %exit
+exit:
+ %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+ ret i1 %ret
+}
+
+; Negative test: same loop as @memcmp_idiom, but both element loads are
+; volatile. The expected output keeps the exit phi, i.e. no memcmp is formed.
+define i1 @no_memcmp_idiom_volatile_loads(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_volatile_loads(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[COND:.*]]:
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT: [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT: br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT: [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LHS_VAL:%.*]] = load volatile i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT: [[RHS_VAL:%.*]] = load volatile i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT: br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT: ret i1 [[TMP0]]
+;
+entry:
+ br label %body
+cond:
+ %inc = add nuw nsw i64 %indvar, 1
+ %stop_loop = icmp eq i64 %inc, 50
+ br i1 %stop_loop, label %exit, label %body
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+ %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+ %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+ %lhs_val = load volatile i32, ptr %lhs_addr, align 4
+ %rhs_val = load volatile i32, ptr %rhs_addr, align 4
+ %equal = icmp eq i32 %lhs_val, %rhs_val
+ br i1 %equal, label %cond, label %exit
+exit:
+ %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+ ret i1 %ret
+}
+
+%float_arr = type { [50 x float] }
+; Negative test: the elements are loaded as float and compared with
+; `fcmp oeq` instead of an integer compare. The expected output keeps the
+; exit phi, i.e. no memcmp is formed.
+define i1 @no_memcmp_idiom_float_comparisons(ptr byval(%float_arr) align 8 %lhs, ptr byval(%float_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_float_comparisons(
+; CHECK-SAME: ptr byval([[FLOAT_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[FLOAT_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[COND:.*]]:
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT: [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT: br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT: [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LHS_VAL:%.*]] = load float, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT: [[RHS_VAL:%.*]] = load float, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT: [[EQUAL:%.*]] = fcmp oeq float [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT: br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT: ret i1 [[TMP0]]
+;
+entry:
+ br label %body
+cond:
+ %inc = add nuw nsw i64 %indvar, 1
+ %stop_loop = icmp eq i64 %inc, 50
+ br i1 %stop_loop, label %exit, label %body
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+ %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+ %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+ %lhs_val = load float, ptr %lhs_addr, align 4
+ %rhs_val = load float, ptr %rhs_addr, align 4
+ %equal = fcmp oeq float %lhs_val, %rhs_val
+ br i1 %equal, label %cond, label %exit
+exit:
+ %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+ ret i1 %ret
+}
+
+; Same loop as @memcmp_idiom with an extra `alloca` in the latch (%cond) block.
+; NOTE(review): despite the `no_memcmp_idiom_` name, the autogenerated
+; assertions below expect the memcmp call to be emitted in the exit block, so
+; as written this documents that the `alloca` does not block the transform.
+; Confirm whether a real side effect (or a rename) was intended.
+define i1 @no_memcmp_idiom_cond_block_side_effect(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_cond_block_side_effect(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[COND:.*]]:
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT: [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT: [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT: br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT: [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT: [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT: br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[LHS]], ptr [[RHS]], i64 200)
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: ret i1 [[RET]]
+;
+entry:
+ br label %body
+cond:
+ %inc = add nuw nsw i64 %indvar, 1
+ %ptr = alloca i8
+ %stop_loop = icmp eq i64 %inc, 50
+ br i1 %stop_loop, label %exit, label %body
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+ %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+ %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+ %lhs_val = load i32, ptr %lhs_addr, align 4
+ %rhs_val = load i32, ptr %rhs_addr, align 4
+ %equal = icmp eq i32 %lhs_val, %rhs_val
+ br i1 %equal, label %cond, label %exit
+exit:
+ %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+ ret i1 %ret
+}
+
+; Same loop as @memcmp_idiom with an extra `alloca` in the header (%body)
+; block.
+; NOTE(review): despite the `no_memcmp_idiom_` name, the autogenerated
+; assertions below expect the memcmp call to be emitted in the exit block, so
+; as written this documents that the `alloca` does not block the transform.
+; Confirm whether a real side effect (or a rename) was intended.
+define i1 @no_memcmp_idiom_body_block_side_effect(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_body_block_side_effect(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[COND:.*]]:
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT: [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT: br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT: [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT: [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT: [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT: br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[LHS]], ptr [[RHS]], i64 200)
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: ret i1 [[RET]]
+;
+entry:
+ br label %body
+cond:
+ %inc = add nuw nsw i64 %indvar, 1
+ %stop_loop = icmp eq i64 %inc, 50
+ br i1 %stop_loop, label %exit, label %body
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+ %ptr = alloca i8
+ %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+ %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+ %lhs_val = load i32, ptr %lhs_addr, align 4
+ %rhs_val = load i32, ptr %rhs_addr, align 4
+ %equal = icmp eq i32 %lhs_val, %rhs_val
+ br i1 %equal, label %cond, label %exit
+exit:
+ %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+ ret i1 %ret
+}
+
+; Negative test: the latch compares the incremented index against the
+; argument %max instead of a constant. The expected output keeps the exit phi,
+; i.e. no memcmp is formed.
+; NOTE(review): the header block also contains an `alloca` that the test name
+; does not mention - it looks like a leftover from the side-effect tests;
+; confirm it is not needed here.
+define i1 @no_memcmp_idiom_non_constant_loop_bound(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs, i64 %max) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_non_constant_loop_bound(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]], i64 [[MAX:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[COND:.*]]:
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT: [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], [[MAX]]
+; CHECK-NEXT: br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT: [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT: [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT: [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT: br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RET:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+entry:
+ br label %body
+cond:
+ %inc = add nuw nsw i64 %indvar, 1
+ %stop_loop = icmp eq i64 %inc, %max
+ br i1 %stop_loop, label %exit, label %body
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+ %ptr = alloca i8
+ %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+ %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+ %lhs_val = load i32, ptr %lhs_addr, align 4
+ %rhs_val = load i32, ptr %rhs_addr, align 4
+ %equal = icmp eq i32 %lhs_val, %rhs_val
+ br i1 %equal, label %cond, label %exit
+exit:
+ %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+ ret i1 %ret
+}
More information about the llvm-commits
mailing list